diff --git a/- name: Cache uses: actions/ - name: Cache uses: actions/cache@v4.2.3 with: # A list of files, directories, and wildcard patterns to cache and restore path: # An explicit key for restoring and saving the cache key: # An ordered multiline string listing the prefix-matched keys, that are used for restoring stale cache if no cache hit occurred for key. Note `cache-hit` returns false in this case. restore-keys: # optional # The chunk size used to split up large files during upload, in bytes upload-chunk-size: # optional # An optional boolean when enabled, allows windows runners to save or restore caches that can be restored or saved respectively on other platforms enableCrossOsArchive: # optional, default is false # Fail the workflow if cache entry is not found fail-on-cache-miss: # optional, default is false # Check if a cache entry exists for the given input(s) (key, restore-keys) without downloading the cache lookup-only: # optional, default is false # Run the post step to save the cache even if another step before fails save-always: # optional, default is false b/- name: Cache uses: actions/ - name: Cache uses: actions/cache@v4.2.3 with: # A list of files, directories, and wildcard patterns to cache and restore path: # An explicit key for restoring and saving the cache key: # An ordered multiline string listing the prefix-matched keys, that are used for restoring stale cache if no cache hit occurred for key. Note `cache-hit` returns false in this case. 
restore-keys: # optional # The chunk size used to split up large files during upload, in bytes upload-chunk-size: # optional # An optional boolean when enabled, allows windows runners to save or restore caches that can be restored or saved respectively on other platforms enableCrossOsArchive: # optional, default is false # Fail the workflow if cache entry is not found fail-on-cache-miss: # optional, default is false # Check if a cache entry exists for the given input(s) (key, restore-keys) without downloading the cache lookup-only: # optional, default is false # Run the post step to save the cache even if another step before fails save-always: # optional, default is false new file mode 100644 index 0000000..e69de29 diff --git a/- name: Cache uses: actions/cache@v4.2.3 b/- name: Cache uses: actions/cache@v4.2.3 new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/5codeql.yml b/.github/workflows/5codeql.yml new file mode 100644 index 0000000..205e8fd --- /dev/null +++ b/.github/workflows/5codeql.yml @@ -0,0 +1,100 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "main", "Ihtbbs" ] + pull_request: + branches: [ "main", "Ihtbbs" ] + schedule: + - cron: '31 15 * * 1' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. 
To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: actions + build-mode: none + - language: javascript-typescript + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). 
This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # ℹ️ Command-line programs to run using the OS shell. 
+ # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 0000000..c14bbbc --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,39 @@ +# Dependency Review Action +# +# This Action will scan dependency manifest files that change as part of a Pull Request, +# surfacing known-vulnerable versions of the packages declared or updated in the PR. +# Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable +# packages will be blocked from merging. 
+# +# Source repository: https://github.com/actions/dependency-review-action +# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement +name: 'Dependency review' +on: + pull_request: + branches: [ "main", "Ihtbbs" ] + +# If using a dependency submission action in this workflow this permission will need to be set to: +# +# permissions: +# contents: write +# +# https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api +permissions: + contents: read + # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option + pull-requests: write + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout repository' + uses: actions/checkout@v4 + - name: 'Dependency Review' + uses: actions/dependency-review-action@v4 + # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options. + with: + comment-summary-in-pr: always + # fail-on-severity: moderate + # deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later + # retry-on-snapshot-warnings: true diff --git a/.github/workflows/google.yml b/.github/workflows/google.yml new file mode 100644 index 0000000..0b5c7d1 --- /dev/null +++ b/.github/workflows/google.yml @@ -0,0 +1,116 @@ +# This workflow will build a docker container, publish it to Google Container +# Registry, and deploy it to GKE when there is a push to the "main" +# branch. +# +# To configure this workflow: +# +# 1. 
Enable the following Google Cloud APIs: +# +# - Artifact Registry (artifactregistry.googleapis.com) +# - Google Kubernetes Engine (container.googleapis.com) +# - IAM Credentials API (iamcredentials.googleapis.com) +# +# You can learn more about enabling APIs at +# https://support.google.com/googleapi/answer/6158841. +# +# 2. Ensure that your repository contains the necessary configuration for your +# Google Kubernetes Engine cluster, including deployment.yml, +# kustomization.yml, service.yml, etc. +# +# 3. Create and configure a Workload Identity Provider for GitHub: +# https://github.com/google-github-actions/auth#preferred-direct-workload-identity-federation. +# +# Depending on how you authenticate, you will need to grant an IAM principal +# permissions on Google Cloud: +# +# - Artifact Registry Administrator (roles/artifactregistry.admin) +# - Kubernetes Engine Developer (roles/container.developer) +# +# You can learn more about setting IAM permissions at +# https://cloud.google.com/iam/docs/manage-access-other-resources +# +# 4. Change the values in the "env" block to match your values. 
+ +name: 'Build and Deploy to GKE' + +on: + push: + branches: + - 'main' + +env: + PROJECT_ID: 'my-project' # TODO: update to your Google Cloud project ID + GAR_LOCATION: 'us-central1' # TODO: update to your region + GKE_CLUSTER: 'cluster-1' # TODO: update to your cluster name + GKE_ZONE: 'us-central1-c' # TODO: update to your cluster zone + DEPLOYMENT_NAME: 'gke-test' # TODO: update to your deployment name + REPOSITORY: 'samples' # TODO: update to your Artifact Registry docker repository name + IMAGE: 'static-site' + WORKLOAD_IDENTITY_PROVIDER: 'projects/123456789/locations/global/workloadIdentityPools/my-pool/providers/my-provider' # TODO: update to your workload identity provider + +jobs: + setup-build-publish-deploy: + name: 'Setup, Build, Publish, and Deploy' + runs-on: 'ubuntu-latest' + environment: 'production' + + permissions: + contents: 'read' + id-token: 'write' + + steps: + - name: 'Checkout' + uses: 'actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332' # actions/checkout@v4 + + # Configure Workload Identity Federation and generate an access token. + # + # See https://github.com/google-github-actions/auth for more options, + # including authenticating via a JSON credentials file. 
+ - id: 'auth' + name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@f112390a2df9932162083945e46d439060d66ec2' # google-github-actions/auth@v2 + with: + workload_identity_provider: '${{ env.WORKLOAD_IDENTITY_PROVIDER }}' + + # Authenticate Docker to Google Cloud Artifact Registry + - name: 'Docker Auth' + uses: 'docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567' # docker/login-action@v3 + with: + username: 'oauth2accesstoken' + password: '${{ steps.auth.outputs.auth_token }}' + registry: '${{ env.GAR_LOCATION }}-docker.pkg.dev' + + # Get the GKE credentials so we can deploy to the cluster + - name: 'Set up GKE credentials' + uses: 'google-github-actions/get-gke-credentials@6051de21ad50fbb1767bc93c11357a49082ad116' # google-github-actions/get-gke-credentials@v2 + with: + cluster_name: '${{ env.GKE_CLUSTER }}' + location: '${{ env.GKE_ZONE }}' + + # Build the Docker image + - name: 'Build and push Docker container' + run: |- + DOCKER_TAG="${GAR_LOCATION}-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${IMAGE}:${GITHUB_SHA}" + + docker build \ + --tag "${DOCKER_TAG}" \ + --build-arg GITHUB_SHA="${GITHUB_SHA}" \ + --build-arg GITHUB_REF="${GITHUB_REF}" \ + . + + docker push "${DOCKER_TAG}" + + # Set up kustomize (the release asset is a tarball, so extract the binary from it) + - name: 'Set up Kustomize' + run: |- + curl -sfL https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv5.4.3/kustomize_v5.4.3_linux_amd64.tar.gz | tar -xzf - kustomize + chmod u+x ./kustomize + + # Deploy the Docker image to the GKE cluster + - name: 'Deploy to GKE' + run: |- + # replacing the image name in the k8s template + ./kustomize edit set image LOCATION-docker.pkg.dev/PROJECT_ID/REPOSITORY/IMAGE:TAG=$GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$IMAGE:$GITHUB_SHA + ./kustomize build . 
| kubectl apply -f - + kubectl rollout status deployment/$DEPLOYMENT_NAME + kubectl get services -o wide diff --git a/.github/workflows/ibm.yml b/.github/workflows/ibm.yml new file mode 100644 index 0000000..f5e8cd8 --- /dev/null +++ b/.github/workflows/ibm.yml @@ -0,0 +1,75 @@ +# This workflow will build a docker container, publish it to IBM Container Registry, and deploy it to IKS when there is a push to the "main" branch. +# +# To configure this workflow: +# +# 1. Ensure that your repository contains a Dockerfile +# 2. Setup secrets in your repository by going to settings: Create ICR_NAMESPACE and IBM_CLOUD_API_KEY +# 3. Change the values for the IBM_CLOUD_REGION, REGISTRY_HOSTNAME, IMAGE_NAME, IKS_CLUSTER, DEPLOYMENT_NAME, and PORT + +name: Build and Deploy to IKS + +on: + push: + branches: [ "main" ] + +# Environment variables available to all jobs and steps in this workflow +env: + GITHUB_SHA: ${{ github.sha }} + IBM_CLOUD_API_KEY: ${{ secrets.IBM_CLOUD_API_KEY }} + IBM_CLOUD_REGION: us-south + ICR_NAMESPACE: ${{ secrets.ICR_NAMESPACE }} + REGISTRY_HOSTNAME: us.icr.io + IMAGE_NAME: iks-test + IKS_CLUSTER: example-iks-cluster-name-or-id + DEPLOYMENT_NAME: iks-test + PORT: 5001 + +jobs: + setup-build-publish-deploy: + name: Setup, Build, Publish, and Deploy + runs-on: ubuntu-latest + environment: production + steps: + + - name: Checkout + uses: actions/checkout@v4 + + # Download and Install IBM Cloud CLI + - name: Install IBM Cloud CLI + run: | + curl -fsSL https://clis.cloud.ibm.com/install/linux | sh + ibmcloud --version + ibmcloud config --check-version=false + ibmcloud plugin install -f kubernetes-service + ibmcloud plugin install -f container-registry + + # Authenticate with IBM Cloud CLI + - name: Authenticate with IBM Cloud CLI + run: | + ibmcloud login --apikey "${IBM_CLOUD_API_KEY}" -r "${IBM_CLOUD_REGION}" -g default + ibmcloud cr region-set "${IBM_CLOUD_REGION}" + ibmcloud cr login + + # Build the Docker image + - name: Build with Docker + run: | + 
docker build -t "$REGISTRY_HOSTNAME"/"$ICR_NAMESPACE"/"$IMAGE_NAME":"$GITHUB_SHA" \ + --build-arg GITHUB_SHA="$GITHUB_SHA" \ + --build-arg GITHUB_REF="$GITHUB_REF" . + + # Push the image to IBM Container Registry + - name: Push the image to ICR + run: | + docker push $REGISTRY_HOSTNAME/$ICR_NAMESPACE/$IMAGE_NAME:$GITHUB_SHA + + # Deploy the Docker image to the IKS cluster + - name: Deploy to IKS + run: | + ibmcloud ks cluster config --cluster $IKS_CLUSTER + kubectl config current-context + kubectl create deployment $DEPLOYMENT_NAME --image=$REGISTRY_HOSTNAME/$ICR_NAMESPACE/$IMAGE_NAME:$GITHUB_SHA --dry-run=client -o yaml > deployment.yaml + kubectl apply -f deployment.yaml + kubectl rollout status deployment/$DEPLOYMENT_NAME + kubectl create service loadbalancer $DEPLOYMENT_NAME --tcp=80:$PORT --dry-run=client -o yaml > service.yaml + kubectl apply -f service.yaml + kubectl get services -o wide diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml new file mode 100644 index 0000000..11b2e35 --- /dev/null +++ b/.github/workflows/manual.yml @@ -0,0 +1,32 @@ +# This is a basic workflow that is manually triggered + +name: Manual workflow + +# Controls when the action will run. Workflow runs when manually triggered using the UI +# or API. +on: + workflow_dispatch: + # Inputs the workflow accepts. 
+ inputs: + name: + # Friendly description to be shown in the UI instead of 'name' + description: 'Person to greet' + # Default value if no value is explicitly provided + default: 'World' + # Input has to be provided for the workflow to run + required: true + # The data type of the input + type: string + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "greet" + greet: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Runs a single command using the runners shell + - name: Send greeting + run: echo "Hello ${{ inputs.name }}" diff --git a/.github/workflows/npm-publish-github-packages.yml b/.github/workflows/npm-publish-github-packages.yml new file mode 100644 index 0000000..ea2d329 --- /dev/null +++ b/.github/workflows/npm-publish-github-packages.yml @@ -0,0 +1,36 @@ +# This workflow will run tests using node and then publish a package to GitHub Packages when a release is created +# For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages + +name: Node.js Package + +on: + release: + types: [created] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + - run: npm ci + - run: npm test + + publish-gpr: + needs: build + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + registry-url: https://npm.pkg.github.com/ + - run: npm ci + - run: npm publish + env: + NODE_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml new file mode 100644 index 0000000..2a4766d --- /dev/null +++ b/.github/workflows/npm-publish.yml @@ -0,0 +1,33 @@ +# This 
workflow will run tests using node and then publish a package to the npm registry when a release is created +# For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages + +name: Node.js Package + +on: + release: + types: [created] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + - run: npm ci + - run: npm test + + publish-npm: + needs: build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + registry-url: https://registry.npmjs.org/ + - run: npm ci + - run: npm publish + env: + NODE_AUTH_TOKEN: ${{secrets.npm_token}} diff --git a/.github/workflows/release-new-action-version.yml b/.github/workflows/release-new-action-version.yml index 0b64c97..7acfe6b 100644 --- a/.github/workflows/release-new-action-version.yml +++ b/.github/workflows/release-new-action-version.yml @@ -4,7 +4,7 @@ on: types: [released] workflow_dispatch: inputs: - TAG_NAME: + TAG_NAME: description: 'Tag name that the major tag will point to' required: true diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml new file mode 100644 index 0000000..f2c9e97 --- /dev/null +++ b/.github/workflows/static.yml @@ -0,0 +1,43 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy static content to Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: '.' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/webpack.yml b/.github/workflows/webpack.yml new file mode 100644 index 0000000..9626ff6 --- /dev/null +++ b/.github/workflows/webpack.yml @@ -0,0 +1,28 @@ +name: NodeJS with Webpack + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + node-version: [18.x, 20.x, 22.x] + + steps: + - uses: actions/checkout@v4 + + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + + - name: Build + run: | + npm install + npx webpack diff --git a/LICENSE b/LICENSE index a426ef2..dea10f7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,28 @@ +BSD 3-Clause License -The MIT License (MIT) +Copyright (c) 2025, DIMVY clothing brand -Copyright (c) 2018 GitHub, Inc. and contributors +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +1. 
Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/README.md b/README.md index 3f07f15..20318db 100644 --- a/README.md +++ b/README.md @@ -1,344 +1,58 @@ -# Cache action +# Dimvy-Clothing-brand/cache -This action allows caching dependencies and build outputs to improve workflow execution time. +Cache dependencies and build outputs in GitHub Actions. ->Two other actions are available in addition to the primary `cache` action: -> ->* [Restore action](./restore/README.md) ->* [Save action](./save/README.md) +## Table of Contents -[![Tests](https://github.com/actions/cache/actions/workflows/workflow.yml/badge.svg)](https://github.com/actions/cache/actions/workflows/workflow.yml) +- [Overview](#overview) +- [Features](#features) +- [Installation](#installation) +- [Usage](#usage) +- [Contributing](#contributing) +- [License](#license) -## Documentation +## Overview -See ["Caching dependencies to speed up workflows"](https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows). +This repository provides a solution for caching dependencies and build outputs in GitHub Actions. By caching these outputs, you can significantly speed up your CI/CD workflows. -## What's New +## Features -### ⚠ī¸ Important changes +- **TypeScript**: 98% +- **Shell**: 1.1% +- **JavaScript**: 0.9% -The cache backend service has been rewritten from the ground up for improved performance and reliability. [actions/cache](https://github.com/actions/cache) now integrates with the new cache service (v2) APIs. +## Installation -The new service will gradually roll out as of **February 1st, 2025**. The legacy service will also be sunset on the same date. Changes in these release are **fully backward compatible**. - -**We are deprecating some versions of this action**. We recommend upgrading to version `v4` or `v3` as soon as possible before **February 1st, 2025.** (Upgrade instructions below). 
- -If you are using pinned SHAs, please use the SHAs of versions `v4.2.0` or `v3.4.0` - -If you do not upgrade, all workflow runs using any of the deprecated [actions/cache](https://github.com/actions/cache) will fail. - -Upgrading to the recommended versions will not break your workflows. - -Read more about the change & access the migration guide: [reference to the announcement](https://github.com/actions/cache/discussions/1510). - -### v4 - -* Integrated with the new cache service (v2) APIs. -* Updated to node 20 - -### v3 - -* Integrated with the new cache service (v2) APIs. -* Added support for caching in GHES 3.5+. -* Fixed download issue for files > 2GB during restore. -* Updated the minimum runner version support from node 12 -> node 16. -* Fixed avoiding empty cache save when no files are available for caching. -* Fixed tar creation error while trying to create tar with path as `~/` home folder on `ubuntu-latest`. -* Fixed zstd failing on amazon linux 2.0 runners. -* Fixed cache not working with github workspace directory or current directory. -* Fixed the download stuck problem by introducing a timeout of 1 hour for cache downloads. -* Fix zstd not working for windows on gnu tar in issues. -* Allowing users to provide a custom timeout as input for aborting download of a cache segment using an environment variable `SEGMENT_DOWNLOAD_TIMEOUT_MINS`. Default is 10 minutes. -* New actions are available for granular control over caches - [restore](restore/action.yml) and [save](save/action.yml). -* Support cross-os caching as an opt-in feature. See [Cross OS caching](./tips-and-workarounds.md#cross-os-cache) for more info. -* Added option to fail job on cache miss. See [Exit workflow on cache miss](./restore/README.md#exit-workflow-on-cache-miss) for more info. -* Fix zstd not being used after zstd version upgrade to 1.5.4 on hosted runners -* Added option to lookup cache without downloading it. 
-* Reduced segment size to 128MB and segment timeout to 10 minutes to fail fast in case the cache download is stuck. - -See the [v2 README.md](https://github.com/actions/cache/blob/v2/README.md) for older updates. +To use this caching solution in your GitHub Actions workflows, you need to add the appropriate configuration to your workflow YAML files. ## Usage -### Pre-requisites - -Create a workflow `.yml` file in your repository's `.github/workflows` directory. An [example workflow](#example-cache-workflow) is available below. For more information, see the GitHub Help Documentation for [Creating a workflow file](https://help.github.com/en/articles/configuring-a-workflow#creating-a-workflow-file). - -If you are using this inside a container, a POSIX-compliant `tar` needs to be included and accessible from the execution path. - -If you are using a `self-hosted` Windows runner, `GNU tar` and `zstd` are required for [Cross-OS caching](https://github.com/actions/cache/blob/main/tips-and-workarounds.md#cross-os-cache) to work. They are also recommended to be installed in general so the performance is on par with `hosted` Windows runners. - -### Inputs - -* `key` - An explicit key for a cache entry. See [creating a cache key](#creating-a-cache-key). -* `path` - A list of files, directories, and wildcard patterns to cache and restore. See [`@actions/glob`](https://github.com/actions/toolkit/tree/main/packages/glob) for supported patterns. -* `restore-keys` - An ordered multiline string listing the prefix-matched keys, that are used for restoring stale cache if no cache hit occurred for key. -* `enableCrossOsArchive` - An optional boolean when enabled, allows Windows runners to save or restore caches that can be restored or saved respectively on other platforms. Default: `false` -* `fail-on-cache-miss` - Fail the workflow if cache entry is not found. Default: `false` -* `lookup-only` - If true, only checks if cache entry exists and skips download. 
Does not change save cache behavior. Default: `false` - -#### Environment Variables - -* `SEGMENT_DOWNLOAD_TIMEOUT_MINS` - Segment download timeout (in minutes, default `10`) to abort download of the segment if not completed in the defined number of minutes. [Read more](https://github.com/actions/cache/blob/main/tips-and-workarounds.md#cache-segment-restore-timeout) - -### Outputs - -* `cache-hit` - A string value to indicate an exact match was found for the key. - * If there's a cache hit, this will be 'true' or 'false' to indicate if there's an exact match for `key`. - * If there's a cache miss, this will be an empty string. - -See [Skipping steps based on cache-hit](#skipping-steps-based-on-cache-hit) for info on using this output - -### Cache scopes - -The cache is scoped to the key, [version](#cache-version), and branch. The default branch cache is available to other branches. - -See [Matching a cache key](https://help.github.com/en/actions/configuring-and-managing-workflows/caching-dependencies-to-speed-up-workflows#matching-a-cache-key) for more info. 
- -### Example cache workflow - -#### Restoring and saving cache using a single action +Here's an example of how to use this caching solution in a GitHub Actions workflow: ```yaml -name: Caching Primes +name: CI -on: push +on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Cache Primes - id: cache-primes - uses: actions/cache@v4 + - name: Set up Node.js + uses: actions/setup-node@v4 with: - path: prime-numbers - key: ${{ runner.os }}-primes + node-version: '20' - - name: Generate Prime Numbers - if: steps.cache-primes.outputs.cache-hit != 'true' - run: /generate-primes.sh -d prime-numbers - - - name: Use Prime Numbers - run: /primes.sh -d prime-numbers -``` - -The `cache` action provides a `cache-hit` output which is set to `true` when the cache is restored using the primary `key` and `false` when the cache is restored using `restore-keys` or no cache is restored. - -#### Using a combination of restore and save actions - -```yaml -name: Caching Primes - -on: push - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Restore cached Primes - id: cache-primes-restore - uses: actions/cache/restore@v4 + - name: Cache dependencies + uses: actions/cache@v4 with: - path: | - path/to/dependencies - some/other/dependencies - key: ${{ runner.os }}-primes - . - . //intermediate workflow steps - . - - name: Save Primes - id: cache-primes-save - uses: actions/cache/save@v4 - with: - path: | - path/to/dependencies - some/other/dependencies - key: ${{ steps.cache-primes-restore.outputs.cache-primary-key }} -``` + path: ~/.npm + key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-node- -> **Note** -> You must use the `cache` or `restore` action in your workflow before you need to use the files that might be restored from the cache. 
If the provided `key` matches an existing cache, a new cache is not created and if the provided `key` doesn't match an existing cache, a new cache is automatically created provided the job completes successfully. - -## Caching Strategies - -With the introduction of the `restore` and `save` actions, a lot of caching use cases can now be achieved. Please see the [caching strategies](./caching-strategies.md) document for understanding how you can use the actions strategically to achieve the desired goal. - -## Implementation Examples - -Every programming language and framework has its own way of caching. - -See [Examples](examples.md) for a list of `actions/cache` implementations for use with: - -* [Bun](./examples.md#bun) -* [C# - NuGet](./examples.md#c---nuget) -* [Clojure - Lein Deps](./examples.md#clojure---lein-deps) -* [D - DUB](./examples.md#d---dub) -* [Deno](./examples.md#deno) -* [Elixir - Mix](./examples.md#elixir---mix) -* [Go - Modules](./examples.md#go---modules) -* [Haskell - Cabal](./examples.md#haskell---cabal) -* [Haskell - Stack](./examples.md#haskell---stack) -* [Java - Gradle](./examples.md#java---gradle) -* [Java - Maven](./examples.md#java---maven) -* [Node - npm](./examples.md#node---npm) -* [Node - Lerna](./examples.md#node---lerna) -* [Node - Yarn](./examples.md#node---yarn) -* [OCaml/Reason - esy](./examples.md#ocamlreason---esy) -* [PHP - Composer](./examples.md#php---composer) -* [Python - pip](./examples.md#python---pip) -* [Python - pipenv](./examples.md#python---pipenv) -* [R - renv](./examples.md#r---renv) -* [Ruby - Bundler](./examples.md#ruby---bundler) -* [Rust - Cargo](./examples.md#rust---cargo) -* [Scala - SBT](./examples.md#scala---sbt) -* [Swift, Objective-C - Carthage](./examples.md#swift-objective-c---carthage) -* [Swift, Objective-C - CocoaPods](./examples.md#swift-objective-c---cocoapods) -* [Swift - Swift Package Manager](./examples.md#swift---swift-package-manager) -* [Swift - Mint](./examples.md#swift---mint) - -## 
Creating a cache key - -A cache key can include any of the contexts, functions, literals, and operators supported by GitHub Actions. - -For example, using the [`hashFiles`](https://docs.github.com/en/actions/learn-github-actions/expressions#hashfiles) function allows you to create a new cache when dependencies change. - -```yaml - - uses: actions/cache@v4 - with: - path: | - path/to/dependencies - some/other/dependencies - key: ${{ runner.os }}-${{ hashFiles('**/lockfiles') }} -``` - -Additionally, you can use arbitrary command output in a cache key, such as a date or software version: - -```yaml - # http://man7.org/linux/man-pages/man1/date.1.html - - name: Get Date - id: get-date - run: | - echo "date=$(/bin/date -u "+%Y%m%d")" >> $GITHUB_OUTPUT - shell: bash - - - uses: actions/cache@v4 - with: - path: path/to/dependencies - key: ${{ runner.os }}-${{ steps.get-date.outputs.date }}-${{ hashFiles('**/lockfiles') }} -``` - -See [Using contexts to create cache keys](https://help.github.com/en/actions/configuring-and-managing-workflows/caching-dependencies-to-speed-up-workflows#using-contexts-to-create-cache-keys) - -## Cache Limits - -A repository can have up to 10GB of caches. Once the 10GB limit is reached, older caches will be evicted based on when the cache was last accessed. Caches that are not accessed within the last week will also be evicted. - -## Skipping steps based on cache-hit - -Using the `cache-hit` output, subsequent steps (such as install or build) can be skipped when a cache hit occurs on the key. It is recommended to install missing/updated dependencies in case of a partial key match when the key is dependent on the `hash` of the package file. 
- -Example: - -```yaml -steps: - - uses: actions/checkout@v4 - - - uses: actions/cache@v4 - id: cache - with: - path: path/to/dependencies - key: ${{ runner.os }}-${{ hashFiles('**/lockfiles') }} - - - name: Install Dependencies - if: steps.cache.outputs.cache-hit != 'true' - run: /install.sh -``` - -> **Note** The `id` defined in `actions/cache` must match the `id` in the `if` statement (i.e. `steps.[ID].outputs.cache-hit`) - -## Cache Version - -Cache version is a hash [generated](https://github.com/actions/toolkit/blob/500d0b42fee2552ae9eeb5933091fe2fbf14e72d/packages/cache/src/internal/cacheHttpClient.ts#L73-L90) for a combination of compression tool used (Gzip, Zstd, etc. based on the runner OS) and the `path` of directories being cached. If two caches have different versions, they are identified as unique caches while matching. This, for example, means that a cache created on a `windows-latest` runner can't be restored on `ubuntu-latest` as cache `Version`s are different. - -> Pro tip: The [list caches](https://docs.github.com/en/rest/actions/cache#list-github-actions-caches-for-a-repository) API can be used to get the version of a cache. This can be helpful to troubleshoot cache miss due to version. - -
- Example -The workflow will create 3 unique caches with same keys. Ubuntu and windows runners will use different compression technique and hence create two different caches. And `build-linux` will create two different caches as the `paths` are different. - -```yaml -jobs: - build-linux: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Cache Primes - id: cache-primes - uses: actions/cache@v4 - with: - path: prime-numbers - key: primes - - - name: Generate Prime Numbers - if: steps.cache-primes.outputs.cache-hit != 'true' - run: ./generate-primes.sh -d prime-numbers - - - name: Cache Numbers - id: cache-numbers - uses: actions/cache@v4 - with: - path: numbers - key: primes - - - name: Generate Numbers - if: steps.cache-numbers.outputs.cache-hit != 'true' - run: ./generate-primes.sh -d numbers - - build-windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v4 - - - name: Cache Primes - id: cache-primes - uses: actions/cache@v4 - with: - path: prime-numbers - key: primes - - - name: Generate Prime Numbers - if: steps.cache-primes.outputs.cache-hit != 'true' - run: ./generate-primes -d prime-numbers -``` - -
- -## Known practices and workarounds - -There are a number of community practices/workarounds to fulfill specific requirements. You may choose to use them if they suit your use case. Note these are not necessarily the only solution or even a recommended solution. - -* [Cache segment restore timeout](./tips-and-workarounds.md#cache-segment-restore-timeout) -* [Update a cache](./tips-and-workarounds.md#update-a-cache) -* [Use cache across feature branches](./tips-and-workarounds.md#use-cache-across-feature-branches) -* [Cross OS cache](./tips-and-workarounds.md#cross-os-cache) -* [Force deletion of caches overriding default cache eviction policy](./tips-and-workarounds.md#force-deletion-of-caches-overriding-default-cache-eviction-policy) - -### Windows environment variables - -Please note that Windows environment variables (like `%LocalAppData%`) will NOT be expanded by this action. Instead, prefer using `~` in your paths which will expand to the HOME directory. For example, instead of `%LocalAppData%`, use `~\AppData\Local`. For a list of supported default environment variables, see the [Learn GitHub Actions: Variables](https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables) page. - -## Contributing - -We would love for you to contribute to `actions/cache`. Pull requests are welcome! Please see the [CONTRIBUTING.md](CONTRIBUTING.md) for more information. 
- -## License - -The scripts and documentation in this project are released under the [MIT License](LICENSE) + - run: npm install + - run: npm run build diff --git a/Windy b/Windy new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Windy @@ -0,0 +1 @@ + diff --git a/action.yml b/action.yml index 7af7458..39e8cce 100644 --- a/action.yml +++ b/action.yml @@ -1,6 +1,6 @@ -name: 'Cache' +name: 'air_40' description: 'Cache artifacts like dependencies and build outputs to improve workflow execution time' -author: 'GitHub' +author: 'Shannon Fletcher' inputs: path: description: 'A list of files, directories, and wildcard patterns to cache and restore' diff --git a/bquxjob_110c1a0c_195e84a97dd.json b/bquxjob_110c1a0c_195e84a97dd.json new file mode 100644 index 0000000..3f66acf --- /dev/null +++ b/bquxjob_110c1a0c_195e84a97dd.json @@ -0,0 +1,70213 @@ +[{ + "id": "3987", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \u0027subsidies\u0027 table where amount is greater than 50 for the year 2022", + "sql_context": "CREATE TABLE subsidies ( year INT, country VARCHAR(20), amount FLOAT ); INSERT INTO subsidies (year, country, amount) VALUES (2022, \u0027US\u0027, 60.5), (2022, \u0027Canada\u0027, 45.3), (2021, \u0027Mexico\u0027, 30.9);", + "sql": "WITH cte AS (DELETE FROM subsidies WHERE year \u003d 2022 AND amount \u003e 50) DELETE FROM cte;", + "sql_explanation": "A CTE is created to delete records from the \"subsidies\" table where the year is 2022 and the amount is greater than 50. The DELETE operation is then executed on the CTE." 
+}, { + "id": "3357", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027battery_range\u0027 column in the \u0027electric_vehicle_stats\u0027 table to 300 for the record with id 2", + "sql_context": "CREATE TABLE electric_vehicle_stats (id INT, make TEXT, model TEXT, battery_range INT); INSERT INTO electric_vehicle_stats (id, make, model, battery_range) VALUES (1, \u0027Tesla\u0027, \u0027Model 3\u0027, 263), (2, \u0027Chevrolet\u0027, \u0027Bolt\u0027, 259), (3, \u0027Nissan\u0027, \u0027Leaf\u0027, 226);", + "sql": "WITH cte AS (UPDATE electric_vehicle_stats SET battery_range \u003d 300 WHERE id \u003d 2) SELECT * FROM cte;", + "sql_explanation": "This query creates a CTE that updates the \u0027battery_range\u0027 column in the \u0027electric_vehicle_stats\u0027 table to 300 for the record with id 2. It then selects all columns from the CTE, which returns the updated record." 
+}, { + "id": "14", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the description of a sustainable urbanism initiative in the sustainable_urbanism_initiatives table", + "sql_context": "CREATE TABLE public.sustainable_urbanism_initiatives (id SERIAL PRIMARY KEY, initiative_name VARCHAR(255), initiative_description TEXT, city_name VARCHAR(255), state_name VARCHAR(255)); INSERT INTO public.sustainable_urbanism_initiatives (initiative_name, initiative_description, city_name, state_name) VALUES (\u0027Green City Program\u0027, \u0027Promotes green spaces and sustainable transportation options in urban areas\u0027, \u0027Portland\u0027, \u0027Oregon\u0027), (\u0027EcoDistricts Initiative\u0027, \u0027Encourages sustainable community development in city neighborhoods\u0027, \u0027Vancouver\u0027, \u0027British Columbia\u0027);", + "sql": "WITH updated_initiative AS (UPDATE public.sustainable_urbanism_initiatives SET initiative_description \u003d \u0027Promotes green spaces, sustainable transportation, and energy-efficient buildings in urban areas\u0027 WHERE initiative_name \u003d \u0027Green City Program\u0027 RETURNING *) INSERT INTO public.sustainable_urbanism_initiatives (initiative_name, initiative_description, city_name, state_name) SELECT initiative_name, initiative_description, city_name, state_name FROM updated_initiative;", + "sql_explanation": "The SQL query first updates the \u0027initiative_description\u0027 of the \u0027Green City Program\u0027 initiative in the \u0027sustainable_urbanism_initiatives\u0027 table to a more detailed description. 
The UPDATE statement uses a RETURNING clause to capture the updated record in a CTE named \u0027updated_initiative\u0027. The INSERT statement then inserts the updated record back into the \u0027sustainable_urbanism_initiatives\u0027 table." +}, { + "id": "65", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the property owner\u0027s email address in the properties table", + "sql_context": "CREATE TABLE public.properties (id SERIAL PRIMARY KEY, property_address VARCHAR(255), property_owner_id INTEGER, property_owner_email VARCHAR(255)); INSERT INTO public.properties (property_address, property_owner_id, property_owner_email) VALUES (\u0027123 Main St\u0027, 1, \u0027john.smith@example.com\u0027), (\u0027456 Elm St\u0027, 2, \u0027jane.doe@example.com\u0027), (\u0027789 Oak St\u0027, 3, \u0027mary.major@example.com\u0027);", + "sql": "WITH updated_email AS (UPDATE public.properties SET property_owner_email \u003d \u0027mary.major@newemail.com\u0027 WHERE property_address \u003d \u0027789 Oak St\u0027 RETURNING *) INSERT INTO public.properties (property_address, property_owner_id, property_owner_email) SELECT property_address, property_owner_id, property_owner_email FROM updated_email;", + "sql_explanation": "The SQL query first updates the \u0027property_owner_email\u0027 of the property with address \u0027789 Oak St\u0027 in the \u0027properties\u0027 table to \u0027mary.major@newemail.com\u0027. The UPDATE statement uses a RETURNING clause to capture the updated record in a CTE named \u0027updated_email\u0027. The INSERT statement then inserts the updated record back into the \u0027properties\u0027 table." 
+}, { + "id": "2278", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the \u0027Ocean Pollution Prevention\u0027 effort from the \u0027conservation_efforts\u0027 table", + "sql_context": "CREATE TABLE conservation_efforts (id INT PRIMARY KEY, name VARCHAR(255), description TEXT, year INT);", + "sql": "WITH deleted_effort AS (DELETE FROM conservation_efforts WHERE name \u003d \u0027Ocean Pollution Prevention\u0027) SELECT * FROM deleted_effort;", + "sql_explanation": "Delete the \u0027Ocean Pollution Prevention\u0027 effort from the \"conservation_efforts\" table using a Common Table Expression (CTE) for better readability. The CTE is then used to select all columns from the deleted record, which will return an empty result set since we\u0027ve deleted the record." 
+}, { + "id": "2353", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the average depth of the \u0027Arctic Ocean\u0027 in the \u0027oceanography\u0027 table", + "sql_context": "CREATE TABLE oceanography (id INT PRIMARY KEY, name VARCHAR(255), average_depth FLOAT, area FLOAT, volume FLOAT);", + "sql": "WITH updated_arctic AS (UPDATE oceanography SET average_depth \u003d 1205 WHERE name \u003d \u0027Arctic Ocean\u0027) SELECT * FROM updated_arctic;", + "sql_explanation": "Update the average depth for the \u0027Arctic Ocean\u0027 in the \"oceanography\" table using a Common Table Expression (CTE) for better readability. The average depth is updated to 1205 meters. The CTE is then used to select all columns from the updated record." 
+}, { + "id": "2143", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the port name for port ID 12 in the \"ports\" table", + "sql_context": "CREATE TABLE ports (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255));", + "sql": "WITH updated_port AS (UPDATE ports SET name \u003d \u0027New Port Name\u0027 WHERE id \u003d 12 RETURNING id, name, location) SELECT * FROM updated_port;", + "sql_explanation": "This query updates the name of the port with ID 12 in the \"ports\" table, and then returns the updated record using a common table expression." +}, { + "id": "2008", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the temperature of the record from 2011 to -18.5", + "sql_context": "CREATE TABLE climate (id INT PRIMARY KEY, year INT, temperature FLOAT, precipitation FLOAT, location VARCHAR(100));", + "sql": "WITH upd AS (UPDATE climate SET temperature \u003d -18.5 WHERE year \u003d 2011) SELECT id, year, temperature, precipitation, location FROM climate;", + "sql_explanation": "This query updates the temperature of the record from 2011 to -18.5. It uses a Common Table Expression (CTE) to perform the update operation." 
+}, { + "id": "2624", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the \"game_servers\" table where the \"server_region\" is not in (\u0027EU\u0027, \u0027NA\u0027, \u0027ASIA\u0027)", + "sql_context": "CREATE TABLE game_servers (server_id INT, server_name VARCHAR(50), server_region VARCHAR(50));", + "sql": "WITH cte AS (VALUES (\u0027EU\u0027), (\u0027NA\u0027), (\u0027ASIA\u0027)) DELETE FROM game_servers WHERE server_region NOT IN (SELECT * FROM cte);", + "sql_explanation": "The SQL query uses a CTE to specify the allowed values for the \"server_region\" field (\u0027EU\u0027, \u0027NA\u0027, and \u0027ASIA\u0027). The DELETE statement is then used to delete records from the \"game_servers\" table where the \"server_region\" is not in the allowed values." +}, { + "id": "2776", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete a publication record from the \"publications\" table", + "sql_context": "CREATE TABLE publications (id INT PRIMARY KEY, title VARCHAR(100), author VARCHAR(50), journal VARCHAR(50), publication_date DATE);", + "sql": "WITH deleted_publication AS (DELETE FROM publications WHERE id \u003d 1 RETURNING *) SELECT * FROM deleted_publication;", + "sql_explanation": "This SQL query deletes the publication record with id 1 from the \"publications\" table. 
It uses a common table expression (CTE) to perform the delete operation and returns the deleted record." +}, { + "id": "3221", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete a graduate student record from the \"students\" table", + "sql_context": "CREATE TABLE students (id INT PRIMARY KEY, first_name VARCHAR(50), last_name VARCHAR(50), department VARCHAR(50), email VARCHAR(50));", + "sql": "WITH deleted_student AS (DELETE FROM students WHERE id \u003d 1 RETURNING *) SELECT * FROM deleted_student;", + "sql_explanation": "This SQL query deletes the graduate student record with id 1 from the \"students\" table. It uses a common table expression (CTE) to perform the delete operation and returns the deleted record." 
+}, { + "id": "399", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update records in recycling_rates table where region is \u0027Asia\u0027 and year is 2019", + "sql_context": "CREATE TABLE recycling_rates (id INT, region VARCHAR(20), year INT, rate DECIMAL(5,2));", + "sql": "WITH data_to_update AS (UPDATE recycling_rates SET rate \u003d rate * 1.10 WHERE region \u003d \u0027Asia\u0027 AND year \u003d 2019 RETURNING *) UPDATE recycling_rates SET rate \u003d (SELECT rate FROM data_to_update) WHERE id IN (SELECT id FROM data_to_update);", + "sql_explanation": "This command updates records in the recycling_rates table where region is \u0027Asia\u0027 and year is 2019. It first creates a CTE (Common Table Expression) called data_to_update that contains the updated records. It then updates the records in the recycling_rates table by matching their IDs with the IDs in the data_to_update CTE." 
+}, { + "id": "939", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in waste_generation table where waste_type is \u0027Organic\u0027 and year is 2020", + "sql_context": "CREATE TABLE waste_generation (id INT, waste_type VARCHAR(20), year INT, quantity INT);", + "sql": "WITH data_to_delete AS (DELETE FROM waste_generation WHERE waste_type \u003d \u0027Organic\u0027 AND year \u003d 2020 RETURNING *) DELETE FROM waste_generation WHERE id IN (SELECT id FROM data_to_delete);", + "sql_explanation": "This command deletes records from the waste_generation table where waste_type is \u0027Organic\u0027 and year is 2020. It first creates a CTE (Common Table Expression) called data_to_delete that contains the records to be deleted. It then deletes the records from waste_generation table by matching their IDs with the IDs in the data_to_delete CTE." 
+}, { + "id": "858", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete a record from the \"PublicServices\" table based on the provided criteria", + "sql_context": "CREATE TABLE PublicServices (ID INT, Service TEXT, Description TEXT, Availability TEXT);", + "sql": "WITH service_to_delete AS (DELETE FROM PublicServices WHERE ID \u003d 4001 AND Service \u003d \u0027Senior Transportation\u0027 RETURNING ID, Service, Description, Availability) SELECT * FROM service_to_delete;", + "sql_explanation": "* Step 1: Create a CTE called \"service_to_delete\" to delete a record from the \"PublicServices\" table based on the provided criteria.* * Step 2: Use the RETURNING clause to get the deleted records.*" +}, { + "id": "1516", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete IoT sensor metrics for sensor_id 13 before \u00272022-04-01 06:00:00\u0027", + "sql_context": "CREATE TABLE iot_sensor_metrics (sensor_id INT, value INT, timestamp TIMESTAMP); INSERT INTO iot_sensor_metrics (sensor_id, value, timestamp) VALUES (13, 900, \u00272022-04-01 05:00:00\u0027), (13, 950, \u00272022-04-01 07:00:00\u0027);", + "sql": "WITH data_to_delete AS (DELETE FROM iot_sensor_metrics WHERE sensor_id \u003d 13 AND timestamp \u003c \u00272022-04-01 06:00:00\u0027 RETURNING *) SELECT * FROM data_to_delete;", + "sql_explanation": "This 
query creates a CTE named \u0027data_to_delete\u0027 to delete the IoT sensor metric records for sensor_id 13 before the specified timestamp. The RETURNING clause is used to get the deleted records. Finally, the SELECT statement retrieves the deleted records from the CTE." +}, { + "id": "2043", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "CTEs", + "sql_complexity_description": "common table expressions", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Decrease the soil moisture readings by 5% for parcel_id 9", + "sql_context": "CREATE TABLE soil_moisture_data (parcel_id INT, moisture FLOAT, timestamp TIMESTAMP); INSERT INTO soil_moisture_data (parcel_id, moisture, timestamp) VALUES (8, 32.1, \u00272021-01-01 10:00:00\u0027), (9, 40.5, \u00272021-01-01 10:00:00\u0027), (10, 45.3, \u00272021-01-01 10:00:00\u0027);", + "sql": "WITH updated_data AS (UPDATE soil_moisture_data SET moisture \u003d moisture - 5 WHERE parcel_id \u003d 9 RETURNING *) SELECT * FROM updated_data;", + "sql_explanation": "This query first creates a Common Table Expression (CTE) named \u0027updated_data\u0027 which updates the soil moisture by 5% for parcel_id 9. The RETURNING clause is used to get the updated records. Finally, the SELECT statement retrieves the updated records from the CTE." 
+}, { + "id": "774", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest and lowest media representation scores in Asia?", + "sql_context": "CREATE TABLE media_representation (id INT, user_id INT, country VARCHAR(50), region VARCHAR(50), score INT); INSERT INTO media_representation (id, user_id, country, region, score) VALUES (1, 1, \u0027China\u0027, \u0027Asia\u0027, 80), (2, 2, \u0027Japan\u0027, \u0027Asia\u0027, 85), (3, 3, \u0027India\u0027, \u0027Asia\u0027, 75), (4, 4, \u0027Indonesia\u0027, \u0027Asia\u0027, 70), (5, 5, \u0027Pakistan\u0027, \u0027Asia\u0027, 65), (6, 6, \u0027Bangladesh\u0027, \u0027Asia\u0027, 60);", + "sql": "SELECT country, score FROM media_representation WHERE region \u003d \u0027Asia\u0027 ORDER BY score DESC LIMIT 1; SELECT country, score FROM media_representation WHERE region \u003d \u0027Asia\u0027 ORDER BY score ASC LIMIT 1;", + "sql_explanation": "To find the country with the highest media representation score in Asia, we use the ORDER BY clause to sort the scores in descending order and the LIMIT clause to get the top result. To find the country with the lowest media representation score in Asia, we use the ORDER BY clause to sort the scores in ascending order and the LIMIT clause to get the bottom result." 
+}, { + "id": "1848", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average word count for articles about underrepresented communities in the past month?", + "sql_context": "CREATE TABLE Articles (article_id INT, title VARCHAR(255), topic VARCHAR(255), word_count INT, publication_date DATE); INSERT INTO Articles (article_id, title, topic, word_count, publication_date) VALUES (1, \u0027Article1\u0027, \u0027underrepresented communities\u0027, 800, \u00272022-05-01\u0027), (2, \u0027Article2\u0027, \u0027media literacy\u0027, 1200, \u00272022-03-15\u0027), (3, \u0027Article3\u0027, \u0027disinformation detection\u0027, 900, \u00272022-04-20\u0027);", + "sql": "SELECT AVG(word_count) FROM Articles WHERE topic \u003d \u0027underrepresented communities\u0027 AND publication_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH);", + "sql_explanation": "Calculate the average word count for articles about underrepresented communities in the past month by filtering the rows by topic and publication_date using the WHERE clause, and using the AVG() function with the word_count column." 
+}, { + "id": "1856", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many media literacy programs were implemented in rural areas in the last year?", + "sql_context": "CREATE TABLE media_literacy_programs (id INT, program_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO media_literacy_programs (id, program_name, location, start_date, end_date) VALUES (1, \u0027Program 1\u0027, \u0027Rural\u0027, \u00272022-03-01\u0027, \u00272023-03-01\u0027);", + "sql": "SELECT COUNT(*) FROM media_literacy_programs WHERE location \u003d \u0027Rural\u0027 AND start_date \u003c\u003d GETDATE() AND end_date \u003e\u003d DATEADD(year, -1, GETDATE());", + "sql_explanation": "This query counts the number of media literacy programs implemented in rural areas in the last year by filtering the \u0027media_literacy_programs\u0027 table for locations in rural areas, start dates within the last year, and end dates within the last year." 
+}, { + "id": "2181", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the most popular social media platform in Spain in 2022?", + "sql_context": "CREATE TABLE social_media (id INT, platform VARCHAR(50), popularity_index INT, year INT, location VARCHAR(50)); INSERT INTO social_media (id, platform, popularity_index, year, location) VALUES (1, \u0027Facebook\u0027, 85, 2022, \u0027Spain\u0027), (2, \u0027Instagram\u0027, 80, 2022, \u0027Spain\u0027), (3, \u0027Twitter\u0027, 70, 2022, \u0027Spain\u0027);", + "sql": "SELECT platform, popularity_index FROM social_media WHERE location \u003d \u0027Spain\u0027 AND year \u003d 2022 ORDER BY popularity_index DESC LIMIT 1;", + "sql_explanation": "Identify the most popular social media platform in Spain in 2022 by using the ORDER BY function on the popularity_index column in descending order and limiting the output to 1 row." 
+}, { + "id": "2434", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles were published in France in the last 30 days?", + "sql_context": "CREATE TABLE news_articles (id INT, publication_date DATE, country VARCHAR(20)); INSERT INTO news_articles (id, publication_date, country) VALUES (1, \u00272022-01-01\u0027, \u0027France\u0027), (2, \u00272022-01-15\u0027, \u0027France\u0027), (3, \u00272022-01-30\u0027, \u0027Germany\u0027);", + "sql": "SELECT COUNT(*) FROM news_articles WHERE country \u003d \u0027France\u0027 AND publication_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY);", + "sql_explanation": "This query counts the number of news articles published in France in the last 30 days. It filters the news_articles table for the specified country and a publication date within the last 30 days, and then counts the number of rows in the filtered table." 
+}, { + "id": "2657", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the titles and runtimes of all movies and tv shows in the media table that have a runtime over 120 minutes and were produced in the US or Canada.", + "sql_context": "CREATE TABLE media (id INT, title VARCHAR(50), runtime INT, type VARCHAR(10), country VARCHAR(50));", + "sql": "SELECT title, runtime FROM media WHERE type IN (\u0027movie\u0027, \u0027tv_show\u0027) AND runtime \u003e 120 AND country IN (\u0027US\u0027, \u0027Canada\u0027);", + "sql_explanation": "This query selects the title and runtime columns from the media table where the type is either \u0027movie\u0027 or \u0027tv_show\u0027, the runtime is greater than 120 minutes, and the country is either US or Canada. The IN operator is used to specify multiple values for the type and country columns." 
+}, { + "id": "2720", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the titles and runtimes of all TV shows in the media table that have a runtime over 45 minutes and were produced in Africa or South America.", + "sql_context": "CREATE TABLE media (id INT, title VARCHAR(50), runtime INT, type VARCHAR(10), country VARCHAR(50));", + "sql": "SELECT title, runtime FROM media WHERE type \u003d \u0027tv_show\u0027 AND runtime \u003e 45 AND country IN (\u0027Africa\u0027, \u0027South America\u0027);", + "sql_explanation": "This query selects the title and runtime columns from the media table where the type is \u0027tv_show\u0027, the runtime is greater than 45 minutes, and the country is either Africa or South America. The IN operator is used to specify multiple values for the country column." 
+}, { + "id": "2944", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles were published in France in the last month?", + "sql_context": "CREATE TABLE news_articles (id INT, title TEXT, publish_date DATE, country TEXT); INSERT INTO news_articles (id, title, publish_date, country) VALUES (1, \u0027Article 1\u0027, \u00272022-03-01\u0027, \u0027France\u0027); INSERT INTO news_articles (id, title, publish_date, country) VALUES (2, \u0027Article 2\u0027, \u00272022-03-15\u0027, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM news_articles WHERE country \u003d \u0027France\u0027 AND publish_date \u003e\u003d DATEADD(month, -1, GETDATE());", + "sql_explanation": "This SQL query counts the number of news articles published in France in the last month by using the COUNT function and filtering for articles published in France in the last month using the WHERE clause, with the help of the DATEADD function to get the current date minus one month." 
+}, { + "id": "2958", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of views of videos about \u0027climate change\u0027 published by the \u0027BBC\u0027 in 2021?", + "sql_context": "CREATE TABLE videos (title VARCHAR(255), views INT, publication_year INT, topic VARCHAR(50), channel VARCHAR(50)); INSERT INTO videos (title, views, publication_year, topic, channel) VALUES (\u0027Climate change and politics\u0027, 12000, 2021, \u0027climate change\u0027, \u0027BBC\u0027), (\u0027Climate change and economy\u0027, 15000, 2021, \u0027climate change\u0027, \u0027BBC\u0027), (\u0027Climate change and technology\u0027, 18000, 2021, \u0027climate change\u0027, \u0027BBC\u0027), (\u0027Climate change and society\u0027, 20000, 2021, \u0027climate change\u0027, \u0027BBC\u0027), (\u0027Climate change and science\u0027, 25000, 2021, \u0027climate change\u0027, \u0027BBC\u0027);", + "sql": "SELECT AVG(views) FROM videos WHERE publication_year \u003d 2021 AND channel \u003d \u0027BBC\u0027 AND topic \u003d \u0027climate change\u0027;", + "sql_explanation": "We calculate the average number of views of videos about \u0027climate change\u0027 published by the \u0027BBC\u0027 in 2021 by selecting the AVG function on the views column, filtering the records by publication_year set to 2021, channel set to \u0027BBC\u0027, and topic set to \u0027climate change\u0027." 
+}, { + "id": "3021", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total watch time for videos about indigenous culture produced in the last 3 years?", + "sql_context": "CREATE TABLE videos (id INT, title TEXT, release_year INT, watch_time INT, culture TEXT); INSERT INTO videos (id, title, release_year, watch_time, culture) VALUES (1, \u0027Video1\u0027, 2020, 15000, \u0027Indigenous\u0027); INSERT INTO videos (id, title, release_year, watch_time, culture) VALUES (2, \u0027Video2\u0027, 2021, 12000, \u0027Indigenous\u0027);", + "sql": "SELECT SUM(watch_time) FROM videos WHERE release_year \u003e\u003d YEAR(CURRENT_DATE) - 3 AND culture \u003d \u0027Indigenous\u0027;", + "sql_explanation": "The SQL query calculates the total watch time for videos about indigenous culture produced in the last 3 years. It filters videos based on the release year and culture using the WHERE clause. Then, it calculates the sum of watch time using the SUM function." 
+}, { + "id": "3791", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum rating of Bollywood movies produced between 2010 and 2015?", + "sql_context": "CREATE TABLE movies (id INT, title TEXT, region TEXT, year INT, rating FLOAT); INSERT INTO movies (id, title, region, year, rating) VALUES (1, \u0027MovieA\u0027, \u0027Bollywood\u0027, 2010, 7.5), (2, \u0027MovieB\u0027, \u0027Bollywood\u0027, 2012, 8.8), (3, \u0027MovieC\u0027, \u0027Hollywood\u0027, 2014, 9.2);", + "sql": "SELECT MAX(rating) FROM movies WHERE region \u003d \u0027Bollywood\u0027 AND year BETWEEN 2010 AND 2015;", + "sql_explanation": "The query calculates the maximum rating of Bollywood movies produced between 2010 and 2015 by filtering the movies table for rows where the region is \u0027Bollywood\u0027 and the year is between 2010 and 2015, and then applying the MAX function to the rating column." 
+}, { + "id": "3911", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of TV shows produced in Japan and released before 2010?", + "sql_context": "CREATE TABLE tv_shows_jp (id INT, title VARCHAR(100), rating FLOAT, production_year INT, country VARCHAR(50)); INSERT INTO tv_shows_jp (id, title, rating, production_year, country) VALUES (1, \u0027TVShow1\u0027, 7.5, 2005, \u0027Japan\u0027), (2, \u0027TVShow2\u0027, 8.2, 2008, \u0027Japan\u0027), (3, \u0027TVShow3\u0027, 6.9, 2012, \u0027Japan\u0027);", + "sql": "SELECT MIN(rating) FROM tv_shows_jp WHERE production_year \u003c 2010 AND country \u003d \u0027Japan\u0027;", + "sql_explanation": "Calculate the minimum rating of TV shows produced in Japan and released before 2010." 
+}, { + "id": "3985", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records related to disinformation detection in Europe.", + "sql_context": "CREATE TABLE news_data (id INT, country VARCHAR(50), date DATE, disinformation_detected BOOLEAN); INSERT INTO news_data (id, country, date, disinformation_detected) VALUES (1, \u0027France\u0027, \u00272022-01-01\u0027, true), (2, \u0027Germany\u0027, \u00272022-02-01\u0027, false);", + "sql": "DELETE FROM news_data WHERE disinformation_detected \u003d true AND country LIKE \u0027Europe%\u0027;", + "sql_explanation": "This query deletes all records where disinformation was detected in Europe from the news_data table." 
+}, { + "id": "4359", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for the \u0027comedy\u0027 genre movies released in 2020?", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(100), genre VARCHAR(50), release_year INT, revenue INT); INSERT INTO movies (id, title, genre, release_year, revenue) VALUES (1, \u0027Movie1\u0027, \u0027Comedy\u0027, 2020, 5000000); INSERT INTO movies (id, title, genre, release_year, revenue) VALUES (2, \u0027Movie2\u0027, \u0027Comedy\u0027, 2020, 7000000);", + "sql": "SELECT SUM(revenue) FROM movies WHERE genre \u003d \u0027Comedy\u0027 AND release_year \u003d 2020;", + "sql_explanation": "This query calculates the total revenue for the \u0027comedy\u0027 genre movies released in 2020 by summing up the revenue column values where genre is \u0027Comedy\u0027 and release_year is 2020." 
+}, { + "id": "4464", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new character to the \u0027characters\u0027 table with the name \u0027New Character\u0027, show id 4", + "sql_context": "CREATE TABLE characters (id INT, name TEXT, show_id INT);", + "sql": "INSERT INTO characters (id, name, show_id) VALUES (null, \u0027New Character\u0027, 4);", + "sql_explanation": "This SQL query adds a new character to the \u0027characters\u0027 table with the name \u0027New Character\u0027 and show id 4. It does this by using the INSERT INTO statement, followed by the table name, and a list of columns to insert data into. A VALUES keyword is added, followed by a list of values in parentheses to insert into the columns. The id column is set to null, allowing the database to automatically assign a value." 
+}, { + "id": "4640", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of all tables and views related to media literacy programs, along with their creation dates.", + "sql_context": "CREATE TABLE programs (name VARCHAR(255), category VARCHAR(255), created_date DATE); INSERT INTO programs (name, category, created_date) VALUES (\u0027Media Literacy 101\u0027, \u0027Media Literacy\u0027, \u00272021-05-01\u0027), (\u0027Critical Thinking\u0027, \u0027Media Literacy\u0027, \u00272020-08-15\u0027);", + "sql": "SELECT name, created_date FROM programs WHERE category \u003d \u0027Media Literacy\u0027;", + "sql_explanation": "The query lists the names and creation dates of all tables and views related to media literacy programs. It filters the data by category, selecting only the rows where the category is \u0027Media Literacy\u0027." 
+}, { + "id": "4970", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of all media contents produced in 2021?", + "sql_context": "CREATE TABLE media_contents (content_id INTEGER, title VARCHAR(255), duration INTEGER, release_year INTEGER); INSERT INTO media_contents (content_id, title, duration, release_year) VALUES (1, \u0027Content1\u0027, 120, 2021), (2, \u0027Content2\u0027, 90, 2020), (3, \u0027Content3\u0027, 150, 2021), (4, \u0027Content4\u0027, 100, 2019), (5, \u0027Content5\u0027, 110, 2021), (6, \u0027Content6\u0027, 130, 2018), (7, \u0027Content7\u0027, 80, 2021);", + "sql": "SELECT SUM(duration) FROM media_contents WHERE release_year \u003d 2021;", + "sql_explanation": "This query calculates the total duration of all media contents produced in 2021 by filtering the media_contents table for rows where the release_year is 2021 and then computing the sum of the duration column." 
+}, { + "id": "5041", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total watch time of educational videos in minutes?", + "sql_context": "CREATE TABLE videos (id INT, title VARCHAR(255), category VARCHAR(50), watch_time INT); INSERT INTO videos (id, title, category, watch_time) VALUES (1, \u0027Video1\u0027, \u0027Educational\u0027, 60), (2, \u0027Video2\u0027, \u0027Entertainment\u0027, 120), (3, \u0027Video3\u0027, \u0027Educational\u0027, 90);", + "sql": "SELECT SUM(watch_time) FROM videos WHERE category \u003d \u0027Educational\u0027;", + "sql_explanation": "This SQL query calculates the total watch time of educational videos by using the SUM function on the watch_time column, and filtering rows by category." 
+}, { + "id": "5084", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of articles published in the \u0027Entertainment\u0027 section?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(100), section VARCHAR(50), rating DECIMAL(3,2)); INSERT INTO articles (id, title, section, rating) VALUES (1, \u0027Article1\u0027, \u0027Entertainment\u0027, 4.5); INSERT INTO articles (id, title, section, rating) VALUES (2, \u0027Article2\u0027, \u0027Politics\u0027, 3.2); INSERT INTO articles (id, title, section, rating) VALUES (3, \u0027Article3\u0027, \u0027Entertainment\u0027, 4.7);", + "sql": "SELECT AVG(rating) FROM articles WHERE section \u003d \u0027Entertainment\u0027;", + "sql_explanation": "This query calculates the average rating of articles published in the \u0027Entertainment\u0027 section by averaging the rating column values where section is \u0027Entertainment\u0027." 
+}, { + "id": "5123", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of movies produced in the USA?", + "sql_context": "CREATE TABLE movies (id INT, title TEXT, budget INT, production_country TEXT); INSERT INTO movies (id, title, budget, production_country) VALUES (1, \u0027Movie1\u0027, 5000000, \u0027USA\u0027), (2, \u0027Movie2\u0027, 10000000, \u0027Canada\u0027), (3, \u0027Movie3\u0027, 7000000, \u0027USA\u0027);", + "sql": "SELECT AVG(budget) FROM movies WHERE production_country \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the average budget of movies produced in the USA. It does this by selecting all rows where the production_country is USA, then calculating the average (AVG) of the budget column for those rows." 
+}, { + "id": "5131", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average release year of movies produced in France.", + "sql_context": "CREATE TABLE movies_2 (id INT, title TEXT, release_year INT, country TEXT); INSERT INTO movies_2 (id, title, release_year, country) VALUES (1, \u0027Movie1\u0027, 2016, \u0027France\u0027), (2, \u0027Movie2\u0027, 2019, \u0027Germany\u0027);", + "sql": "SELECT AVG(release_year) FROM movies_2 WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "The SQL query calculates the average release year by filtering the movies_2 table for movies produced in France, and then applying the AVG function to the release_year column." 
+}, { + "id": "5145", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the earliest publication date of articles in the \u0027Tech\u0027 category?", + "sql_context": "CREATE TABLE articles_tech (id INT, title TEXT, category TEXT, pub_date DATE); INSERT INTO articles_tech (id, title, category, pub_date) VALUES (1, \u0027Article1\u0027, \u0027Tech\u0027, \u00272022-01-01\u0027), (2, \u0027Article2\u0027, \u0027Tech\u0027, \u00272022-01-10\u0027);", + "sql": "SELECT MIN(pub_date) FROM articles_tech WHERE category \u003d \u0027Tech\u0027;", + "sql_explanation": "The SQL query calculates the earliest publication date of articles in the \u0027Tech\u0027 category by filtering the \u0027category\u0027 column with the value \u0027Tech\u0027 and then using the MIN() function over the \u0027pub_date\u0027 column." 
+}, { + "id": "5359", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average word count for articles in the \u0027Culture\u0027 category?", + "sql_context": "CREATE TABLE categories (id INT, name TEXT); INSERT INTO categories (id, name) VALUES (1, \u0027Culture\u0027), (2, \u0027Science\u0027), (3, \u0027Politics\u0027); CREATE TABLE articles (id INT, category_id INT, word_count INT); INSERT INTO articles (id, category_id, word_count) VALUES (1, 1, 800), (2, 2, 1200), (3, 1, 900);", + "sql": "SELECT AVG(word_count) FROM articles WHERE category_id \u003d 1;", + "sql_explanation": "This query calculates the average word count for articles in the \u0027Culture\u0027 category by selecting all records with category_id 1 and computing the average of their word_count column." 
+}, { + "id": "5438", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the movies and their genres for movies produced in Germany?", + "sql_context": "CREATE TABLE movie (id INT, title VARCHAR(255), genre VARCHAR(255), country VARCHAR(255)); INSERT INTO movie (id, title, genre, country) VALUES (1, \u0027Movie1\u0027, \u0027Comedy\u0027, \u0027Spain\u0027), (2, \u0027Movie2\u0027, \u0027Drama\u0027, \u0027France\u0027), (3, \u0027Movie3\u0027, \u0027Action\u0027, \u0027Germany\u0027), (4, \u0027Movie4\u0027, \u0027Adventure\u0027, \u0027Germany\u0027);", + "sql": "SELECT title, genre FROM movie WHERE country \u003d \u0027Germany\u0027;", + "sql_explanation": "This query retrieves the \u0027title\u0027 and \u0027genre\u0027 columns from the \u0027movie\u0027 table where the \u0027country\u0027 column value is \u0027Germany\u0027." 
+}, { + "id": "5519", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average disinformation detection score for content published in the \u0027USA\u0027?", + "sql_context": "CREATE TABLE content (id INT, title VARCHAR(50), location VARCHAR(50), score INT); INSERT INTO content (id, title, location, score) VALUES (1, \u0027Article 1\u0027, \u0027USA\u0027, 75), (2, \u0027Article 2\u0027, \u0027Canada\u0027, 80), (3, \u0027News 1\u0027, \u0027USA\u0027, 85);", + "sql": "SELECT AVG(score) FROM content WHERE location \u003d \u0027USA\u0027;", + "sql_explanation": "We use the AVG function to find the average disinformation detection score for content published in the \u0027USA\u0027." 
+}, { + "id": "5527", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of horror movies?", + "sql_context": "CREATE TABLE movies (id INT, title TEXT, budget INT, genre TEXT); INSERT INTO movies (id, title, budget, genre) VALUES (1, \u0027Movie1\u0027, 5000000, \u0027Action\u0027), (2, \u0027Movie2\u0027, 10000000, \u0027Horror\u0027), (3, \u0027Movie3\u0027, 7000000, \u0027Comedy\u0027), (4, \u0027Movie4\u0027, 3000000, \u0027Horror\u0027);", + "sql": "SELECT AVG(budget) FROM movies WHERE genre \u003d \u0027Horror\u0027;", + "sql_explanation": "This SQL query calculates the average budget of horror movies. It does this by selecting all rows where the genre is Horror, then calculating the average (AVG) of the budget column for those rows." 
+}, { + "id": "5537", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum duration of any news segment about political polarization?", + "sql_context": "CREATE TABLE political_polarization_news (id INT, title VARCHAR(255), duration INT); INSERT INTO political_polarization_news (id, title, duration) VALUES (1, \u0027News1\u0027, 30), (2, \u0027News2\u0027, 45), (3, \u0027News3\u0027, 60);", + "sql": "SELECT MAX(duration) FROM political_polarization_news;", + "sql_explanation": "The SQL query retrieves the maximum duration of any news segment about political polarization by applying the MAX function to the duration column." +}, { + "id": "5724", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all rows in the tweets table with a retweet count of 0.", + "sql_context": "CREATE TABLE tweets (id INT, user VARCHAR(255), tweet TEXT, retweet_count INT);", + "sql": "DELETE FROM tweets WHERE retweet_count \u003d 0;", + "sql_explanation": "This query deletes all rows in the tweets table where the retweet_count is 0." 
+}, { + "id": "37", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new artist with multiple albums and tracks", + "sql_context": "CREATE TABLE artists (id INT PRIMARY KEY, name VARCHAR(255), genre VARCHAR(255)); CREATE TABLE albums (id INT PRIMARY KEY, title VARCHAR(255), release_year INT, artist_id INT, FOREIGN KEY (artist_id) REFERENCES artists(id)); CREATE TABLE tracks (id INT PRIMARY KEY, title VARCHAR(255), duration FLOAT, album_id INT, FOREIGN KEY (album_id) REFERENCES albums(id));", + "sql": "INSERT INTO artists (id, name, genre) VALUES (1, \u0027Natasha Bedingfield\u0027, \u0027Pop\u0027); INSERT INTO albums (id, title, release_year, artist_id) VALUES (1, \u0027Unwritten\u0027, 2004, 1), (2, \u0027Pocketful of Sunshine\u0027, 2008, 1); INSERT INTO tracks (id, title, duration, album_id) VALUES (1, \u0027These Words\u0027, 3.21, 1), (2, \u0027Unwritten\u0027, 4.18, 1), (3, \u0027Soulmate\u0027, 3.29, 2), (4, \u0027Pocketful of Sunshine\u0027, 3.12, 2);", + "sql_explanation": "This query inserts a new artist, Natasha Bedingfield, with two albums and four tracks, using foreign keys to associate the tracks and albums with the artist." 
+}, { + "id": "1083", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new genre to the \u0027genres\u0027 table", + "sql_context": "CREATE TABLE genres (id INT PRIMARY KEY, name VARCHAR(255), description TEXT);", + "sql": "INSERT INTO genres (id, name, description) VALUES (1, \u0027Dream Pop\u0027, \u0027A genre that combines the atmospheric textures of dream pop and the rhythmic elements of electronic music.\u0027);", + "sql_explanation": "This query inserts a new genre into the \u0027genres\u0027 table. The \u0027id\u0027 is set to 1, \u0027name\u0027 to \u0027Dream Pop\u0027, and \u0027description\u0027 to \u0027A genre that combines the atmospheric textures of dream pop and the rhythmic elements of electronic music.\u0027." 
+}, { + "id": "2642", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total number of songs and the average length in minutes for the jazz genre in 2020?", + "sql_context": "CREATE TABLE Songs (SongName TEXT, Genre TEXT, LengthMinutes INTEGER, Year INTEGER); INSERT INTO Songs (SongName, Genre, LengthMinutes, Year) VALUES (\u0027Song1\u0027, \u0027Jazz\u0027, 4, 2020), (\u0027Song2\u0027, \u0027Jazz\u0027, 5, 2020), (\u0027Song3\u0027, \u0027Jazz\u0027, 3, 2020);", + "sql": "SELECT Genre, COUNT(*) as NumOfSongs, AVG(LengthMinutes) as AvgLength FROM Songs WHERE Genre \u003d \u0027Jazz\u0027 AND Year \u003d 2020;", + "sql_explanation": "The SQL query calculates the total number of songs and the average length in minutes for the jazz genre in the Songs table that has 2020 as the year. It then groups the results by genre and year, and calculates the count of songs and the average length." 
+}, { + "id": "3204", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating for songs released in the last 60 days?", + "sql_context": "CREATE TABLE SongRatings (rating_id INT, rating_date DATE, song_id INT, user_id INT, rating DECIMAL(3,2)); INSERT INTO SongRatings (rating_id, rating_date, song_id, user_id, rating) VALUES (1, \u00272022-07-01\u0027, 1, 1, 4.5), (2, \u00272022-07-05\u0027, 2, 2, 3.5), (3, \u00272022-06-30\u0027, 3, 3, 5.0), (4, \u00272022-08-01\u0027, 4, 4, 4.0), (5, \u00272022-08-02\u0027, 5, 5, 4.5);", + "sql": "SELECT AVG(rating) as average_rating FROM SongRatings WHERE rating_date \u003e\u003d CURDATE() - INTERVAL 60 DAY;", + "sql_explanation": "This SQL query calculates the average rating for songs released in the last 60 days. It uses the WHERE clause to filter the SongRatings table to only include ratings for songs released in the last 60 days. The AVG() function is then used to calculate the average rating for these songs." 
+}, { + "id": "3254", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of streams for all songs by female artists on the music streaming platform in the USA?", + "sql_context": "CREATE TABLE music_platform (id INT, artist VARCHAR(100), gender VARCHAR(10), country VARCHAR(50), streams INT);", + "sql": "SELECT SUM(streams) as total_streams FROM music_platform WHERE gender \u003d \u0027female\u0027 AND country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the sum of the streams column in the music_platform table for records that have a gender value of \u0027female\u0027 and a country value of \u0027USA\u0027 using the SUM aggregate function." 
+}, { + "id": "3435", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for the album \u0027Thriller\u0027 with 30,000,000 sales in 1982-12-15", + "sql_context": "CREATE TABLE if not exists sales (sale_id serial PRIMARY KEY, sale_date date, title varchar(255), revenue decimal(10,2));", + "sql": "insert into sales (sale_date, title, revenue) values (\u00271982-12-15\u0027, \u0027Thriller\u0027, 30000000 * 0.01);", + "sql_explanation": "This query inserts a new record for the album \u0027Thriller\u0027 with 30,000,000 sales in December 15th, 1982. It does so using the insert statement, specifying the target table sales and the columns (sale_date, title, revenue) along with the corresponding values (\u00271982-12-15\u0027, \u0027Thriller\u0027, 30000000 * 0.01). The revenue is multiplied by 0.01 to convert the sales figure (30,000,000) to the desired revenue format in decimal (3,000,000.00)." 
+}, { + "id": "3954", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the artist name \u0027Eminem\u0027 to \u0027Marshall Mathers\u0027 in the MusicArtists table.", + "sql_context": "CREATE TABLE MusicArtists (artist_id INT, artist_name VARCHAR(50), genre VARCHAR(20));", + "sql": "UPDATE MusicArtists SET artist_name \u003d \u0027Marshall Mathers\u0027 WHERE artist_name \u003d \u0027Eminem\u0027;", + "sql_explanation": "This query updates the artist_name from \u0027Eminem\u0027 to \u0027Marshall Mathers\u0027 in the MusicArtists table by using the UPDATE statement and WHERE clause to filter the rows with the artist_name \u0027Eminem\u0027." 
+}, { + "id": "4057", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the genre of the song \u0027Vande Mataram\u0027 to \u0027Indian Classical Fusion\u0027", + "sql_context": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100)); CREATE TABLE Songs (SongID INT PRIMARY KEY, SongName VARCHAR(100), ArtistID INT, Genre VARCHAR(50), ReleasedDate DATE); INSERT INTO Artists (ArtistID, ArtistName) VALUES (101, \u0027Selena Gomez\u0027), (102, \u0027Taylor Swift\u0027), (103, \u0027Kala Bhairava\u0027), (104, \u0027Billie Eilish\u0027), (105, \u0027Maria Fernandes\u0027); INSERT INTO Songs (SongID, SongName, ArtistID, Genre, ReleasedDate) VALUES (1, \u0027Bad Liar\u0027, 101, \u0027Pop\u0027, \u00272017-05-19\u0027), (2, \u0027Shake it Off\u0027, 102, \u0027Pop\u0027, \u00272014-08-18\u0027), (3, \u0027Vande Mataram\u0027, 103, \u0027Indian Classical\u0027, \u00272018-12-15\u0027), (4, \u0027Bad Guy\u0027, 104, \u0027Pop\u0027, \u00272019-03-29\u0027), (5, \u0027Tuyo\u0027, 105, \u0027Latin Pop\u0027, \u00272021-05-21\u0027);", + "sql": "UPDATE Songs SET Genre \u003d \u0027Indian Classical Fusion\u0027 WHERE SongName \u003d \u0027Vande Mataram\u0027;", + "sql_explanation": "This SQL query updates the Genre of the song \u0027Vande Mataram\u0027 from the Songs table to \u0027Indian Classical Fusion\u0027." 
+}, { + "id": "4391", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for the Hip-Hop genre in 2019?", + "sql_context": "CREATE TABLE music_sales (sale_id INT, genre VARCHAR(10), year INT, revenue FLOAT); INSERT INTO music_sales (sale_id, genre, year, revenue) VALUES (1, \u0027Pop\u0027, 2021, 50000.00), (2, \u0027Rock\u0027, 2021, 45000.00), (3, \u0027Pop\u0027, 2020, 40000.00), (4, \u0027Jazz\u0027, 2020, 30000.00), (5, \u0027Hip-Hop\u0027, 2019, 25000.00); CREATE VIEW genre_sales AS SELECT genre, SUM(revenue) as total_revenue FROM music_sales GROUP BY genre;", + "sql": "SELECT total_revenue FROM genre_sales WHERE genre \u003d \u0027Hip-Hop\u0027 AND year \u003d 2019;", + "sql_explanation": "The SQL query selects the total revenue from the genre_sales view for the \u0027Hip-Hop\u0027 genre in the year 2019." 
+}, { + "id": "4430", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record of a Pop song sold for $11.99 into MusicSales table.", + "sql_context": "CREATE TABLE MusicSales (SaleID INT, Genre VARCHAR(10), SalesAmount DECIMAL(10,2));", + "sql": "INSERT INTO MusicSales (SaleID, Genre, SalesAmount) VALUES (4, \u0027Pop\u0027, 11.99);", + "sql_explanation": "This query inserts a new record with SaleID 4, Genre \u0027Pop\u0027, and SalesAmount 11.99 into the MusicSales table." +}, { + "id": "4524", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of streams for jazz songs released before 2010?", + "sql_context": "CREATE TABLE songs (song_id INT, genre VARCHAR(20), release_year INT, streams INT); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (1, \u0027jazz\u0027, 2000, 1100); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (2, \u0027jazz\u0027, 2005, 1200); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (3, \u0027jazz\u0027, 2009, 1300);", + "sql": "SELECT AVG(streams) FROM songs WHERE genre \u003d \u0027jazz\u0027 AND release_year \u003c 2010;", + "sql_explanation": "This SQL query calculates the average number of streams for jazz songs 
released before 2010. It uses the AVG function to find the average value of the streams column for rows where the genre is jazz and the release_year is less than 2010." +}, { + "id": "4597", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average length of songs in the jazz genre that were released before 1990?", + "sql_context": "CREATE TABLE songs (id INT, title VARCHAR(255), length FLOAT, genre VARCHAR(255), release_year INT); INSERT INTO songs (id, title, length, genre, release_year) VALUES (1, \u0027Song1\u0027, 200.5, \u0027Pop\u0027, 2011), (2, \u0027Song2\u0027, 180.3, \u0027Rock\u0027, 2008), (3, \u0027Song3\u0027, 220.0, \u0027Jazz\u0027, 1989), (4, \u0027Song4\u0027, 150.0, \u0027Jazz\u0027, 1920);", + "sql": "SELECT AVG(length) FROM songs WHERE genre \u003d \u0027Jazz\u0027 AND release_year \u003c 1990;", + "sql_explanation": "First, we filter the data to only include rows with a genre of \u0027Jazz\u0027 and a release year earlier than 1990. Then, we calculate the average length of these songs using the AVG function." 
+}, { + "id": "4851", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove album \u0027Timeless Classics\u0027 for artist with ID 2", + "sql_context": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY AUTO_INCREMENT, Name VARCHAR(100));CREATE TABLE Albums (AlbumID INT PRIMARY KEY AUTO_INCREMENT, Title VARCHAR(100), ArtistID INT, FOREIGN KEY (ArtistID) REFERENCES Artists(ArtistID));", + "sql": "DELETE FROM Albums WHERE Title \u003d \u0027Timeless Classics\u0027 AND ArtistID \u003d 2;", + "sql_explanation": "1. Delete from the Albums table.2. Where the Title is \u0027Timeless Classics\u0027 and the ArtistID is 2." +}, { + "id": "5102", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new music platform with name \u0027NewStream\u0027 into the music_platforms table", + "sql_context": "CREATE TABLE music_platforms (id INT, platform_name VARCHAR(50));", + "sql": "INSERT INTO music_platforms (platform_name) VALUES (\u0027NewStream\u0027);", + "sql_explanation": "* Add a new row to the music_platforms table with the specified platform name.* Since an id is auto-generated, we only need to provide the platform name." 
+}, { + "id": "5244", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of all videos in the rap genre on a video platform?", + "sql_context": "CREATE TABLE video_platforms (id INT, genre TEXT, duration FLOAT); INSERT INTO video_platforms (id, genre, duration) VALUES (1, \u0027Rap\u0027, 350.0), (2, \u0027Pop\u0027, 200.0), (3, \u0027Rap\u0027, 400.0);", + "sql": "SELECT SUM(duration) FROM video_platforms WHERE genre \u003d \u0027Rap\u0027;", + "sql_explanation": "Calculate the total duration of all videos in the \u0027Rap\u0027 genre on a video platform." +}, { + "id": "5289", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by songs released in 2022?", + "sql_context": "CREATE TABLE songs (id INT, title TEXT, year INT, revenue FLOAT); INSERT INTO songs (id, title, year, revenue) VALUES (1, \u0027Song 4\u0027, 2022, 700.5), (2, \u0027Song 5\u0027, 2021, 800.2), (3, \u0027Song 6\u0027, 2022, 900.3);", + "sql": "SELECT SUM(songs.revenue) FROM songs WHERE songs.year \u003d 2022;", + "sql_explanation": "This query calculates the total revenue generated by songs that were released in the year 2022. 
It filters the songs table to only include rows where the year column is equal to 2022. Then, it calculates the sum of the revenue column in the resulting dataset. This will give the total revenue generated by songs released in 2022." +}, { + "id": "5304", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by songs released in the year 2000?", + "sql_context": "CREATE TABLE songs (id INT, title TEXT, year INT, revenue FLOAT); INSERT INTO songs (id, title, year, revenue) VALUES (1, \u0027Song 1\u0027, 2000, 500.5), (2, \u0027Song 2\u0027, 1999, 400.2), (3, \u0027Song 3\u0027, 2001, 600.3);", + "sql": "SELECT SUM(songs.revenue) FROM songs WHERE songs.year \u003d 2000;", + "sql_explanation": "This query calculates the total revenue generated by songs that were released in the year 2000. It filters the songs table to only include rows where the year column is equal to 2000. Then, it calculates the sum of the revenue column in the resulting dataset. This will give the total revenue generated by songs released in 2000." 
+}, { + "id": "5465", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average song_length in the pop genre?", + "sql_context": "CREATE TABLE genres (genre VARCHAR(10), song_id INT, song_length FLOAT); INSERT INTO genres (genre, song_id, song_length) VALUES (\u0027pop\u0027, 34, 180.5), (\u0027pop\u0027, 35, 195.8), (\u0027pop\u0027, 36, 175.4);", + "sql": "SELECT AVG(song_length) FROM genres WHERE genre \u003d \u0027pop\u0027;", + "sql_explanation": "This query calculates the average song_length from the genres table for the pop genre. It does this by summing up all the song_length values for the pop genre and then dividing by the count of those rows." 
+}, { + "id": "5578", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average album price for albums released in the US?", + "sql_context": "CREATE TABLE Albums (AlbumID INT, Country VARCHAR(50), Price DECIMAL(10, 2)); INSERT INTO Albums VALUES (1, \u0027USA\u0027, 25), (2, \u0027Canada\u0027, 20), (3, \u0027USA\u0027, 30), (4, \u0027Mexico\u0027, 18), (5, \u0027USA\u0027, 22), (6, \u0027Canada\u0027, 25);", + "sql": "SELECT AVG(Price) FROM Albums WHERE Country \u003d \u0027USA\u0027;", + "sql_explanation": "Simply filter the Albums table by the Country column using the WHERE clause and then apply the AVG function to the Price column to calculate the average album price for albums released in the US." +}, { + "id": "5582", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique countries are represented in the music_streaming table?", + "sql_context": "CREATE TABLE music_streaming (id INT, user_id INT, song_id INT, country VARCHAR(255), timestamp TIMESTAMP);", + "sql": "SELECT COUNT(DISTINCT country) FROM music_streaming;", + "sql_explanation": "We use the COUNT function along with the DISTINCT keyword to count the number of unique countries represented in the music_streaming table." 
+}, { + "id": "5598", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many songs were streamed in 2021?", + "sql_context": "CREATE TABLE streams (id INT, year INT, streams INT); INSERT INTO streams (id, year, streams) VALUES (1, 2021, 1000000);", + "sql": "SELECT SUM(streams) FROM streams WHERE year \u003d 2021;", + "sql_explanation": "Filter the streams table for 2021 and calculate the total streams." +}, { + "id": "5825", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What is the number of new subscribers per day, by country, for the last 60 days?", + "sql_context": "CREATE TABLE subscribers (subscriber_id INT, country VARCHAR(255), subscribe_date DATE); CREATE VIEW daily_subscribers AS SELECT country, DATE_TRUNC(\u0027day\u0027, subscribe_date) as date, COUNT(DISTINCT subscriber_id) as new_subscribers FROM subscribers WHERE subscribe_date \u003e\u003d DATEADD(day, -60, CURRENT_DATE) GROUP BY country, date;", + "sql": "SELECT * FROM daily_subscribers;", + "sql_explanation": "The query calculates the number of new subscribers per day, by country, using the daily_subscribers view and retrieves the data." 
+}, { + "id": "1640", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average and maximum altitudes of satellites in geostationary orbit?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(50), country VARCHAR(50), launch_date DATE, altitude FLOAT, orbit VARCHAR(50));", + "sql": "SELECT AVG(satellites.altitude) as average_altitude, MAX(satellites.altitude) as max_altitude FROM satellites WHERE satellites.orbit \u003d \u0027geostationary\u0027;", + "sql_explanation": "This query calculates the average and maximum altitudes of satellites in geostationary orbit. It does so by filtering the rows based on the orbit column, using the AVG and MAX functions on the altitude column." +}, { + "id": "1734", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a row to the space_missions table", + "sql_context": "CREATE TABLE space_missions (mission_name VARCHAR(100) PRIMARY KEY, start_date DATE, end_date DATE, mission_type VARCHAR(50));", + "sql": "INSERT INTO space_missions (mission_name, start_date, end_date, mission_type) VALUES (\u0027Artemis I\u0027, \u00272022-08-29\u0027, \u00272022-12-11\u0027, \u0027Human Exploration\u0027);", + "sql_explanation": "1. This statement adds a new row to the \u0027space_missions\u0027 table. 2. 
The row contains the following values: \u0027Artemis I\u0027 for \u0027mission_name\u0027, \u00272022-08-29\u0027 for \u0027start_date\u0027, \u00272022-12-11\u0027 for \u0027end_date\u0027, and \u0027Human Exploration\u0027 for \u0027mission_type\u0027." +}, { + "id": "2045", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many lunar missions were successfully conducted by China?", + "sql_context": "CREATE TABLE china_lunar_missions (mission_id INT, name VARCHAR(100), launch_date DATE, result VARCHAR(10));", + "sql": "SELECT COUNT(*) FROM china_lunar_missions WHERE result \u003d \u0027Success\u0027 AND EXTRACT(YEAR FROM launch_date) \u003c EXTRACT(YEAR FROM CURRENT_DATE);", + "sql_explanation": "Count the total number of successful lunar missions conducted by China by filtering the \u0027result\u0027 column with the value \u0027Success\u0027 and considering only the missions launched before the current year." 
+}, { + "id": "2301", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which exoplanets have a similar size to Earth?", + "sql_context": "CREATE TABLE Exoplanets (id INT, name VARCHAR(255), discovery_year INT, size FLOAT, distance_light_years FLOAT); INSERT INTO Exoplanets (id, name, discovery_year, size, distance_light_years) VALUES (1, \u0027Kepler-22b\u0027, 2011, 2.4, 622), (2, \u0027Kepler-186f\u0027, 2014, 1.2, 500), (3, \u0027Proxima Centauri b\u0027, 2016, 1.3, 4.2);", + "sql": "SELECT name, size, ABS(size - 1.0) as size_difference FROM Exoplanets HAVING size_difference \u003c\u003d 0.2 ORDER BY size_difference ASC;", + "sql_explanation": "This query calculates the difference between an exoplanet\u0027s size and Earth\u0027s size, and filters for those with a difference of less than 0.2. The result is ordered by the size difference in ascending order." 
+}, { + "id": "2780", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum apogee altitude of navigation satellites in MEO?", + "sql_context": "CREATE TABLE navigation_satellites (id INT, name VARCHAR(50), type VARCHAR(50), orbit VARCHAR(50), apogee_altitude FLOAT, launch_date DATE); INSERT INTO navigation_satellites (id, name, type, orbit, apogee_altitude, launch_date) VALUES (1, \u0027Galileo FOC FM1\u0027, \u0027Navigation\u0027, \u0027Medium Earth Orbit\u0027, 23222.0, \u00272011-10-12\u0027); INSERT INTO navigation_satellites (id, name, type, orbit, apogee_altitude, launch_date) VALUES (2, \u0027Beidou-3 M1\u0027, \u0027Navigation\u0027, \u0027Medium Earth Orbit\u0027, 21528.0, \u00272018-11-05\u0027);", + "sql": "SELECT MIN(apogee_altitude) FROM navigation_satellites WHERE type \u003d \u0027Navigation\u0027 AND orbit \u003d \u0027Medium Earth Orbit\u0027;", + "sql_explanation": "Calculate the minimum apogee altitude of navigation satellites in MEO by averaging the apogee_altitude values of all records with the specified type and orbit." 
+}, { + "id": "2985", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the latest successful launches and their payload mass?", + "sql_context": "CREATE TABLE launches(launch_id INT, launch_date DATE, launch_success BOOLEAN, payload_mass FLOAT); INSERT INTO launches VALUES (1, \u00272000-01-01\u0027, true, 500.5); INSERT INTO launches VALUES (2, \u00272001-01-01\u0027, false, 400.3); INSERT INTO launches VALUES (3, \u00272002-01-01\u0027, true, 300.2);", + "sql": "SELECT launch_date, payload_mass FROM launches WHERE launch_success \u003d true ORDER BY launch_date DESC LIMIT 1;", + "sql_explanation": "This SQL query retrieves the latest successful launch and its payload mass by filtering the records based on the launch_success column and ordering the results by launch_date in descending order. It then limits the results to 1 record using the LIMIT clause." 
+}, { + "id": "3059", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the oldest satellite still in orbit?", + "sql_context": "CREATE TABLE satellites (satellite_id INT, name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE, orbit_status VARCHAR(255));", + "sql": "SELECT name, launch_date FROM satellites WHERE orbit_status \u003d \u0027in orbit\u0027 ORDER BY launch_date ASC LIMIT 1;", + "sql_explanation": "This query filters the satellites by those that are still in orbit and orders them by launch date in ascending order. It then returns the top 1 row, which corresponds to the oldest satellite still in orbit." +}, { + "id": "3257", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and weights of the five heaviest satellites?", + "sql_context": "CREATE TABLE heavy_satellites (satellite_name TEXT, satellite_weight REAL); INSERT INTO heavy_satellites (satellite_name, satellite_weight) VALUES (\u0027Envisat\u0027, 8212), (\u0027GOES 16\u0027, 6595), (\u0027Metop-A\u0027, 4680), (\u0027Metop-B\u0027, 4680), (\u0027Metop-C\u0027, 4680);", + "sql": "SELECT satellite_name, satellite_weight FROM heavy_satellites ORDER BY satellite_weight DESC LIMIT 5;", + "sql_explanation": "Select the satellite_name and satellite_weight columns 
from the heavy_satellites table, order by satellite_weight in descending order, and limit the results to 5 rows." +}, { + "id": "3564", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many satellites have been launched by China since 2010?", + "sql_context": "CREATE TABLE SatelliteLaunches ( id INT, name VARCHAR(255), launch_country VARCHAR(255), launch_year INT, satellites INT);", + "sql": "SELECT COUNT(*) FROM SatelliteLaunches WHERE launch_country \u003d \u0027China\u0027 AND launch_year \u003e\u003d 2010;", + "sql_explanation": "This query counts the number of satellites launched by China since 2010 by using the COUNT function and a WHERE clause." 
+}, { + "id": "3591", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of active astronauts who have been on the ISS.", + "sql_context": "CREATE TABLE astronauts (id INT, name VARCHAR(50), status VARCHAR(50), missions VARCHAR(50)); INSERT INTO astronauts (id, name, status, missions) VALUES (1, \u0027Neil Armstrong\u0027, \u0027deceased\u0027, \u0027Apollo 11\u0027); INSERT INTO astronauts (id, name, status, missions) VALUES (2, \u0027Scott Kelly\u0027, \u0027active\u0027, \u0027ISS, STS-103\u0027);", + "sql": "SELECT COUNT(*) FROM astronauts WHERE status \u003d \u0027active\u0027 AND FIND_IN_SET(\u0027ISS\u0027, missions) \u003e 0;", + "sql_explanation": "The SQL query finds the number of active astronauts who have been on the ISS by filtering the astronauts table to include only astronauts with an \u0027active\u0027 status, and then using the FIND_IN_SET function to identify those who have been on the ISS and counting them with the COUNT function." 
+}, { + "id": "3786", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of people that the SpaceX Starship can carry?", + "sql_context": "CREATE TABLE Spacecraft_Capacities (Spacecraft_ID INT, Spacecraft_Name VARCHAR(100), Max_Capacity INT); INSERT INTO Spacecraft_Capacities (Spacecraft_ID, Spacecraft_Name, Max_Capacity) VALUES (1, \u0027SpaceX Starship\u0027, 100);", + "sql": "SELECT Max_Capacity FROM Spacecraft_Capacities WHERE Spacecraft_Name \u003d \u0027SpaceX Starship\u0027;", + "sql_explanation": "This query finds the maximum capacity of the SpaceX Starship by looking up the \u0027Max_Capacity\u0027 value in the \u0027Spacecraft_Capacities\u0027 table where the \u0027Spacecraft_Name\u0027 is \u0027SpaceX Starship\u0027." 
+}, { + "id": "4054", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass (in kg) of all space debris larger than 10 cm in size, removed from orbit since 2000?", + "sql_context": "CREATE TABLE removed_space_debris (id INT, debris_id VARCHAR(50), mass FLOAT, size FLOAT, removal_year INT);", + "sql": "SELECT SUM(mass) FROM removed_space_debris WHERE size \u003e 10 AND removal_year \u003e\u003d 2000;", + "sql_explanation": "The SQL query calculates the total mass of all space debris larger than 10 cm that has been removed from orbit since 2000 by summing the mass values where the size is greater than 10 and the removal_year is greater than or equal to 2000." 
+}, { + "id": "4065", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many astronauts from India have participated in space missions?", + "sql_context": "CREATE TABLE astronauts (id INT, name VARCHAR(255), spacecraft_id INT, role VARCHAR(255), country VARCHAR(255)); INSERT INTO astronauts VALUES (4, \u0027Rakesh Sharma\u0027, 1, \u0027Commander\u0027, \u0027India\u0027);", + "sql": "SELECT COUNT(id) as indian_astronauts_count FROM astronauts WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This SQL query filters the astronauts table by country \u0027India\u0027 and counts the number of rows." +}, { + "id": "4097", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest launch date for Indian space missions?", + "sql_context": "CREATE TABLE launches (id INT, mission VARCHAR(50), company VARCHAR(50), launch_date DATE, result VARCHAR(10)); INSERT INTO launches VALUES (1, \u0027PSLV-C51\u0027, \u0027ISRO\u0027, \u00272021-02-28\u0027, \u0027success\u0027);", + "sql": "SELECT MAX(launch_date) as latest_launch_date FROM launches WHERE company \u003d \u0027ISRO\u0027;", + "sql_explanation": "The query calculates the latest launch date for Indian space missions (ISRO) using the MAX function." 
+}, { + "id": "4283", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the most satellites in orbit?", + "sql_context": "CREATE TABLE countries (id INTEGER, name TEXT, num_satellites INTEGER); INSERT INTO countries (id, name, num_satellites) VALUES (1, \u0027USA\u0027, 1500), (2, \u0027Russia\u0027, 1200), (3, \u0027China\u0027, 800), (4, \u0027India\u0027, 300), (5, \u0027Japan\u0027, 250), (6, \u0027Germany\u0027, 150), (7, \u0027France\u0027, 120), (8, \u0027Italy\u0027, 100);", + "sql": "SELECT name, num_satellites FROM countries ORDER BY num_satellites DESC LIMIT 3;", + "sql_explanation": "This query retrieves the name and number of satellites of the top 3 countries with the most satellites in orbit by ordering the countries table by num_satellites in descending order and limiting the results to 3 rows." 
+}, { + "id": "4471", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many asteroids have been discovered by observatories in the USA and Europe?", + "sql_context": "CREATE TABLE asteroids (id INT, discovery_date DATE, discoverer_country VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM asteroids WHERE discoverer_country IN (\u0027USA\u0027, \u0027Europe\u0027);", + "sql_explanation": "This query calculates the count of all records in the asteroids table where the discoverer_country is either \u0027USA\u0027 or \u0027Europe\u0027." +}, { + "id": "4500", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries participated in Mars missions in the 2010s?", + "sql_context": "CREATE TABLE Mars_Missions (Mission_ID INT, Mission_Name VARCHAR(50), Country VARCHAR(50), Year INT, PRIMARY KEY (Mission_ID)); INSERT INTO Mars_Missions (Mission_ID, Mission_Name, Country, Year) VALUES (1, \u0027Phoenix\u0027, \u0027United States\u0027, 2007), (2, \u0027Curiosity\u0027, \u0027United States\u0027, 2012), (3, \u0027ExoMars Trace Gas Orbiter\u0027, \u0027Russia\u0027, 2016);", + "sql": "SELECT DISTINCT Country FROM Mars_Missions WHERE Year BETWEEN 2010 AND 2019;", + "sql_explanation": "The SQL query identifies the unique countries involved in Mars missions between 
2010 and 2019." +}, { + "id": "4662", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which space telescope has the largest primary mirror?", + "sql_context": "CREATE TABLE space_telescopes (name TEXT, primary_mirror_m INTEGER); INSERT INTO space_telescopes (name, primary_mirror_m) VALUES (\u0027Hubble Space Telescope\u0027, 2400), (\u0027James Webb Space Telescope\u0027, 6500), (\u0027Spitzer Space Telescope\u0027, 850);", + "sql": "SELECT name FROM space_telescopes ORDER BY primary_mirror_m DESC LIMIT 1;", + "sql_explanation": "This query lists the space telescope with the largest primary mirror by selecting the name column from the space_telescopes table and ordering the results in descending order based on the primary_mirror_m column. The query then limits the results to only the top row." 
+}, { + "id": "4694", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the average mass of space debris in the space_debris table for the debris items from the USA", + "sql_context": "CREATE TABLE space_debris (id INT, name VARCHAR(50), type VARCHAR(50), country VARCHAR(50), launch_date DATE, mass FLOAT);", + "sql": "SELECT AVG(mass) as average_mass FROM space_debris WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This query calculates the average mass of space debris items that are from the United States." +}, { + "id": "4717", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the launch country of the most recent satellite?", + "sql_context": "CREATE TABLE satellites (satellite_id INT, name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE);", + "sql": "SELECT launch_country FROM satellites ORDER BY launch_date DESC LIMIT 1;", + "sql_explanation": "This query orders the satellites by launch date in descending order and selects the top 1 row, which corresponds to the most recent satellite launch. It then returns the launch country of that satellite." 
+}, { + "id": "4743", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of astronauts per space mission in the SpaceMissions table?", + "sql_context": "CREATE TABLE SpaceMissions (id INT, mission VARCHAR(50), year INT, astronauts INT); INSERT INTO SpaceMissions (id, mission, year, astronauts) VALUES (1, \u0027Apollo 11\u0027, 1969, 3), (2, \u0027Apollo 13\u0027, 1970, 3), (3, \u0027STS-1\u0027, 1981, 5);", + "sql": "SELECT AVG(astronauts) AS avg_astronauts_per_mission FROM SpaceMissions;", + "sql_explanation": "This SQL query calculates the average number of astronauts per space mission in the SpaceMissions table. It uses the AVG() function to calculate the average number of astronauts." 
+}, { + "id": "4761", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have successfully launched satellites and when were they launched?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(255), launch_date DATE, organization VARCHAR(255), PRIMARY KEY(id)); INSERT INTO satellites (id, name, launch_date, organization) VALUES (1, \u0027Satellite1\u0027, \u00272010-05-12\u0027, \u0027Organization1\u0027), (2, \u0027Satellite2\u0027, \u00272015-09-18\u0027, \u0027Organization2\u0027), (3, \u0027Satellite3\u0027, \u00272020-01-03\u0027, \u0027Organization1\u0027);", + "sql": "SELECT satellites.organization, satellites.launch_date FROM satellites;", + "sql_explanation": "This query lists all organizations that have successfully launched satellites and when they were launched by simply selecting all rows from the satellites table." 
+}, { + "id": "4766", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance (in kilometers) that space debris travels from its initial orbit?", + "sql_context": "CREATE TABLE space_debris (id INT, initial_orbit VARCHAR(255), current_orbit VARCHAR(255), distance FLOAT);", + "sql": "SELECT AVG(distance) FROM space_debris WHERE initial_orbit IS NOT NULL;", + "sql_explanation": "This SQL query calculates the average distance that space debris travels from its initial orbit. It first selects the distance column from the space_debris table, and then uses the AVG aggregation function to calculate the average value. The WHERE clause is used to filter out any rows where the initial_orbit is null." 
+}, { + "id": "4792", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum distance traveled by the Voyager 1 space probe?", + "sql_context": "CREATE TABLE SpaceProbes (id INT, name VARCHAR(50), launch_date DATE, current_distance INT); INSERT INTO SpaceProbes (id, name, launch_date, current_distance) VALUES (1, \u0027Voyager 1\u0027, \u00271977-09-05\u0027, 145000000000);", + "sql": "SELECT MAX(current_distance) FROM SpaceProbes WHERE name \u003d \u0027Voyager 1\u0027;", + "sql_explanation": "We find the maximum distance traveled by the Voyager 1 space probe by selecting the maximum current_distance from the SpaceProbes table where the name is Voyager 1." 
+}, { + "id": "4877", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common type of space debris?", + "sql_context": "CREATE TABLE space_debris (id INT, debris_type TEXT, frequency INT); INSERT INTO space_debris (id, debris_type, frequency) VALUES (1, \u0027Spent rocket stages\u0027, 2500), (2, \u0027Defunct satellites\u0027, 2000), (3, \u0027Fuel tanks\u0027, 500), (4, \u0027Nuts and bolts\u0027, 1000), (5, \u0027 fragments from disintegration and collisions\u0027, 5000);", + "sql": "SELECT debris_type FROM space_debris ORDER BY frequency DESC LIMIT 1;", + "sql_explanation": "This query retrieves the most common type of space debris. It selects the debris_type from the space_debris table and orders the result set in descending order based on the frequency column. The query then limits the result set to a single row using the LIMIT clause." 
+}, { + "id": "4993", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many space missions have been launched by India?", + "sql_context": "CREATE TABLE space_missions (id INT, mission_name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE); INSERT INTO space_missions (id, mission_name, launch_country, launch_date) VALUES (1, \u0027Chandrayaan-1\u0027, \u0027India\u0027, \u00272008-10-22\u0027); INSERT INTO space_missions (id, mission_name, launch_country, launch_date) VALUES (2, \u0027Mangalyaan\u0027, \u0027India\u0027, \u00272013-11-05\u0027); INSERT INTO space_missions (id, mission_name, launch_country, launch_date) VALUES (3, \u0027Astrosat\u0027, \u0027India\u0027, \u00272015-09-28\u0027);", + "sql": "SELECT COUNT(*) FROM space_missions WHERE launch_country \u003d \u0027India\u0027;", + "sql_explanation": "This SQL query counts the number of space missions launched by India by counting the number of rows in the space_missions table where the launch_country field is set to \u0027India\u0027." 
+}, { + "id": "5022", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of operational satellites required for satellite-based augmentation systems?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(255), type VARCHAR(255), operational BOOLEAN); CREATE VIEW sbas_satellites AS SELECT * FROM satellites WHERE type IN (\u0027WAAS\u0027, \u0027EGNOS\u0027, \u0027MSAS\u0027, \u0027GAGAN\u0027);", + "sql": "SELECT MIN(COUNT(*)) FROM sbas_satellites WHERE operational \u003d TRUE;", + "sql_explanation": "This SQL query finds the minimum number of operational satellites required for satellite-based augmentation systems. It first joins the sbas_satellites view with the satellites table on the name column. Then, it uses the COUNT aggregation function to count the number of rows in the resulting table, and the MIN aggregation function to find the minimum value. The WHERE clause is used to filter out any rows where the operational column is false." 
+}, { + "id": "5034", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all astrobiology records with a discovery_date before 2005-01-01 from the astrobiology_data table", + "sql_context": "CREATE TABLE astrobiology_data (record_id INT, name VARCHAR(255), discovery_date DATE); INSERT INTO astrobiology_data (record_id, name, discovery_date) VALUES (1, \u0027Discovery 1\u0027, \u00272000-12-25\u0027), (2, \u0027Discovery 2\u0027, \u00272007-06-18\u0027), (3, \u0027Discovery 3\u0027, \u00272003-11-05\u0027);", + "sql": "DELETE FROM astrobiology_data WHERE discovery_date \u003c \u00272005-01-01\u0027;", + "sql_explanation": "This query deletes all records from the astrobiology_data table where the discovery_date is before 2005-01-01." 
+}, { + "id": "5044", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of Mars missions launched by NASA?", + "sql_context": "CREATE TABLE nasa_mars_missions (id INT, name VARCHAR(50), cost INT); INSERT INTO nasa_mars_missions (id, name, cost) VALUES (1, \u0027Mars Rover 2001\u0027, 2500000), (2, \u0027Mars Rover 2010\u0027, 3000000), (3, \u0027Mars Orbiter 2020\u0027, 2000000000);", + "sql": "SELECT AVG(cost) FROM nasa_mars_missions WHERE name LIKE \u0027%Mars%\u0027;", + "sql_explanation": "The SQL query uses the AVG function to calculate the average cost of Mars missions. The WHERE clause filters the data based on the name of the missions. The LIKE keyword is used to filter missions that contain \u0027Mars\u0027. The result is the average cost of Mars missions launched by NASA." 
+}, { + "id": "5066", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of space missions by ESA?", + "sql_context": "CREATE TABLE space_missions (agency TEXT, num_missions INTEGER); INSERT INTO space_missions (agency, num_missions) VALUES (\u0027NASA\u0027, 135), (\u0027ESA\u0027, 45), (\u0027Russia\u0027, 126);", + "sql": "SELECT SUM(num_missions) FROM space_missions WHERE agency \u003d \u0027ESA\u0027;", + "sql_explanation": "This query calculates the total number of space missions by ESA by selecting the sum of the num_missions column from the space_missions table where the agency is \u0027ESA\u0027." +}, { + "id": "5267", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of Mars missions?", + "sql_context": "CREATE TABLE mars_missions (id INT, name VARCHAR(50), cost INT); INSERT INTO mars_missions (id, name, cost) VALUES (1, \u0027Mars Rover 2001\u0027, 2500000), (2, \u0027Mars Rover 2010\u0027, 3000000), (3, \u0027Mars Orbiter 2020\u0027, 2000000000);", + "sql": "SELECT AVG(cost) FROM mars_missions WHERE name LIKE \u0027%Mars%\u0027;", + "sql_explanation": "The SQL query uses the LIKE keyword to filter the data based on the name of the missions. 
It then calculates the average cost of the missions using the AVG function." +}, { + "id": "5547", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of all space debris larger than 1 cm in size?", + "sql_context": "CREATE TABLE SpaceDebris (id INT, diameter FLOAT, mass FLOAT); INSERT INTO SpaceDebris (id, diameter, mass) VALUES (1, 1.5, 2.3);", + "sql": "SELECT SUM(mass) FROM SpaceDebris WHERE diameter \u003e 1;", + "sql_explanation": "This query calculates the total mass of all space debris larger than 1 cm in size by selecting the SUM function on the mass column, while filtering for debris with a diameter greater than 1 cm using the WHERE clause." 
+}, { + "id": "5573", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of space missions by NASA?", + "sql_context": "CREATE TABLE missions(name TEXT, agency TEXT, launch_date TEXT); INSERT INTO missions(name, agency, launch_date) VALUES(\u0027Apollo 11\u0027, \u0027NASA\u0027, \u00271969-07-16\u0027), (\u0027Apollo 13\u0027, \u0027NASA\u0027, \u00271970-04-11\u0027);", + "sql": "SELECT COUNT(*) FROM missions WHERE agency \u003d \u0027NASA\u0027;", + "sql_explanation": "This SQL query calculates the total number of space missions by NASA by selecting all rows from the missions table where the agency is \u0027NASA\u0027 and then counting the number of rows using the COUNT function." 
+}, { + "id": "5597", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all satellites operated by the European Space Agency.", + "sql_context": "CREATE TABLE satellites (id INTEGER, name TEXT, operator TEXT); INSERT INTO satellites (id, name, operator) VALUES (1, \u0027Hubble Space Telescope\u0027, \u0027NASA\u0027), (2, \u0027Galileo IOV-1\u0027, \u0027ESA\u0027), (3, \u0027Galileo IOV-2\u0027, \u0027ESA\u0027), (4, \u0027Galileo IOV-3\u0027, \u0027ESA\u0027), (5, \u0027Galileo IOV-4\u0027, \u0027ESA\u0027);", + "sql": "SELECT name FROM satellites WHERE operator \u003d \u0027ESA\u0027;", + "sql_explanation": "This query retrieves the names of all satellites from the satellites table where the operator is the European Space Agency (ESA)." 
+}, { + "id": "5600", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum altitude reached by a human in space?", + "sql_context": "CREATE TABLE human_spaceflight (name TEXT, max_altitude_km INTEGER); INSERT INTO human_spaceflight (name, max_altitude_km) VALUES (\u0027Gagarin\u0027, 327), (\u0027Nechaev\u0027, 330), (\u0027Shepard\u0027, 187), (\u0027Glenn\u0027, 282);", + "sql": "SELECT MAX(max_altitude_km) FROM human_spaceflight;", + "sql_explanation": "This query finds the maximum altitude reached by a human in space by selecting the maximum value of the max_altitude_km column from the human_spaceflight table." +}, { + "id": "5612", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of the Juno spacecraft in kg?", + "sql_context": "CREATE TABLE spacecraft (name TEXT, mass_kg INTEGER); INSERT INTO spacecraft (name, mass_kg) VALUES (\u0027Juno\u0027, 3625), (\u0027Voyager 1\u0027, 722), (\u0027Cassini\u0027, 5600);", + "sql": "SELECT mass_kg FROM spacecraft WHERE name \u003d \u0027Juno\u0027;", + "sql_explanation": "This query retrieves the total mass of the Juno spacecraft in kilograms by selecting the mass_kg column from the spacecraft table where the name is \u0027Juno\u0027." 
+}, { + "id": "5629", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of active astronauts on the ISS at any given time?", + "sql_context": "CREATE TABLE active_iss_astronauts (astronaut_id INT, name VARCHAR(100), start_date DATE, end_date DATE, max_active INT);", + "sql": "SELECT MAX(max_active) FROM active_iss_astronauts;", + "sql_explanation": "Determine the maximum number of active astronauts on the ISS at any given time by finding the maximum value of the \u0027max_active\u0027 column in the \u0027active_iss_astronauts\u0027 table." +}, { + "id": "5690", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many spacecraft were launched by each space agency?", + "sql_context": "CREATE TABLE space_agencies_2 (agency VARCHAR(50), country VARCHAR(50), launches INTEGER); INSERT INTO space_agencies_2 (agency, country, launches) VALUES (\u0027NASA\u0027, \u0027USA\u0027, 228), (\u0027Soviet Union\u0027, \u0027Russia\u0027, 182), (\u0027ESA\u0027, \u0027Europe\u0027, 105), (\u0027ISRO\u0027, \u0027India\u0027, 105), (\u0027CNSA\u0027, \u0027China\u0027, 64);", + "sql": "SELECT agency, launches FROM space_agencies_2;", + "sql_explanation": "This query retrieves the space agencies and their corresponding launch counts by selecting the 
\u0027agency\u0027 and \u0027launches\u0027 columns from the \u0027space_agencies_2\u0027 table." +}, { + "id": "5695", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum altitude reached by any space mission, according to the Space_Missions table?", + "sql_context": "CREATE TABLE Space_Missions (ID INT, Mission_Name VARCHAR(255), Max_Altitude INT); INSERT INTO Space_Missions (ID, Mission_Name, Max_Altitude) VALUES (1, \u0027Apollo 11\u0027, 363300);", + "sql": "SELECT MAX(Max_Altitude) FROM Space_Missions;", + "sql_explanation": "The SQL query selects the maximum value of the Max_Altitude column from the Space_Missions table." +}, { + "id": "5719", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest launch date of any space mission?", + "sql_context": "CREATE TABLE space_missions (id INT PRIMARY KEY, mission_name VARCHAR(255), country VARCHAR(255), launch_date DATE, mission_type VARCHAR(255));", + "sql": "SELECT MIN(launch_date) FROM space_missions;", + "sql_explanation": "This query retrieves the earliest launch date of any space mission by using the MIN function on the \u0027launch_date\u0027 column." 
+}, { + "id": "5754", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average altitude of satellites in the \u0027satellite_info\u0027 table?", + "sql_context": "CREATE TABLE satellite_info (id INT PRIMARY KEY, satellite_name VARCHAR(255), country VARCHAR(255), launch_date DATE, altitude INT);", + "sql": "SELECT AVG(altitude) FROM satellite_info;", + "sql_explanation": "This query calculates the average altitude of satellites in the \u0027satellite_info\u0027 table by using the AVG function on the \u0027altitude\u0027 column." +}, { + "id": "5785", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest date a satellite was launched by any country?", + "sql_context": "CREATE TABLE launches (id INT, satellite_name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE); INSERT INTO launches (id, satellite_name, launch_country, launch_date) VALUES (1, \u0027Sputnik 1\u0027, \u0027Russia\u0027, \u00271957-10-04\u0027); INSERT INTO launches (id, satellite_name, launch_country, launch_date) VALUES (2, \u0027Explorer 1\u0027, \u0027USA\u0027, \u00271958-01-31\u0027); INSERT INTO launches (id, satellite_name, launch_country, launch_date) VALUES (3, \u0027Beidou 1\u0027, \u0027China\u0027, \u00272000-10-31\u0027);", + "sql": "SELECT 
MIN(launch_date) FROM launches;", + "sql_explanation": "This SQL query retrieves the earliest launch date by finding the minimum launch_date value in the launches table." +}, { + "id": "5824", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the mass of the largest satellite in the \u0027Satellites\u0027 table?", + "sql_context": "CREATE TABLE Satellites (Satellite_ID INT, Name VARCHAR(100), Mass FLOAT); INSERT INTO Satellites (Satellite_ID, Name, Mass) VALUES (1, \u0027International Space Station\u0027, 419000.0);", + "sql": "SELECT MAX(Mass) FROM Satellites;", + "sql_explanation": "This query finds the mass of the largest satellite by finding the maximum \u0027Mass\u0027 value in the \u0027Satellites\u0027 table." 
+}, { + "id": "2219", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of solar and wind energy in Japan, India, and Thailand?", + "sql_context": "CREATE TABLE energy_capacity (country VARCHAR(255), energy_source VARCHAR(255), capacity FLOAT); INSERT INTO energy_capacity (country, energy_source, capacity) VALUES (\u0027Japan\u0027, \u0027Solar\u0027, 50000), (\u0027Japan\u0027, \u0027Wind\u0027, 20000), (\u0027India\u0027, \u0027Solar\u0027, 80000), (\u0027India\u0027, \u0027Wind\u0027, 35000), (\u0027Thailand\u0027, \u0027Solar\u0027, 30000), (\u0027Thailand\u0027, \u0027Wind\u0027, 15000);", + "sql": "SELECT SUM(capacity) FROM energy_capacity WHERE (country IN (\u0027Japan\u0027, \u0027India\u0027, \u0027Thailand\u0027) AND energy_source IN (\u0027Solar\u0027, \u0027Wind\u0027));", + "sql_explanation": "This query calculates the total installed capacity of solar and wind energy in Japan, India, and Thailand by summing up the capacities for each country and energy source using IN and WHERE clause." 
+}, { + "id": "2456", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in carbon prices between the European Union Emissions Trading System and the California Cap-and-Trade Program?", + "sql_context": "CREATE TABLE eu_ets (year INTEGER, price DECIMAL); INSERT INTO eu_ets (year, price) VALUES (2016, 5.84); INSERT INTO eu_ets (year, price) VALUES (2017, 7.14); CREATE TABLE california_cap (year INTEGER, price DECIMAL); INSERT INTO california_cap (year, price) VALUES (2016, 13.57); INSERT INTO california_cap (year, price) VALUES (2017, 15.04);", + "sql": "SELECT eu_ets.year, eu_ets.price - california_cap.price FROM eu_ets, california_cap WHERE eu_ets.year \u003d california_cap.year;", + "sql_explanation": "This query joins the eu_ets and california_cap tables on the year column and calculates the difference between the carbon prices for each year." 
+}, { + "id": "2730", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption of residential buildings in Tokyo in 2020?", + "sql_context": "CREATE TABLE energy_consumption (id INT, sector TEXT, location TEXT, year INT, consumption FLOAT); INSERT INTO energy_consumption (id, sector, location, year, consumption) VALUES (1, \u0027residential\u0027, \u0027Tokyo\u0027, 2020, 5000.0), (2, \u0027commercial\u0027, \u0027Tokyo\u0027, 2020, 7000.0);", + "sql": "SELECT SUM(consumption) FROM energy_consumption WHERE sector \u003d \u0027residential\u0027 AND location \u003d \u0027Tokyo\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the total energy consumption of residential buildings in Tokyo in 2020 by summing up the consumption values in the energy_consumption table where the sector is residential, the location is Tokyo, and the year is 2020." 
+}, { + "id": "2734", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum carbon price (USD/ton) in the California Cap-and-Trade Program since its inception?", + "sql_context": "CREATE TABLE carbon_prices_ca (id INT, market TEXT, state TEXT, price FLOAT, year INT); INSERT INTO carbon_prices_ca (id, market, state, price, year) VALUES (1, \u0027California Cap-and-Trade Program\u0027, \u0027California\u0027, 13.57, 2013);", + "sql": "SELECT MIN(price) FROM carbon_prices_ca WHERE market \u003d \u0027California Cap-and-Trade Program\u0027 AND state \u003d \u0027California\u0027;", + "sql_explanation": "The SQL query calculates the minimum carbon price in the California Cap-and-Trade Program since its inception by selecting the MIN function on the price column and filtering the data with a WHERE clause to only consider the California Cap-and-Trade Program and California." 
+}, { + "id": "3299", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of renewable energy sources in Brazil and Argentina?", + "sql_context": "CREATE TABLE latam_renewable_energy (country VARCHAR(20), energy_source VARCHAR(20), installed_capacity INT); INSERT INTO latam_renewable_energy (country, energy_source, installed_capacity) VALUES (\u0027Brazil\u0027, \u0027Solar\u0027, 5000), (\u0027Brazil\u0027, \u0027Wind\u0027, 12000), (\u0027Brazil\u0027, \u0027Hydro\u0027, 95000), (\u0027Argentina\u0027, \u0027Solar\u0027, 2000), (\u0027Argentina\u0027, \u0027Wind\u0027, 6000), (\u0027Argentina\u0027, \u0027Hydro\u0027, 10000);", + "sql": "SELECT SUM(installed_capacity) FROM latam_renewable_energy WHERE country IN (\u0027Brazil\u0027, \u0027Argentina\u0027);", + "sql_explanation": "This query calculates the total installed capacity of renewable energy sources in Brazil and Argentina by summing up the installed_capacity values for these two countries from the latam_renewable_energy table." 
+}, { + "id": "3371", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon pricing revenue in New York state between 2018 and 2020?", + "sql_context": "CREATE TABLE carbon_pricing_ny (id INT, year INT, revenue FLOAT); INSERT INTO carbon_pricing_ny (id, year, revenue) VALUES (1, 2018, 100.0), (2, 2019, 120.0), (3, 2020, 150.0);", + "sql": "SELECT AVG(revenue) FROM carbon_pricing_ny WHERE year BETWEEN 2018 AND 2020 AND state \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the average carbon pricing revenue in New York state between 2018 and 2020 by averaging the revenue column where the year is between 2018 and 2020 and the state is New York." 
+}, { + "id": "3422", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity (MW) of renewable energy sources in California?", + "sql_context": "CREATE TABLE ca_renewable_energy (id INT, source TEXT, capacity_mw FLOAT); INSERT INTO ca_renewable_energy (id, source, capacity_mw) VALUES (1, \u0027Wind\u0027, 500.0), (2, \u0027Solar\u0027, 1000.0), (3, \u0027Geothermal\u0027, 750.0);", + "sql": "SELECT SUM(capacity_mw) FROM ca_renewable_energy WHERE source IN (\u0027Wind\u0027, \u0027Solar\u0027, \u0027Geothermal\u0027);", + "sql_explanation": "This query calculates the total installed capacity (MW) of renewable energy sources in California by summing the capacity_mw column and filtering the source column for \u0027Wind\u0027, \u0027Solar\u0027, and \u0027Geothermal\u0027 values." 
+}, { + "id": "3548", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all wind energy projects in Germany before 2010.", + "sql_context": "CREATE TABLE energy_projects (name TEXT, country TEXT, technology TEXT, capacity_mw INTEGER, year INTEGER); INSERT INTO energy_projects (name, country, technology, capacity_mw, year) VALUES (\u0027Project A\u0027, \u0027Germany\u0027, \u0027Wind\u0027, 10, 2005), (\u0027Project B\u0027, \u0027Germany\u0027, \u0027Wind\u0027, 15, 2012);", + "sql": "DELETE FROM energy_projects WHERE country \u003d \u0027Germany\u0027 AND technology \u003d \u0027Wind\u0027 AND year \u003c 2010;", + "sql_explanation": "This query deletes all rows in the energy_projects table that have a country of Germany, technology of Wind, and year before 2010." 
+}, { + "id": "3612", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects are there in the \u0027Midwest\u0027 region with a budget greater than 500 million USD?", + "sql_context": "CREATE TABLE RenewableEnergyProjects (region VARCHAR(50), budget FLOAT);", + "sql": "SELECT COUNT(*) FROM RenewableEnergyProjects WHERE region \u003d \u0027Midwest\u0027 AND budget \u003e 500000000;", + "sql_explanation": "This query counts the number of renewable energy projects in the \u0027Midwest\u0027 region with a budget greater than 500 million USD by filtering the RenewableEnergyProjects table based on the region and budget columns and then counting the number of rows that meet the criteria." 
+}, { + "id": "3663", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sector with the second lowest energy demand in 2020?", + "sql_context": "CREATE TABLE energy_demand (id INT, sector VARCHAR(255), demand DECIMAL(5,2), year INT, PRIMARY KEY (id)); INSERT INTO energy_demand (id, sector, demand, year) VALUES (1, \u0027Transportation\u0027, 120.5, 2019); INSERT INTO energy_demand (id, sector, demand, year) VALUES (2, \u0027Commercial\u0027, 95.2, 2019); INSERT INTO energy_demand (id, sector, demand, year) VALUES (3, \u0027Industrial\u0027, 150.7, 2020); INSERT INTO energy_demand (id, sector, demand, year) VALUES (4, \u0027Residential\u0027, 78.4, 2020);", + "sql": "SELECT sector, demand FROM energy_demand WHERE year \u003d 2020 ORDER BY demand LIMIT 1 OFFSET 1;", + "sql_explanation": "This query selects the sector and demand columns from the energy_demand table where the year is 2020 and orders the results by demand, then returns the second row (offset 1) of the ordered results." 
+}, { + "id": "3952", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicles (in thousands) sold in Germany since 2018?", + "sql_context": "CREATE TABLE ElectricVehicles (id INT, country VARCHAR(50), year INT, sales INT); INSERT INTO ElectricVehicles (id, country, year, sales) VALUES (1, \u0027Germany\u0027, 2020, 385), (2, \u0027Germany\u0027, 2019, 320), (3, \u0027Germany\u0027, 2018, 210), (4, \u0027France\u0027, 2020, 410);", + "sql": "SELECT COUNT(*)/1000 FROM ElectricVehicles WHERE country \u003d \u0027Germany\u0027 AND year \u003e\u003d 2018;", + "sql_explanation": "This query calculates the total number of electric vehicles (in thousands) sold in Germany since 2018 by filtering the records based on the country and year, and then dividing the count of records that meet the criteria by 1000 to convert the value to thousands." 
+}, { + "id": "3995", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027efficiency_improvement\u0027 value to 0.15 in the \u0027energy_efficiency\u0027 table where the \u0027sector\u0027 is \u0027Industry\u0027", + "sql_context": "CREATE TABLE energy_efficiency (id INT PRIMARY KEY, sector VARCHAR(255), efficiency_improvement FLOAT, country VARCHAR(255));", + "sql": "UPDATE energy_efficiency SET efficiency_improvement \u003d 0.15 WHERE sector \u003d \u0027Industry\u0027;", + "sql_explanation": "The SQL query updates the \u0027efficiency_improvement\u0027 value to 0.15 in the \u0027energy_efficiency\u0027 table where the \u0027sector\u0027 is \u0027Industry\u0027." 
+}, { + "id": "4023", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency (kWh/ton) of electric vehicles sold in Japan since 2019?", + "sql_context": "CREATE TABLE ev_sales (id INT, model TEXT, country TEXT, energy_efficiency FLOAT, year INT); INSERT INTO ev_sales (id, model, country, energy_efficiency, year) VALUES (1, \u0027Nissan Leaf\u0027, \u0027Japan\u0027, 0.27, 2019), (2, \u0027Toyota Prius Prime\u0027, \u0027Japan\u0027, 0.59, 2020);", + "sql": "SELECT AVG(energy_efficiency) FROM ev_sales WHERE country \u003d \u0027Japan\u0027 AND year \u003e\u003d 2019;", + "sql_explanation": "The SQL query calculates the average energy efficiency of electric vehicles sold in Japan since 2019 by selecting the AVG function on the energy_efficiency column and filtering the data with a WHERE clause to only consider Japan and years after 2019." 
+}, { + "id": "4060", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles are there in China and Japan?", + "sql_context": "CREATE TABLE electric_vehicles (country VARCHAR(50), num_vehicles INT); INSERT INTO electric_vehicles (country, num_vehicles) VALUES (\u0027China\u0027, 1140000), (\u0027Japan\u0027, 850000);", + "sql": "SELECT SUM(num_vehicles) FROM electric_vehicles WHERE country IN (\u0027China\u0027, \u0027Japan\u0027);", + "sql_explanation": "This query calculates the total number of electric vehicles in China and Japan by summing up the num_vehicles values for those two countries in the electric_vehicles table." 
+}, { + "id": "4108", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of renewable energy in the total energy production in Brazil in 2020?", + "sql_context": "CREATE TABLE brazil_energy_production (year INT, renewable_energy_percentage DECIMAL(4,2)); INSERT INTO brazil_energy_production (year, renewable_energy_percentage) VALUES (2015, 35.00), (2016, 37.00), (2017, 39.00), (2018, 41.00), (2019, 43.00), (2020, 45.00);", + "sql": "SELECT renewable_energy_percentage FROM brazil_energy_production WHERE year \u003d 2020;", + "sql_explanation": "This query retrieves the percentage of renewable energy in the total energy production in Brazil in 2020 by filtering the brazil_energy_production table for the year 2020." 
+}, { + "id": "4174", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the carbon price in the California Cap-and-Trade Program?", + "sql_context": "CREATE TABLE carbon_prices (region TEXT, price FLOAT); INSERT INTO carbon_prices (region, price) VALUES (\u0027California Cap-and-Trade Program\u0027, 15.0);", + "sql": "SELECT price FROM carbon_prices WHERE region \u003d \u0027California Cap-and-Trade Program\u0027;", + "sql_explanation": "This SQL query retrieves the carbon price in the California Cap-and-Trade Program by selecting the price column from the carbon_prices table where the region is \u0027California Cap-and-Trade Program\u0027." 
+}, { + "id": "4277", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy storage capacity in MWh for Hydro Power Plants in Spain?", + "sql_context": "CREATE TABLE HydroPowerPlants ( PlantID INT, Name VARCHAR(255), Country VARCHAR(255), EnergyStorageCapacity FLOAT );", + "sql": "SELECT AVG(EnergyStorageCapacity) FROM HydroPowerPlants WHERE Country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query calculates the average energy storage capacity of hydro power plants in Spain by finding the mean of the EnergyStorageCapacity column in the HydroPowerPlants table where the Country is \u0027Spain\u0027." 
+}, { + "id": "4454", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total installed energy storage capacity in Texas and Ontario.", + "sql_context": "CREATE TABLE energy_storage (id INT, state VARCHAR(255), name VARCHAR(255), capacity FLOAT); INSERT INTO energy_storage (id, state, name, capacity) VALUES (1, \u0027Texas\u0027, \u0027Storage A\u0027, 120.5); INSERT INTO energy_storage (id, state, name, capacity) VALUES (2, \u0027Texas\u0027, \u0027Storage B\u0027, 150.2); INSERT INTO energy_storage (id, state, name, capacity) VALUES (3, \u0027Ontario\u0027, \u0027Storage C\u0027, 200.1);", + "sql": "SELECT SUM(capacity) FROM energy_storage WHERE state IN (\u0027Texas\u0027, \u0027Ontario\u0027);", + "sql_explanation": "The SQL query selects the sum of the capacity column from the energy_storage table where the state is either Texas or Ontario." 
+}, { + "id": "4571", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of energy storage in Country M?", + "sql_context": "CREATE TABLE storage_capacity (name TEXT, location TEXT, capacity_MW INTEGER); INSERT INTO storage_capacity (name, location, capacity_MW) VALUES (\u0027Unit 1\u0027, \u0027Country M\u0027, 50), (\u0027Unit 2\u0027, \u0027Country N\u0027, 75), (\u0027Unit 3\u0027, \u0027Country M\u0027, 100);", + "sql": "SELECT SUM(capacity_MW) FROM storage_capacity WHERE location \u003d \u0027Country M\u0027;", + "sql_explanation": "The query calculates the total capacity of energy storage in Country M by summing up the capacity_MW values for rows where the location is \u0027Country M\u0027" +}, { + "id": "4643", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total energy storage capacity (MWh) in the United Kingdom", + "sql_context": "CREATE TABLE energy_storage (id INT, country VARCHAR(50), capacity FLOAT); INSERT INTO energy_storage (id, country, capacity) VALUES (1, \u0027United Kingdom\u0027, 3000), (2, \u0027Germany\u0027, 4000), (3, \u0027France\u0027, 2500);", + "sql": "SELECT SUM(capacity) FROM energy_storage WHERE country \u003d \u0027United Kingdom\u0027;", + "sql_explanation": "This 
query calculates the total energy storage capacity in the United Kingdom by summing up the capacity values in the energy_storage table where the country is the United Kingdom." +}, { + "id": "4678", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and capacities of geothermal plants in Indonesia?", + "sql_context": "CREATE TABLE geothermal_plants (name TEXT, country TEXT, capacity FLOAT); INSERT INTO geothermal_plants (name, country, capacity) VALUES (\u0027Muara Laboh\u0027, \u0027Indonesia\u0027, 80.0), (\u0027Sinabung\u0027, \u0027Indonesia\u0027, 45.0), (\u0027Dieng\u0027, \u0027Indonesia\u0027, 66.4);", + "sql": "SELECT name, capacity FROM geothermal_plants WHERE country \u003d \u0027Indonesia\u0027;", + "sql_explanation": "This SQL query retrieves the names and capacities of geothermal plants in Indonesia by selecting the name and capacity columns from the geothermal_plants table where the country is \u0027Indonesia\u0027." 
+}, { + "id": "4958", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update records in the \"solar_panels\" table where the system size is 150 kW, setting the install year to 2020", + "sql_context": "CREATE TABLE solar_panels (id INT PRIMARY KEY, system_size FLOAT, install_year INT, manufacturer VARCHAR(255));", + "sql": "UPDATE solar_panels SET install_year \u003d 2020 WHERE system_size \u003d 150;", + "sql_explanation": "{1. The UPDATE statement is used to modify records in the \"solar_panels\" table. 2. The WHERE clause filters the records based on the system size being 150 kW. 3. The SET clause changes the install year of the selected records to 2020.}" +}, { + "id": "4979", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of renewable energy in France to 14.00.", + "sql_context": "CREATE TABLE renewable_energy (country VARCHAR(20), price DECIMAL(5,2)); INSERT INTO renewable_energy (country, price) VALUES (\u0027France\u0027, 12.50), (\u0027France\u0027, 13.20), (\u0027Germany\u0027, 10.00), (\u0027Germany\u0027, 11.50);", + "sql": "UPDATE renewable_energy SET price \u003d 14.00 WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "The SQL query updates the price of renewable energy in France to 14.00 by using the WHERE clause to filter the 
country and the SET clause to update the price." +}, { + "id": "5051", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and capacities of solar farms in California?", + "sql_context": "CREATE TABLE solar_farms (name TEXT, state TEXT, capacity FLOAT); INSERT INTO solar_farms (name, state, capacity) VALUES (\u0027Mojave Solar\u0027, \u0027California\u0027, 250.0), (\u0027Desert Sunlight\u0027, \u0027California\u0027, 550.0), (\u0027Topaz Solar\u0027, \u0027California\u0027, 550.0);", + "sql": "SELECT name, capacity FROM solar_farms WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "This SQL query retrieves the names and capacities of solar farms in California by selecting the name and capacity columns from the solar_farms table where the state is \u0027California\u0027." 
+}, { + "id": "5112", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum capacity of energy storage projects?", + "sql_context": "CREATE TABLE energy_storage_projects (name VARCHAR(255), capacity FLOAT); INSERT INTO energy_storage_projects (name, capacity) VALUES (\u0027Project1\u0027, 120.5), (\u0027Project2\u0027, 450.7), (\u0027Project3\u0027, 789.6), (\u0027Project4\u0027, 901.2);", + "sql": "SELECT MIN(capacity), MAX(capacity) FROM energy_storage_projects;", + "sql_explanation": "Calculates the minimum and maximum capacity of energy storage projects by using the MIN and MAX aggregate functions on the capacity column." 
+}, { + "id": "5113", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the energy efficiency (kWh/m2) of buildings in Sydney", + "sql_context": "CREATE TABLE building_efficiency (id INT, city VARCHAR(50), efficiency FLOAT); INSERT INTO building_efficiency (id, city, efficiency) VALUES (1, \u0027Tokyo\u0027, 120), (2, \u0027Osaka\u0027, 110), (3, \u0027Sydney\u0027, 140);", + "sql": "SELECT efficiency FROM building_efficiency WHERE city \u003d \u0027Sydney\u0027;", + "sql_explanation": "This query retrieves the energy efficiency of buildings in Sydney by selecting the efficiency column from the building_efficiency table where the city is Sydney." 
+}, { + "id": "5235", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum capacity of wind farms in France?", + "sql_context": "CREATE TABLE wind_farms (id INT, name TEXT, country TEXT, capacity FLOAT); INSERT INTO wind_farms (id, name, country, capacity) VALUES (1, \u0027Eoliennes du Boulonnais\u0027, \u0027France\u0027, 97.2), (2, \u0027Parc Eolien en Mer de Dieppe Le TrÊport\u0027, \u0027France\u0027, 496), (3, \u0027Saint Nazaire Offshore Wind Farm\u0027, \u0027France\u0027, 480);", + "sql": "SELECT MAX(capacity) FROM wind_farms WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the maximum capacity of wind farms in France by selecting the MAX capacity from the wind_farms table where the country is France." +}, { + "id": "5551", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"capacity\" column for the record with id 2 in the \"solar_plants\" table to 62.5", + "sql_context": "CREATE TABLE solar_plants (id INT, name VARCHAR(50), location VARCHAR(50), capacity FLOAT);", + "sql": "UPDATE solar_plants SET capacity \u003d 62.5 WHERE id \u003d 2;", + "sql_explanation": "This query updates the \"capacity\" for the record with id 2 in the \"solar_plants\" table to 62.5." 
+}, { + "id": "5555", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of energy efficiency projects?", + "sql_context": "CREATE TABLE energy_efficiency_projects (name TEXT, capacity INTEGER); INSERT INTO energy_efficiency_projects (name, capacity) VALUES (\u0027Project A\u0027, 200), (\u0027Project B\u0027, 900);", + "sql": "SELECT SUM(capacity) FROM energy_efficiency_projects;", + "sql_explanation": "This query calculates the total capacity of energy efficiency projects. It does this by summing up the \u0027capacity\u0027 column in the \u0027energy_efficiency_projects\u0027 table." +}, { + "id": "5666", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of wind farms in the \u0027renewables\u0027 schema?", + "sql_context": "CREATE SCHEMA renewables;CREATE TABLE wind_farms (name VARCHAR(50), capacity INT);INSERT INTO wind_farms (name, capacity) VALUES (\u0027Farm1\u0027, 100), (\u0027Farm2\u0027, 200);", + "sql": "SELECT SUM(capacity) FROM renewables.wind_farms;", + "sql_explanation": "This SQL query calculates the total capacity of wind farms in the \u0027renewables\u0027 schema by summing up the capacity values from the \u0027wind_farms\u0027 table." 
+}, { + "id": "5722", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \u0027carbon_prices\u0027 table with a price higher than 50", + "sql_context": "CREATE TABLE carbon_prices (id INT PRIMARY KEY, year INT, price FLOAT);", + "sql": "DELETE FROM carbon_prices WHERE price \u003e 50;", + "sql_explanation": "This query deletes all records from the carbon_prices table with a price value higher than 50. It uses the DELETE statement, filtering the records based on the conditions specified in the WHERE clause." +}, { + "id": "5777", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the energy efficiency upgrades with a cost greater than $2000.", + "sql_context": "CREATE TABLE upgrades (id INT, cost FLOAT, type TEXT); INSERT INTO upgrades (id, cost, type) VALUES (1, 500, \u0027Insulation\u0027), (2, 1000, \u0027HVAC\u0027), (3, 1500, \u0027Lighting\u0027), (4, 2500, \u0027Solar Panels\u0027), (5, 1800, \u0027Heat Pumps\u0027);", + "sql": "DELETE FROM upgrades WHERE cost \u003e 2000;", + "sql_explanation": "This query removes the energy efficiency upgrades with a cost greater than $2000 using the DELETE statement and WHERE clause." 
+}, { + "id": "5788", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the energy efficiency score for countries in the Asia-Pacific region?", + "sql_context": "CREATE TABLE energy_efficiency_ap (country TEXT, score FLOAT); INSERT INTO energy_efficiency_ap (country, score) VALUES (\u0027China\u0027, 79.8), (\u0027Japan\u0027, 80.7), (\u0027South Korea\u0027, 80.2), (\u0027Australia\u0027, 78.3), (\u0027India\u0027, 65.5), (\u0027Indonesia\u0027, 57.1), (\u0027Thailand\u0027, 56.8), (\u0027Malaysia\u0027, 55.8), (\u0027Singapore\u0027, 54.6), (\u0027Philippines\u0027, 53.6);", + "sql": "SELECT score FROM energy_efficiency_ap", + "sql_explanation": "This query creates a table named energy_efficiency_ap with the country and energy efficiency score for countries in the Asia-Pacific region. Then, it uses a SELECT statement to get the energy efficiency score for these countries." 
+}, { + "id": "2619", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the eSports events that have taken place in Africa or South America, along with the number of attendees?", + "sql_context": "CREATE TABLE events (id INT, name VARCHAR(20), location VARCHAR(20), attendees INT); INSERT INTO events (id, name, location, attendees) VALUES (1, \u0027Rio Games\u0027, \u0027Brazil\u0027, 50000), (2, \u0027Casablanca Cup\u0027, \u0027Morocco\u0027, 30000), (3, \u0027Johannesburg Jam\u0027, \u0027South Africa\u0027, 40000), (4, \u0027Lima League\u0027, \u0027Peru\u0027, 25000), (5, \u0027Cairo Clash\u0027, \u0027Egypt\u0027, 35000);", + "sql": "SELECT events.name, events.location, events.attendees FROM events WHERE events.location IN (\u0027Africa\u0027, \u0027South America\u0027);", + "sql_explanation": "The SQL query filters the events table for those that have taken place in Africa or South America, and then retrieves the name, location, and number of attendees for each event." 
+}, { + "id": "2786", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players have achieved a rank of \u0027Grandmaster\u0027 or higher in the game \"Cybernetic Showdown\"?", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Game VARCHAR(50), Rank VARCHAR(20)); INSERT INTO Players (PlayerID, PlayerName, Game, Rank) VALUES (1, \u0027John Doe\u0027, \u0027Cybernetic Showdown\u0027, \u0027Grandmaster\u0027); INSERT INTO Players (PlayerID, PlayerName, Game, Rank) VALUES (2, \u0027Jane Smith\u0027, \u0027Cybernetic Showdown\u0027, \u0027Master\u0027);", + "sql": "SELECT COUNT(*) FROM Players WHERE Game \u003d \u0027Cybernetic Showdown\u0027 AND Rank IN (\u0027Grandmaster\u0027, \u0027Master\u0027, \u0027Champion\u0027);", + "sql_explanation": "This SQL query counts the number of players with a rank of \u0027Grandmaster\u0027 or higher in \"Cybernetic Showdown\" by using the COUNT function on all rows (*). It filters the data for players who have played \"Cybernetic Showdown\" and have a rank of \u0027Grandmaster\u0027, \u0027Master\u0027, or \u0027Champion\u0027 using the WHERE and IN clauses." 
+}, { + "id": "2836", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert data into \u0027Game_Design\u0027 for game with ID 1001", + "sql_context": "CREATE TABLE Game_Design (id INT PRIMARY KEY, game_id INT, genre VARCHAR(255), release_year INT, developer VARCHAR(255));", + "sql": "INSERT INTO Game_Design (id, game_id, genre, release_year, developer) VALUES (1, 1001, \u0027RPG\u0027, 2018, \u0027CompanyA\u0027);", + "sql_explanation": "1. Inserts a new row into \u0027Game_Design\u0027 for game 1001 with genre \u0027RPG\u0027, release year 2018, and developer \u0027CompanyA\u0027." +}, { + "id": "2951", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total spending by players from Europe in the \u0027gaming_facts\u0027 table?", + "sql_context": "CREATE TABLE gaming_facts (player_id INT, country VARCHAR(50), total_spending FLOAT); INSERT INTO gaming_facts (player_id, country, total_spending) VALUES (1, \u0027USA\u0027, 450.25), (2, \u0027Canada\u0027, 520.35), (3, \u0027France\u0027, 405.12), (4, \u0027Germany\u0027, 350.56);", + "sql": "SELECT SUM(total_spending) as total_europe_spending FROM gaming_facts WHERE country IN (\u0027France\u0027, \u0027Germany\u0027);", + "sql_explanation": "This query calculates the total spending by players from 
Europe by summing the total spending for players from France and Germany." +}, { + "id": "2999", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players have participated in esports events, and what is the average age of these players?", + "sql_context": "CREATE TABLE Players (id INT, name VARCHAR(50), age INT, participated_in_esports_event BOOLEAN); INSERT INTO Players (id, name, age, participated_in_esports_event) VALUES (1, \u0027Player1\u0027, 25, TRUE), (2, \u0027Player2\u0027, 30, FALSE), (3, \u0027Player3\u0027, 35, TRUE);", + "sql": "SELECT COUNT(*) AS num_players, AVG(age) AS avg_age FROM Players WHERE participated_in_esports_event \u003d TRUE;", + "sql_explanation": "We filter the Players table for players who have participated in esports events and calculate the average age and total number of players who meet this condition." 
+}, { + "id": "3017", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the names of players who are from \u0027Japan\u0027 and use PlayStation VR to \u0027PSVR Japanese Players\u0027.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(20), Country VARCHAR(20), VRPlatform VARCHAR(10)); INSERT INTO Players (PlayerID, Name, Country, VRPlatform) VALUES (1, \u0027Hiroshi\u0027, \u0027Japan\u0027, \u0027PlayStation VR\u0027);", + "sql": "UPDATE Players SET Name \u003d \u0027PSVR Japanese Players\u0027 WHERE Country \u003d \u0027Japan\u0027 AND VRPlatform \u003d \u0027PlayStation VR\u0027;", + "sql_explanation": "This query updates the names of players who are from \u0027Japan\u0027 and use \u0027PlayStation VR\u0027 to \u0027PSVR Japanese Players\u0027 in the \u0027Players\u0027 table." 
+}, { + "id": "3080", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total prize pool for esports events in the \u0027MOBA\u0027 genre, and how many events were held for this genre?", + "sql_context": "CREATE TABLE EsportsEvents (EventID INT, EventName VARCHAR(50), GameID INT, EventDate DATE, PrizePool NUMERIC(18,2)); INSERT INTO EsportsEvents (EventID, EventName, GameID, EventDate, PrizePool) VALUES (1, \u0027Fortnite World Cup\u0027, 1, \u00272019-07-26\u0027, 30000000); INSERT INTO EsportsEvents (EventID, EventName, GameID, EventDate, PrizePool) VALUES (2, \u0027Overwatch League Grand Finals\u0027, 2, \u00272018-07-28\u0027, 1500000); INSERT INTO EsportsEvents (EventID, EventName, GameID, EventDate, PrizePool) VALUES (3, \u0027League of Legends World Championship\u0027, 3, \u00272018-11-03\u0027, 24000000); INSERT INTO EsportsEvents (EventID, EventName, GameID, EventDate, PrizePool) VALUES (4, \u0027Dota 2 International\u0027, 4, \u00272018-08-20\u0027, 25500000);", + "sql": "SELECT SUM(PrizePool) as TotalPrizePool, COUNT(*) as EventCount FROM EsportsEvents WHERE GameID IN (3, 4);", + "sql_explanation": "Calculate the total prize pool for esports events in the \u0027MOBA\u0027 genre and how many events were held for this genre." 
+}, { + "id": "3310", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many esports events were held in the APAC region in 2021 and 2022?", + "sql_context": "CREATE TABLE EsportsEventsYearly (EventYear INT, Region VARCHAR(10), EventCount INT); INSERT INTO EsportsEventsYearly (EventYear, Region, EventCount) VALUES (2021, \u0027APAC\u0027, 8); INSERT INTO EsportsEventsYearly (EventYear, Region, EventCount) VALUES (2022, \u0027APAC\u0027, 10);", + "sql": "SELECT SUM(EventCount) FROM EsportsEventsYearly WHERE Region \u003d \u0027APAC\u0027 AND EventYear IN (2021, 2022);", + "sql_explanation": "This SQL query calculates the total number of esports events held in the APAC region in 2021 and 2022 by filtering the EsportsEventsYearly table based on the Region and EventYear columns and then aggregating the EventCount column using the SUM function." 
+}, { + "id": "3324", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a player from \u0027Brazil\u0027 who uses Oculus and prefers PC gaming.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Country VARCHAR(20), VRPlatform VARCHAR(10), PrefersPC BOOLEAN);", + "sql": "INSERT INTO Players (PlayerID, Country, VRPlatform, PrefersPC) VALUES (3, \u0027Brazil\u0027, \u0027Oculus\u0027, TRUE);", + "sql_explanation": "This query inserts a new record into the \u0027Players\u0027 table for a player from \u0027Brazil\u0027 who uses Oculus and prefers PC gaming." +}, { + "id": "3418", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many male players have designed a puzzle game and have more than 5,000 players?", + "sql_context": "CREATE TABLE game_designers (designer_id INT, gender VARCHAR(10), genre VARCHAR(10), players INT);", + "sql": "SELECT COUNT(*) FROM game_designers WHERE gender \u003d \u0027male\u0027 AND genre \u003d \u0027puzzle\u0027 AND players \u003e 5000;", + "sql_explanation": "We create a table for game designers and formulate a SQL query to find the number of male game designers in the puzzle genre who have more than 5,000 players. 
We start by selecting the count of records from the game_designers table where gender is \u0027male\u0027, genre is \u0027puzzle\u0027, and players is greater than 5,000." +}, { + "id": "4006", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total number of players from North America and Europe?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(100), Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, Name, Age, Country) VALUES (1, \u0027John Doe\u0027, 25, \u0027USA\u0027), (2, \u0027Jane Smith\u0027, 28, \u0027Canada\u0027), (3, \u0027James Johnson\u0027, 30, \u0027England\u0027), (4, \u0027Emily Davis\u0027, 24, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM Players WHERE Country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027England\u0027, \u0027France\u0027);", + "sql_explanation": "This query counts the total number of players from the four countries specified in the context. It does this by using the WHERE clause to filter the rows where the Country column matches any of the specified countries. Finally, it counts the rows using the COUNT() aggregate function." 
+}, { + "id": "4055", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum adoption revenue of virtual reality hardware in Middle East?", + "sql_context": "CREATE TABLE VRHardwareMiddleEast (HardwareID INT, HardwareName VARCHAR(100), AdoptionRevenue DECIMAL(10,2), Country VARCHAR(50)); INSERT INTO VRHardwareMiddleEast (HardwareID, HardwareName, AdoptionRevenue, Country) VALUES (1, \u0027VR Headset A\u0027, 700.00, \u0027Saudi Arabia\u0027), (2, \u0027VR Headset B\u0027, 800.00, \u0027United Arab Emirates\u0027), (3, \u0027VR Headset C\u0027, 900.00, \u0027Turkey\u0027);", + "sql": "SELECT MIN(AdoptionRevenue) FROM VRHardwareMiddleEast WHERE Country \u003d \u0027Middle East\u0027;", + "sql_explanation": "The SQL query calculates the minimum adoption revenue of virtual reality hardware in Middle East. However, there is no \u0027Middle East\u0027 country in the table, so it should be changed to a specific Middle Eastern country name. It finds the minimum revenue by selecting the smallest value in the AdoptionRevenue column for the specified country." 
+}, { + "id": "4069", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table for player demographics", + "sql_context": "CREATE TABLE player_demographics (player_id INT PRIMARY KEY, age INT, gender VARCHAR(10), location VARCHAR(50));", + "sql": "CREATE TABLE player_demographics_new AS SELECT * FROM player_demographics WHERE 1\u003d2;", + "sql_explanation": "A new table player_demographics_new is created with the same structure as player_demographics but no rows since the WHERE clause filters out all records." +}, { + "id": "4110", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the virtual reality devices used by the most players?", + "sql_context": "CREATE TABLE VirtualReality (VRID INT PRIMARY KEY, VRName VARCHAR(50), PlayersUsing INT); INSERT INTO VirtualReality (VRID, VRName, PlayersUsing) VALUES (3, \u0027PlayStation VR\u0027, 80000); INSERT INTO VirtualReality (VRID, VRName, PlayersUsing) VALUES (5, \u0027Valve Index\u0027, 60000);", + "sql": "SELECT VRName, PlayersUsing FROM VirtualReality ORDER BY PlayersUsing DESC LIMIT 1;", + "sql_explanation": "Sort the VirtualReality table by PlayersUsing in descending order and return the top row with the virtual reality device used by the most players." 
+}, { + "id": "4274", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify players who have adopted virtual reality technology, ordered by the number of games played in descending order.", + "sql_context": "CREATE TABLE VRAdoption (PlayerID INT, VRDevice VARCHAR(50), GamesPlayed INT); INSERT INTO VRAdoption (PlayerID, VRDevice, GamesPlayed) VALUES (1, \u0027Oculus Quest\u0027, 150), (2, \u0027HTC Vive\u0027, 200), (3, \u0027Valve Index\u0027, 100);", + "sql": "SELECT PlayerID, VRDevice, GamesPlayed FROM VRAdoption ORDER BY GamesPlayed DESC", + "sql_explanation": "Order players by the number of games played, who have adopted virtual reality technology." 
+}, { + "id": "4455", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players from Japan have played more than 50 games?", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Country, GamesPlayed) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 100), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 80), (3, \u0027Taro Yamada\u0027, \u0027Japan\u0027, 70), (4, \u0027Hana Nakamura\u0027, \u0027Japan\u0027, 60);", + "sql": "SELECT COUNT(*) FROM Players WHERE Country \u003d \u0027Japan\u0027 HAVING GamesPlayed \u003e 50;", + "sql_explanation": "This query counts the number of players from Japan who have played more than 50 games. It does this by using the COUNT function with an asterisk (*), filtering the table to only include rows where the Country column is equal to \u0027Japan\u0027, and using the HAVING clause to further filter the results to only include rows where the GamesPlayed column is greater than 50." 
+}, { + "id": "4467", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique game genres played on each continent?", + "sql_context": "CREATE TABLE GameContinents (GameID INT, GameName VARCHAR(20), Continent VARCHAR(20), Genre VARCHAR(20)); INSERT INTO GameContinents (GameID, GameName, Continent, Genre) VALUES (1, \u0027GameE\u0027, \u0027Asia\u0027, \u0027Adventure\u0027), (2, \u0027GameF\u0027, \u0027Europe\u0027, \u0027Simulation\u0027), (3, \u0027GameG\u0027, \u0027North America\u0027, \u0027Strategy\u0027), (4, \u0027GameH\u0027, \u0027Australia\u0027, \u0027Adventure\u0027);", + "sql": "SELECT DISTINCT Continent, Genre FROM GameContinents WHERE Genre IS NOT NULL;", + "sql_explanation": "The query retrieves the unique game genres played on each continent. It uses the DISTINCT keyword to eliminate duplicate genres from the result set." 
+}, { + "id": "4499", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of Esports events that have had more than 1000 participants.", + "sql_context": "CREATE TABLE Events (EventID INT, EventName VARCHAR(20), Game VARCHAR(10), Participants INT); INSERT INTO Events (EventID, EventName, Game, Participants) VALUES (1, \u0027Event1\u0027, \u0027Esports\u0027, 1200); INSERT INTO Events (EventID, EventName, Game, Participants) VALUES (2, \u0027Event2\u0027, \u0027Esports\u0027, 800);", + "sql": "SELECT EventName FROM Events WHERE Game \u003d \u0027Esports\u0027 AND Participants \u003e 1000;", + "sql_explanation": "This query lists the names of Esports events that have had more than 1000 participants. It filters the Events table using the WHERE clause based on game and participants." 
+}, { + "id": "4558", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of hours spent on esports events by teams from South America?", + "sql_context": "CREATE TABLE EsportsTeamsSA (TeamID INT, TeamName VARCHAR(100), Country VARCHAR(50), HoursSpent DECIMAL(10,2)); INSERT INTO EsportsTeamsSA (TeamID, TeamName, Country, HoursSpent) VALUES (1, \u0027Team Brazil\u0027, \u0027Brazil\u0027, 120.00), (2, \u0027Team Argentina\u0027, \u0027Argentina\u0027, 140.00), (3, \u0027Team Chile\u0027, \u0027Chile\u0027, 160.00);", + "sql": "SELECT AVG(HoursSpent) FROM EsportsTeamsSA WHERE Country \u003d \u0027South America\u0027;", + "sql_explanation": "The SQL query calculates the average number of hours spent on esports events by teams from South America. However, there is no \u0027South America\u0027 country in the table, so it should be changed to a specific South American country name. It calculates the average by summing the HoursSpent column and dividing by the number of rows with the specified country." 
+}, { + "id": "4604", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hours were spent on esports events in Q2 2021?", + "sql_context": "CREATE TABLE EventSessions (SessionID INT, EventID INT, Duration INT, Quarter INT, Year INT); INSERT INTO EventSessions (SessionID, EventID, Duration, Quarter, Year) VALUES (1, 1, 300, 1, 2021); INSERT INTO EventSessions (SessionID, EventID, Duration, Quarter, Year) VALUES (2, 2, 400, 2, 2021); INSERT INTO EventSessions (SessionID, EventID, Duration, Quarter, Year) VALUES (3, 3, 500, 3, 2021);", + "sql": "SELECT SUM(Duration) FROM EventSessions WHERE Quarter \u003d 2 AND Year \u003d 2021;", + "sql_explanation": "This query calculates the total number of hours spent on esports events in Q2 2021. It filters the EventSessions table for rows where Quarter is 2 and Year is 2021 and then calculates the sum of the Duration column for those rows." 
+}, { + "id": "4764", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of games played by users from the United States and Japan?", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Country, GamesPlayed) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 100), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 80), (3, \u0027Taro Yamada\u0027, \u0027Japan\u0027, 70), (4, \u0027Hana Nakamura\u0027, \u0027Japan\u0027, 60);", + "sql": "SELECT SUM(GamesPlayed) FROM Players WHERE Country IN (\u0027USA\u0027, \u0027Japan\u0027);", + "sql_explanation": "This query calculates the total number of games played by users from the United States and Japan. It does this by using the SUM function on the GamesPlayed column, and filtering the table to only include rows where the Country column is equal to either \u0027USA\u0027 or \u0027Japan\u0027." 
+}, { + "id": "4816", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of players who use VR technology to play racing games?", + "sql_context": "CREATE TABLE PlayerVR (PlayerID INT PRIMARY KEY, GameType VARCHAR(20), VR BOOLEAN); INSERT INTO PlayerVR (PlayerID, GameType, VR) VALUES (1, \u0027Racing\u0027, true); INSERT INTO PlayerVR (PlayerID, GameType, VR) VALUES (2, \u0027FPS\u0027, false);", + "sql": "SELECT COUNT(*) FROM PlayerVR WHERE GameType \u003d \u0027Racing\u0027 AND VR \u003d true;", + "sql_explanation": "This SQL query counts the number of players who use VR technology to play racing games by filtering the PlayerVR table based on the GameType and VR columns and then aggregating the result using the COUNT function." 
+}, { + "id": "4844", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum hours played in a single game session for players who have played VR games", + "sql_context": "CREATE TABLE GameSessions (PlayerID INT, GamePreference VARCHAR(20), HoursPlayed DECIMAL(5,2)); INSERT INTO GameSessions (PlayerID, GamePreference, HoursPlayed) VALUES (1, \u0027VR\u0027, 10.5), (2, \u0027FPS\u0027, 5.3), (3, \u0027Strategy\u0027, 7.2);", + "sql": "SELECT MAX(HoursPlayed) FROM GameSessions WHERE GamePreference \u003d \u0027VR\u0027;", + "sql_explanation": "This query finds the maximum hours played in a single game session for players who have played VR games. It does so by filtering the GameSessions table for rows where GamePreference is \u0027VR\u0027 and then applying the MAX function to the HoursPlayed column." 
+}, { + "id": "4875", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total revenue of all VR games.", + "sql_context": "CREATE TABLE game_design (game_id INT, game_name VARCHAR(50), genre VARCHAR(20), revenue FLOAT, vr_compatible BOOLEAN); INSERT INTO game_design (game_id, game_name, genre, revenue, vr_compatible) VALUES (1, \u0027Elite Dangerous\u0027, \u0027Space Sim\u0027, 50.0, true), (2, \u0027Witcher 3\u0027, \u0027RPG\u0027, 120.0, false), (3, \u0027Half-Life: Alyx\u0027, \u0027Action\u0027, 40.0, true), (4, \u0027Minecraft\u0027, \u0027Sandbox\u0027, 350.0, false);", + "sql": "SELECT SUM(revenue) FROM game_design WHERE vr_compatible IS NOT NULL;", + "sql_explanation": "This query calculates the total revenue of all VR games by filtering the game_design table with the WHERE clause and the IS NOT NULL operator, which checks if the vr_compatible column is not null (i.e., the game supports VR)." 
+}, { + "id": "5061", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all players from Egypt who have played less than 100 games.", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (1, \u0027John Doe\u0027, 25, \u0027USA\u0027, 100); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (2, \u0027Jane Smith\u0027, 30, \u0027Canada\u0027, 200); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (3, \u0027Mohamed Ahmed\u0027, 24, \u0027Egypt\u0027, 80); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (4, \u0027Fatima Hassan\u0027, 28, \u0027Egypt\u0027, 150);", + "sql": "DELETE FROM Players WHERE Country \u003d \u0027Egypt\u0027 AND GamesPlayed \u003c 100;", + "sql_explanation": "1. Filter the Players table to only include rows where Country is \u0027Egypt\u0027. 2. Further filter the rows to only include those where GamesPlayed is less than 100. 3. Delete the filtered rows." 
+}, { + "id": "5231", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the game design elements for the game \u0027Galactic Conquest\u0027", + "sql_context": "CREATE TABLE GameDesign (GameName VARCHAR(50), GameEngine VARCHAR(50), GameGraphics VARCHAR(50)); INSERT INTO GameDesign (GameName, GameEngine, GameGraphics) VALUES (\u0027Galactic Conquest\u0027, \u0027Unreal Engine\u0027, \u0027High\u0027); INSERT INTO GameDesign (GameName, GameEngine, GameGraphics) VALUES (\u0027Space Crusaders\u0027, \u0027Unity\u0027, \u0027Medium\u0027);", + "sql": "SELECT * FROM GameDesign WHERE GameName \u003d \u0027Galactic Conquest\u0027;", + "sql_explanation": "This query selects all columns from the GameDesign table where the GameName is \u0027Galactic Conquest\u0027." 
+}, { + "id": "5234", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have players who play \u0027FPS\u0027 games?", + "sql_context": "CREATE TABLE Players (player_id INT, name VARCHAR(255), age INT, game_genre VARCHAR(255), country VARCHAR(255)); INSERT INTO Players (player_id, name, age, game_genre, country) VALUES (1, \u0027John\u0027, 27, \u0027FPS\u0027, \u0027USA\u0027), (2, \u0027Sarah\u0027, 30, \u0027RPG\u0027, \u0027Canada\u0027), (3, \u0027Alex\u0027, 22, \u0027FPS\u0027, \u0027USA\u0027), (4, \u0027Max\u0027, 25, \u0027FPS\u0027, \u0027Canada\u0027), (5, \u0027Zoe\u0027, 28, \u0027FPS\u0027, \u0027Mexico\u0027);", + "sql": "SELECT DISTINCT country FROM Players WHERE game_genre \u003d \u0027FPS\u0027;", + "sql_explanation": "This query returns the unique countries of players who play FPS games by selecting the country column and using the DISTINCT keyword to remove duplicates. The WHERE clause filters the data by game genre." 
+}, { + "id": "5507", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of players who play action games?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, GameType VARCHAR(10)); INSERT INTO Players (PlayerID, Age, GameType) VALUES (1, 25, \u0027Action\u0027), (2, 30, \u0027RPG\u0027), (3, 22, \u0027Action\u0027);", + "sql": "SELECT AVG(Age) FROM Players WHERE GameType \u003d \u0027Action\u0027;", + "sql_explanation": "The SQL query filters the Players table for rows where GameType is \u0027Action\u0027. It then calculates the average Age of the filtered rows." +}, { + "id": "5564", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the age of all players from India to 27.", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (1, \u0027John Doe\u0027, 25, \u0027USA\u0027); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (2, \u0027Jane Smith\u0027, 30, \u0027Canada\u0027); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (3, \u0027Raj Patel\u0027, 24, \u0027India\u0027);", + "sql": "UPDATE Players SET Age \u003d 27 WHERE Country \u003d \u0027India\u0027;", + 
"sql_explanation": "1. Filter the Players table to only include rows where Country is \u0027India\u0027. 2. Update the Age column to 27 for all matching rows." +}, { + "id": "5650", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of players who have adopted VR technology?", + "sql_context": "CREATE TABLE players (id INT, has_vr BOOLEAN); INSERT INTO players (id, has_vr) VALUES (1, TRUE), (2, FALSE), (3, TRUE), (4, FALSE), (5, TRUE);", + "sql": "SELECT COUNT(*) FROM players WHERE has_vr \u003d TRUE;", + "sql_explanation": "This SQL query counts the total number of players who have adopted VR technology. It does this by using the COUNT function on all rows, while filtering the rows to only include those where the has_vr column is TRUE." 
+}, { + "id": "5658", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update a player\u0027s age in the Players table using their ID.", + "sql_context": "CREATE TABLE Players (Player_ID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(20)); INSERT INTO Players (Player_ID, Age, Gender, Country) VALUES (1, 25, \u0027Male\u0027, \u0027Country_X\u0027), (2, 30, \u0027Female\u0027, \u0027Country_Y\u0027);", + "sql": "UPDATE Players SET Age \u003d 26 WHERE Player_ID \u003d 1;", + "sql_explanation": "This query updates the age of a player with a Player_ID of 1 from 25 to 26 in the Players table." +}, { + "id": "5710", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which players are admins in any game?", + "sql_context": "CREATE TABLE Players (player_id INT, name VARCHAR(255), age INT, is_admin BOOLEAN); INSERT INTO Players (player_id, name, age, is_admin) VALUES (1, \u0027John\u0027, 27, true), (2, \u0027Sarah\u0027, 30, false), (3, \u0027Alex\u0027, 22, false), (4, \u0027Max\u0027, 25, true), (5, \u0027Zoe\u0027, 28, false);", + "sql": "SELECT * FROM Players WHERE is_admin \u003d true;", + "sql_explanation": "This query returns all players who are admins by selecting the data where the is_admin column is true." 
+}, { + "id": "5739", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all players from the United States", + "sql_context": "CREATE TABLE players (id INT PRIMARY KEY, name VARCHAR(50), age INT, country VARCHAR(50)); INSERT INTO players (id, name, age, country) VALUES (1, \u0027John Doe\u0027, 25, \u0027USA\u0027); INSERT INTO players (id, name, age, country) VALUES (2, \u0027Jane Smith\u0027, 30, \u0027Canada\u0027);", + "sql": "DELETE FROM players WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This query deletes all records from the players table where the country is \u0027USA\u0027." +}, { + "id": "1322", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of minerals extracted (in kg) for all mining projects in Europe that have a reported environmental impact?", + "sql_context": "CREATE TABLE productivity (project_id INT, mineral TEXT, quantity INT, environmental_impact TEXT); INSERT INTO productivity (project_id, mineral, quantity, environmental_impact) VALUES (1, \u0027gold\u0027, 1200, \u0027high\u0027), (2, \u0027copper\u0027, 1500, \u0027low\u0027);", + "sql": "SELECT SUM(quantity) FROM productivity, projects WHERE productivity.project_id \u003d projects.id AND projects.continent \u003d 
\u0027Europe\u0027 AND environmental_impact IS NOT NULL;", + "sql_explanation": "This SQL query calculates the total quantity of minerals extracted (in kg) for all mining projects in Europe that have a reported environmental impact by summing the quantity column where projects.continent is \u0027Europe\u0027 and environmental_impact is not null." +}, { + "id": "2870", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mines have more than 250 employees and are located in Canada or Australia?", + "sql_context": "CREATE TABLE labor_force (mine_name VARCHAR(255), employee_count INT, country VARCHAR(255)); INSERT INTO labor_force (mine_name, employee_count, country) VALUES (\u0027Diamond Dunes\u0027, 300, \u0027Canada\u0027), (\u0027Ruby Ridges\u0027, 260, \u0027Australia\u0027);", + "sql": "SELECT mine_name FROM labor_force WHERE employee_count \u003e 250 AND (country \u003d \u0027Canada\u0027 OR country \u003d \u0027Australia\u0027);", + "sql_explanation": "Select mine name from labor_force table where employee count is greater than 250 and country is either Canada or Australia." 
+}, { + "id": "3025", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mining projects in Asia have a start date on or after 2015-01-01 and are still ongoing?", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, continent TEXT, start_date DATE, end_date DATE); INSERT INTO projects (id, name, continent, start_date, end_date) VALUES (1, \u0027Asia Gold\u0027, \u0027Asia\u0027, \u00272015-02-01\u0027, NULL), (2, \u0027Asia Coal\u0027, \u0027Asia\u0027, \u00272016-05-15\u0027, \u00272025-04-30\u0027);", + "sql": "SELECT COUNT(*) FROM projects WHERE continent \u003d \u0027Asia\u0027 AND start_date \u003e\u003d \u00272015-01-01\u0027 AND end_date IS NULL;", + "sql_explanation": "This SQL query counts the number of mining projects in Asia that have a start date on or after 2015-01-01 and are still ongoing by counting the number of rows where continent is \u0027Asia\u0027, start_date is on or after \u00272015-01-01\u0027, and end_date is null." 
+}, { + "id": "3151", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mine site has the highest environmental impact?", + "sql_context": "CREATE TABLE environmental_impact (site_id INT, environmental_score INT); INSERT INTO environmental_impact (site_id, environmental_score) VALUES (1, 60), (2, 80), (3, 70);", + "sql": "SELECT site_id, environmental_score FROM environmental_impact ORDER BY environmental_score DESC LIMIT 1;", + "sql_explanation": "This query retrieves the site with the highest environmental impact by ordering the environmental scores in descending order and limiting the results to 1 row." 
+}, { + "id": "3585", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production rate of gold per mine in Canada, for mines that produce more than 100,000 ounces of gold annually?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, production_rate INT, total_production INT); INSERT INTO mines (id, name, location, production_rate, total_production) VALUES (1, \u0027Golden Mine\u0027, \u0027Canada\u0027, 120000, 5000000); INSERT INTO mines (id, name, location, production_rate, total_production) VALUES (2, \u0027Silver Mine\u0027, \u0027Canada\u0027, 80000, 4000000);", + "sql": "SELECT AVG(production_rate) FROM mines WHERE location \u003d \u0027Canada\u0027 AND production_rate \u003e 100000;", + "sql_explanation": "This query calculates the average production rate of gold per mine in Canada, considering only the mines that produce more than 100,000 ounces of gold annually." 
+}, { + "id": "3655", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mines in California have an environmental impact score above 75?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, state TEXT, environmental_score INT); INSERT INTO mines (id, name, state, environmental_score) VALUES (1, \u0027Delta Mine\u0027, \u0027CA\u0027, 85), (2, \u0027Echo Mine\u0027, \u0027CA\u0027, 65), (3, \u0027Foxtrot Mine\u0027, \u0027CA\u0027, 78);", + "sql": "SELECT name, environmental_score FROM mines WHERE state \u003d \u0027CA\u0027 AND environmental_score \u003e 75;", + "sql_explanation": "This query finds the names of mines in California with an environmental impact score above 75. It selects the name and environmental score from the mines table where the state is California and the environmental score is above 75." 
+}, { + "id": "3773", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average labor productivity in silver mining?", + "sql_context": "CREATE TABLE labor (employee_id INT, employee_name VARCHAR(50), department VARCHAR(20), hours_worked INT, productivity INT); INSERT INTO labor (employee_id, employee_name, department, hours_worked, productivity) VALUES (1, \u0027Juan Garcia\u0027, \u0027silver\u0027, 160, 500), (2, \u0027Maria Rodriguez\u0027, \u0027silver\u0027, 180, 600), (3, \u0027Pedro Lopez\u0027, \u0027gold\u0027, 165, 700), (4, \u0027Ana Gomez\u0027, \u0027copper\u0027, 170, 550);", + "sql": "SELECT AVG(l.productivity) AS avg_productivity FROM labor l WHERE l.department \u003d \u0027silver\u0027;", + "sql_explanation": "The SQL query filters the \u0027labor\u0027 table for rows with a department of \u0027silver\u0027 and calculates the average \u0027productivity\u0027 for those rows." 
+}, { + "id": "4490", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of mineral extractions with a quantity less than 40 in the \u0027Africa\u0027 region.", + "sql_context": "CREATE TABLE Mineral_Extractions_3 (country TEXT, mineral TEXT, quantity INTEGER, region TEXT); INSERT INTO Mineral_Extractions_3 (country, mineral, quantity, region) VALUES (\u0027Nigeria\u0027, \u0027Gold\u0027, 50, \u0027Africa\u0027); INSERT INTO Mineral_Extractions_3 (country, mineral, quantity, region) VALUES (\u0027Egypt\u0027, \u0027Silver\u0027, 45, \u0027Africa\u0027); INSERT INTO Mineral_Extractions_3 (country, mineral, quantity, region) VALUES (\u0027South Africa\u0027, \u0027Platinum\u0027, 60, \u0027Africa\u0027);", + "sql": "DELETE FROM Mineral_Extractions_3 WHERE quantity \u003c 40 AND region \u003d \u0027Africa\u0027;", + "sql_explanation": "*This query deletes records from the Mineral_Extractions_3 table.* *It filters the data where the quantity is less than 40 and the region is Africa.* *The matching rows are removed from the table.*" +}, { + "id": "4495", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum amount of gold extracted, for mines that are of the \u0027Underground\u0027 type?", + 
"sql_context": "CREATE TABLE mine (id INT, name VARCHAR(255), type VARCHAR(255), gold_tons INT); INSERT INTO mine (id, name, type, gold_tons) VALUES (1, \u0027Alaskan Gold Mine\u0027, \u0027Open Pit\u0027, 700), (2, \u0027California Gold Mine\u0027, \u0027Underground\u0027, 400), (3, \u0027Nevada Silver Mine\u0027, \u0027Open Pit\u0027, 500);", + "sql": "SELECT MIN(gold_tons) as min_gold_tons FROM mine WHERE type \u003d \u0027Underground\u0027;", + "sql_explanation": "This SQL query calculates the minimum amount of gold extracted for mines that are of the \u0027Underground\u0027 type. It does this by using the MIN function to find the lowest value in the \u0027gold_tons\u0027 column for mines that have \u0027Underground\u0027 as their type." +}, { + "id": "4605", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of \u0027copper\u0027 reserves?", + "sql_context": "CREATE TABLE reserves_breakdown (id INT, metal VARCHAR(10), total_quantity INT); INSERT INTO reserves_breakdown (id, metal, total_quantity) VALUES (1, \u0027gold\u0027, 5000), (2, \u0027silver\u0027, 3000), (3, \u0027copper\u0027, 8000);", + "sql": "SELECT SUM(total_quantity) FROM reserves_breakdown WHERE metal \u003d \u0027copper\u0027;", + "sql_explanation": "This query calculates the sum of the \u0027total_quantity\u0027 column for records where the \u0027metal\u0027 column is \u0027copper\u0027." 
+}, { + "id": "4651", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average depth of gold mines in Canada.", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, depth INT); INSERT INTO mines (id, name, location, depth) VALUES (1, \u0027Golden Mine\u0027, \u0027Canada\u0027, 1200); INSERT INTO mines (id, name, location, depth) VALUES (2, \u0027Yukon Mine\u0027, \u0027Canada\u0027, 1500);", + "sql": "SELECT AVG(depth) FROM mines WHERE location \u003d \u0027Canada\u0027 AND type \u003d \u0027gold\u0027;", + "sql_explanation": "This query calculates the average depth of gold mines in Canada using the AVG() aggregate function." 
+}, { + "id": "4718", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production rate of gold mines in the US?", + "sql_context": "CREATE TABLE gold_mines (id INT, name TEXT, location TEXT, production_rate FLOAT); INSERT INTO gold_mines (id, name, location, production_rate) VALUES (1, \u0027Brewer Gold Mine\u0027, \u0027Nevada, USA\u0027, 5000.0), (2, \u0027Cortez Gold Mine\u0027, \u0027Nevada, USA\u0027, 12000.0);", + "sql": "SELECT AVG(production_rate) FROM gold_mines WHERE location LIKE \u0027%USA%\u0027;", + "sql_explanation": "This query calculates the average production rate of gold mines in the USA. It uses the AVG function to find the mean value of the production_rate column, and filters the results to only include rows where the location contains \u0027USA\u0027." 
+}, { + "id": "4861", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of lithium mines in Bolivia?", + "sql_context": "CREATE TABLE lithium_mines (id INT, name TEXT, location TEXT, depth FLOAT); INSERT INTO lithium_mines (id, name, location, depth) VALUES (1, \u0027Salar de Uyuni\u0027, \u0027Potosi, Bolivia\u0027, 360), (2, \u0027Salar de Coipasa\u0027, \u0027Oruro, Bolivia\u0027, 340), (3, \u0027Pozuelos\u0027, \u0027Oruro, Bolivia\u0027, 320);", + "sql": "SELECT MAX(depth) FROM lithium_mines WHERE location LIKE \u0027%Bolivia%\u0027;", + "sql_explanation": "Determines the maximum depth of lithium mines in Bolivia by finding the maximum value of the depth column where the location column contains \u0027Bolivia\u0027." 
+}, { + "id": "4950", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the production rate of the \u0027Grasberg\u0027 mine in the gold_mines table to 700.", + "sql_context": "CREATE TABLE gold_mines (id INT, name TEXT, location TEXT, production_rate INT); INSERT INTO gold_mines (id, name, location, production_rate) VALUES (1, \u0027Barrick Goldstrike\u0027, \u0027USA\u0027, 1500), (2, \u0027Nevada Gold Mines\u0027, \u0027USA\u0027, 2000), (3, \u0027Grasberg\u0027, \u0027Indonesia\u0027, 500);", + "sql": "UPDATE gold_mines SET production_rate \u003d 700 WHERE name \u003d \u0027Grasberg\u0027;", + "sql_explanation": "This query updates the production rate of the \u0027Grasberg\u0027 mine in the gold_mines table to 700. It uses the UPDATE statement to modify the existing rows in the gold_mines table that meet the specified condition." 
+}, { + "id": "4996", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all copper mines in Canada.", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, production_volume INT, mineral TEXT); INSERT INTO mines (id, name, location, production_volume, mineral) VALUES (1, \u0027Canadian Copper Mine 1\u0027, \u0027Canada\u0027, 7000, \u0027copper\u0027); INSERT INTO mines (id, name, location, production_volume, mineral) VALUES (2, \u0027Canadian Copper Mine 2\u0027, \u0027Canada\u0027, 8000, \u0027copper\u0027);", + "sql": "DELETE FROM mines WHERE location \u003d \u0027Canada\u0027 AND mineral \u003d \u0027copper\u0027;", + "sql_explanation": "This query deletes all copper mines in Canada by filtering the \u0027mines\u0027 table for rows with location \u0027Canada\u0027 and mineral \u0027copper\u0027, and then removing those rows." 
+}, { + "id": "5251", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get labor productivity for Barrick Gold", + "sql_context": "CREATE TABLE productivity (id INT PRIMARY KEY, company VARCHAR(100), value DECIMAL(5,2));", + "sql": "SELECT value FROM productivity WHERE company \u003d \u0027Barrick Gold\u0027;", + "sql_explanation": "This query returns the labor productivity value for Barrick Gold by querying the value column in the productivity table where the company is \u0027Barrick Gold\u0027." +}, { + "id": "5412", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete labor productivity record for Anglo American", + "sql_context": "CREATE TABLE productivity (id INT PRIMARY KEY, company VARCHAR(100), value DECIMAL(5,2));", + "sql": "DELETE FROM productivity WHERE company \u003d \u0027Anglo American\u0027;", + "sql_explanation": "This query deletes the labor productivity record for Anglo American in the productivity table." 
+}, { + "id": "5487", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get total environmental impact for site 1", + "sql_context": "CREATE TABLE impact (id INT PRIMARY KEY, site_id INT, impact_score INT);", + "sql": "SELECT SUM(impact_score) FROM impact WHERE site_id \u003d 1;", + "sql_explanation": "This query returns the total environmental impact score for site_id 1 by summing the impact_score column in the impact table where the site_id is 1." +}, { + "id": "5605", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027surveys\u0027 table where \u0027survey_date\u0027 is in the year 2005", + "sql_context": "CREATE TABLE surveys (id INT PRIMARY KEY, mine_id VARCHAR(10), survey_date DATE); INSERT INTO surveys (id, mine_id, survey_date) VALUES (1, \u0027Mine_001\u0027, \u00272005-01-01\u0027); INSERT INTO surveys (id, mine_id, survey_date) VALUES (2, \u0027Mine_002\u0027, \u00272006-01-01\u0027); INSERT INTO surveys (id, mine_id, survey_date) VALUES (3, \u0027Mine_001\u0027, \u00272004-01-01\u0027);", + "sql": "DELETE FROM surveys WHERE YEAR(survey_date) \u003d 2005;", + "sql_explanation": "This query deletes all records from the \u0027surveys\u0027 table where \u0027survey_date\u0027 is in 
the year 2005." +}, { + "id": "5790", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete environmental impact records for site 2", + "sql_context": "CREATE TABLE impact (id INT PRIMARY KEY, site_id INT, impact_score INT);", + "sql": "DELETE FROM impact WHERE site_id \u003d 2;", + "sql_explanation": "This query deletes all environmental impact records for site_id 2 in the impact table." +}, { + "id": "1781", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations received funding for marine conservation in the Mediterranean Sea in 2022?", + "sql_context": "CREATE TABLE marine_conservation_funding (id INT, organization VARCHAR(50), funding_source VARCHAR(50), amount DECIMAL(10,2), date DATE); INSERT INTO marine_conservation_funding (id, organization, funding_source, amount, date) VALUES (2, \u0027Greenpeace Mediterranean\u0027, \u0027Grants\u0027, 150000, \u00272022-03-15\u0027);", + "sql": "SELECT DISTINCT organization FROM marine_conservation_funding WHERE location \u003d \u0027Mediterranean Sea\u0027 AND date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "This query selects distinct \u0027organization\u0027 values from the \u0027marine_conservation_funding\u0027 table for records where the 
\u0027location\u0027 column value is \u0027Mediterranean Sea\u0027 and the \u0027date\u0027 column values are within 2022." +}, { + "id": "1818", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total biomass of marine life in all marine protected areas in the Atlantic Ocean?", + "sql_context": "CREATE TABLE marine_protected_areas (area_name TEXT, location TEXT, biomass FLOAT);", + "sql": "SELECT SUM(marine_protected_areas.biomass) AS total_biomass FROM marine_protected_areas WHERE marine_protected_areas.location \u003d \u0027Atlantic Ocean\u0027;", + "sql_explanation": "We filter the marine_protected_areas table to only include records in the Atlantic Ocean. We then calculate the total biomass of marine life in all marine protected areas in the Atlantic Ocean (using SUM)." 
+}, { + "id": "2320", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many ships were involved in collisions in the South China Sea in 2019?", + "sql_context": "CREATE TABLE maritime_incidents (year INT, region VARCHAR(255), incident_type VARCHAR(255), number_of_ships INT);INSERT INTO maritime_incidents (year, region, incident_type, number_of_ships) VALUES (2019, \u0027South China Sea\u0027, \u0027collision\u0027, 3), (2018, \u0027South China Sea\u0027, \u0027grounding\u0027, 2), (2017, \u0027South China Sea\u0027, \u0027collision\u0027, 4);", + "sql": "SELECT number_of_ships FROM maritime_incidents WHERE region \u003d \u0027South China Sea\u0027 AND incident_type \u003d \u0027collision\u0027 AND year \u003d 2019;", + "sql_explanation": "The query counts the number of ships involved in collisions in the South China Sea in 2019. It uses a WHERE clause to filter the records based on the region, incident type, and year." 
+}, { + "id": "2646", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all Pacific Ocean trenches, excluding the Mariana Trench?\"", + "sql_context": "CREATE TABLE ocean_trenches (trench_name TEXT, ocean_region TEXT, average_depth NUMERIC);", + "sql": "SELECT AVG(at.average_depth) FROM ocean_trenches at WHERE at.ocean_region \u003d \u0027Pacific\u0027 AND at.trench_name !\u003d \u0027Mariana\u0027;", + "sql_explanation": "This SQL query calculates the average depth of all Pacific Ocean trenches, excluding the Mariana Trench, by selecting the average depth from the ocean_trenches table where the ocean_region is \u0027Pacific\u0027 and the trench_name is not \u0027Mariana\u0027." 
+}, { + "id": "2916", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fishing vessels in the Indian, Pacific, and Southern Oceans?", + "sql_context": "CREATE TABLE fishing_vessels (id INT, name VARCHAR(255), location VARCHAR(255), length FLOAT); INSERT INTO fishing_vessels (id, name, location, length) VALUES (1, \u0027Indian Ocean Tuna Fleet\u0027, \u0027Indian Ocean\u0027, 500); INSERT INTO fishing_vessels (id, name, location, length) VALUES (2, \u0027South Pacific Squid Fleet\u0027, \u0027Pacific Ocean\u0027, 450); INSERT INTO fishing_vessels (id, name, location, length) VALUES (3, \u0027Southern Ocean Krill Fleet\u0027, \u0027Southern Ocean\u0027, 600);", + "sql": "SELECT SUM(length) FROM fishing_vessels WHERE location IN (\u0027Indian Ocean\u0027, \u0027Pacific Ocean\u0027, \u0027Southern Ocean\u0027);", + "sql_explanation": "The SQL query calculates the total length of fishing vessels in the Indian, Pacific, and Southern Oceans by summing up the length column where the location is either \u0027Indian Ocean\u0027, \u0027Pacific Ocean\u0027, or \u0027Southern Ocean\u0027." 
+}, { + "id": "2935", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average length of all underwater cables in the Indian Ocean, and how many cables are there?", + "sql_context": "CREATE TABLE UNDERWATER_CABLES (NAME TEXT, LENGTH NUMERIC, REGION TEXT); INSERT INTO UNDERWATER_CABLES (NAME, LENGTH, REGION) VALUES (\u0027SAEx1\u0027, 12000, \u0027Indian Ocean\u0027), (\u0027EASSy\u0027, 10000, \u0027Indian Ocean\u0027), (\u0027SEA-ME-WE 3\u0027, 39000, \u0027Indian Ocean\u0027), (\u0027EIG\u0027, 13000, \u0027Indian Ocean\u0027), (\u0027TEA- IN\u0027, 15000, \u0027Indian Ocean\u0027);", + "sql": "SELECT AVG(LENGTH) AS AVG_LENGTH, COUNT(*) AS NUM_CABLES FROM UNDERWATER_CABLES WHERE REGION \u003d \u0027Indian Ocean\u0027;", + "sql_explanation": "This query calculates the average length and the number of cables in the Indian Ocean by using the window functions AVG() and COUNT() on the LENGTH column for records with the REGION column equal to \u0027Indian Ocean\u0027." 
+}, { + "id": "3002", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new marine species \u0027Blue Whale\u0027 with weight 200000 in the \u0027North Atlantic Ocean\u0027 to the marine_species table", + "sql_context": "CREATE TABLE marine_species (name VARCHAR(255), weight FLOAT, location VARCHAR(255)); INSERT INTO marine_species (name, weight, location) VALUES (\u0027Great White Shark\u0027, 2000.0, \u0027Pacific Ocean\u0027), (\u0027Giant Squid\u0027, 700.0, \u0027Atlantic Ocean\u0027);", + "sql": "INSERT INTO marine_species (name, weight, location) VALUES (\u0027Blue Whale\u0027, 200000.0, \u0027North Atlantic Ocean\u0027);", + "sql_explanation": "This SQL query inserts a new record into the marine_species table with the name \u0027Blue Whale\u0027, weight 200000, and location \u0027North Atlantic Ocean\u0027 by using the INSERT INTO statement and specifying the name of the table, the columns to be inserted, and the values for those columns." 
+}, { + "id": "3339", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ocean acidity level (pH value) in the Southern Ocean over the last 10 years?", + "sql_context": "CREATE TABLE ocean_acidity (id INT, location TEXT, pH FLOAT, year INT); INSERT INTO ocean_acidity (id, location, pH, year) VALUES (1, \u0027Southern Ocean\u0027, 8.0, 2011), (2, \u0027Southern Ocean\u0027, 7.9, 2012), (3, \u0027Southern Ocean\u0027, 8.1, 2013);", + "sql": "SELECT AVG(pH) FROM ocean_acidity WHERE location \u003d \u0027Southern Ocean\u0027 AND year BETWEEN 2011 AND 2021;", + "sql_explanation": "This SQL query calculates the average ocean acidity level (pH value) in the Southern Ocean over the last 10 years. It does this by using the AVG() function on the pH column, filtering the rows by the location column and using a WHERE clause to only consider rows where year is between 2011 and 2021." 
+}, { + "id": "3437", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the marine_life table to set the population of the \u0027Salmon\u0027 species to 16000 for the atlantic_ocean region.", + "sql_context": "CREATE TABLE marine_life (id INT, species VARCHAR(255), population INT, region VARCHAR(255)); INSERT INTO marine_life (id, species, population, region) VALUES (1, \u0027Salmon\u0027, 15000, \u0027pacific_ocean\u0027); INSERT INTO marine_life (id, species, population, region) VALUES (2, \u0027Lionfish\u0027, 1200, \u0027atlantic_ocean\u0027); INSERT INTO marine_life (id, species, population, region) VALUES (3, \u0027Starfish\u0027, 8000, \u0027pacific_ocean\u0027);", + "sql": "UPDATE marine_life SET population \u003d 16000 WHERE species \u003d \u0027Salmon\u0027 AND region \u003d \u0027atlantic_ocean\u0027;", + "sql_explanation": "This SQL query updates the population value to 16000 for records with the species value of \u0027Salmon\u0027 in the marine_life table specifically for the atlantic_ocean region." 
+}, { + "id": "3536", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of all marine protected areas (MPAs) in the Indian Ocean with an average depth greater than 1000 meters?", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), area_size FLOAT, avg_depth FLOAT, region VARCHAR(255)); INSERT INTO marine_protected_areas (id, name, area_size, avg_depth, region) VALUES (1, \u0027Great Barrier Reef\u0027, 344400, -1574, \u0027Indian\u0027), (2, \u0027Andaman Islands\u0027, 8044, 1234, \u0027Indian\u0027);", + "sql": "SELECT SUM(area_size) FROM marine_protected_areas WHERE region \u003d \u0027Indian\u0027 AND avg_depth \u003e 1000;", + "sql_explanation": "This SQL query calculates the total area of all marine protected areas in the Indian Ocean with an average depth greater than 1000 meters. It does this by using the SUM function, which returns the sum of a numeric column. The WHERE clause is used to filter the rows to only those in the Indian region and with an average depth greater than 1000 meters." 
+}, { + "id": "3592", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all marine protected areas in the Pacific region that were established after 2010.", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), region VARCHAR(50), year_established INT); INSERT INTO marine_protected_areas (id, name, region, year_established) VALUES (1, \u0027Pacific Protected Area 1\u0027, \u0027Pacific\u0027, 2012), (2, \u0027Pacific Protected Area 2\u0027, \u0027Pacific\u0027, 2008);", + "sql": "SELECT name FROM marine_protected_areas WHERE region \u003d \u0027Pacific\u0027 AND year_established \u003e 2010;", + "sql_explanation": "This query filters the marine_protected_areas table to only include records where the region is \u0027Pacific\u0027 and the year_established column is greater than 2010. It then returns the names of these protected areas." 
+}, { + "id": "3664", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine protected areas in the Indian Ocean have an average depth greater than 500 meters?", + "sql_context": "CREATE TABLE marine_protected_areas (name VARCHAR(255), location VARCHAR(255), avg_depth FLOAT); INSERT INTO marine_protected_areas (name, location, avg_depth) VALUES (\u0027MPA 1\u0027, \u0027Indian Ocean\u0027, 700.0), (\u0027MPA 2\u0027, \u0027Atlantic Ocean\u0027, 300.0);", + "sql": "SELECT name FROM marine_protected_areas WHERE location \u003d \u0027Indian Ocean\u0027 AND avg_depth \u003e 500;", + "sql_explanation": "This query finds the names of marine protected areas in the Indian Ocean with an average depth greater than 500 meters by selecting the names of rows where the location is \u0027Indian Ocean\u0027 and the avg_depth is greater than 500." 
+}, { + "id": "3989", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of marine protected areas in the Pacific and Atlantic oceans?", + "sql_context": "CREATE TABLE marine_protected_areas (name TEXT, depth FLOAT, ocean TEXT); INSERT INTO marine_protected_areas (name, depth, ocean) VALUES (\u0027Galapagos Marine Reserve\u0027, 2600.0, \u0027Pacific\u0027), (\u0027Great Barrier Reef\u0027, 3444.0, \u0027Pacific\u0027), (\u0027Sargasso Sea\u0027, 7000.0, \u0027Atlantic\u0027), (\u0027Bermuda Triangle\u0027, 4000.0, \u0027Atlantic\u0027);", + "sql": "SELECT AVG(depth) FROM marine_protected_areas WHERE ocean IN (\u0027Pacific\u0027, \u0027Atlantic\u0027);", + "sql_explanation": "This SQL query calculates the average value of the depth column in the marine_protected_areas table for rows where the ocean column is either \u0027Pacific\u0027 or \u0027Atlantic\u0027 using the AVG function." 
+}, { + "id": "4213", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of vessels operating in the Pacific and Atlantic Oceans?", + "sql_context": "CREATE TABLE vessels_2 (vessel_id INT, name VARCHAR(255), operating_region VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM vessels_2 WHERE operating_region IN (\u0027Pacific\u0027, \u0027Atlantic\u0027);", + "sql_explanation": "This query counts the total number of vessels operating in the Pacific and Atlantic Oceans by filtering for rows where the operating region is either \"Pacific\" or \"Atlantic\" and returning the count." +}, { + "id": "4337", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water pressure at the deepest point in the Atlantic?", + "sql_context": "CREATE TABLE deep_sea_pressure (id INT, location VARCHAR(255), pressure INT, depth INT); INSERT INTO deep_sea_pressure (id, location, pressure, depth) VALUES (1, \u0027Challenger Deep\u0027, 1100, 10972), (2, \u0027Mariana Trench\u0027, 800, 10994);", + "sql": "SELECT MAX(pressure) FROM deep_sea_pressure WHERE location \u003d \u0027Challenger Deep\u0027;", + "sql_explanation": "This query finds the maximum water pressure at the deepest point in the Atlantic Ocean (Challenger Deep) by selecting the 
maximum pressure value in the deep_sea_pressure table for rows where the location is \u0027Challenger Deep\u0027." +}, { + "id": "4357", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vessels are operating in the Arctic and Antarctic Oceans?", + "sql_context": "CREATE TABLE vessels (vessel_id INT, name VARCHAR(255), operating_region VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM vessels WHERE operating_region IN (\u0027Arctic\u0027, \u0027Antarctic\u0027);", + "sql_explanation": "This query counts the number of vessels operating in the Arctic and Antarctic Oceans by filtering for rows where the operating region is either \"Arctic\" or \"Antarctic\" and returning the count." 
+}, { + "id": "4362", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all deep-sea volcanoes in the Pacific Ocean?", + "sql_context": "CREATE TABLE deep_sea_volcanoes (volcano_id INT, location VARCHAR(50), avg_depth FLOAT); INSERT INTO deep_sea_volcanoes (volcano_id, location, avg_depth) VALUES (1, \u0027Pacific Ocean\u0027, 2000.0), (2, \u0027Atlantic Ocean\u0027, 1500.0), (3, \u0027Indian Ocean\u0027, 1800.0);", + "sql": "SELECT AVG(avg_depth) FROM deep_sea_volcanoes WHERE location \u003d \u0027Pacific Ocean\u0027;", + "sql_explanation": "This query calculates the average of the \u0027avg_depth\u0027 column in the \u0027deep_sea_volcanoes\u0027 table where the \u0027location\u0027 column is equal to \u0027Pacific Ocean\u0027, returning the average depth of all deep-sea volcanoes in the Pacific Ocean." 
+}, { + "id": "4448", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidification level in the Southern Ocean?", + "sql_context": "CREATE TABLE ocean_acidification (location TEXT, value FLOAT); INSERT INTO ocean_acidification (location, value) VALUES (\u0027Southern Ocean\u0027, 7.9), (\u0027Mediterranean Sea\u0027, 8.2);", + "sql": "SELECT MAX(value) FROM ocean_acidification WHERE location \u003d \u0027Southern Ocean\u0027;", + "sql_explanation": "This query calculates the maximum ocean acidification level (MAX(value)) in the \u0027Southern Ocean\u0027 from the ocean_acidification table." +}, { + "id": "4456", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size of fishing vessels in the Southern Hemisphere?", + "sql_context": "CREATE TABLE fishing_vessels (id INT, name VARCHAR(255), region VARCHAR(255), length FLOAT); INSERT INTO fishing_vessels (id, name, region, length) VALUES (1, \u0027Vessel A\u0027, \u0027Northern Hemisphere\u0027, 50.5); INSERT INTO fishing_vessels (id, name, region, length) VALUES (2, \u0027Vessel B\u0027, \u0027Southern Hemisphere\u0027, 60.3); INSERT INTO fishing_vessels (id, name, region, length) VALUES (3, \u0027Vessel C\u0027, \u0027Northern Hemisphere\u0027, 70.2);", + "sql": 
"SELECT AVG(length) FROM fishing_vessels WHERE region \u003d \u0027Southern Hemisphere\u0027;", + "sql_explanation": "This query calculates the average size of fishing vessels in the Southern Hemisphere by using the AVG function to average the length column in the fishing_vessels table, filtered by the region column value." +}, { + "id": "4479", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of deep-sea species observed in a single expedition in the Southern Ocean?", + "sql_context": "CREATE TABLE deep_sea_expeditions (id INT, expedition_name VARCHAR(255), year INT, country VARCHAR(255), region VARCHAR(255), num_species INT); INSERT INTO deep_sea_expeditions (id, expedition_name, year, country, region, num_species) VALUES (1, \u0027Antarctic Circumnavigation Expedition\u0027, 2016, \u0027Australia\u0027, \u0027Southern\u0027, 345), (2, \u0027Southern Ocean Deep-Sea Expedition\u0027, 2017, \u0027New Zealand\u0027, \u0027Southern\u0027, 567);", + "sql": "SELECT MIN(num_species) FROM deep_sea_expeditions WHERE region \u003d \u0027Southern\u0027;", + "sql_explanation": "This SQL query finds the minimum number of deep-sea species observed in a single expedition in the Southern Ocean. It does this by using the MIN function, which returns the minimum value of a numeric column. The WHERE clause is used to filter the rows to only those in the Southern region." 
+}, { + "id": "4481", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum ocean acidification level in the Pacific Ocean?", + "sql_context": "CREATE TABLE ocean_acidification (location TEXT, value FLOAT); INSERT INTO ocean_acidification (location, value) VALUES (\u0027Pacific Ocean\u0027, 8.1), (\u0027Atlantic Ocean\u0027, 8.0);", + "sql": "SELECT MIN(value) FROM ocean_acidification WHERE location \u003d \u0027Pacific Ocean\u0027;", + "sql_explanation": "This query calculates the minimum ocean acidification level (MIN(value)) in the \u0027Pacific Ocean\u0027 from the ocean_acidification table." +}, { + "id": "4594", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total population of marine species in the Southern Ocean?", + "sql_context": "CREATE TABLE marine_species (name TEXT, region TEXT, population INTEGER); INSERT INTO marine_species (name, region, population) VALUES (\u0027Krill\u0027, \u0027Southern Ocean\u0027, 500000000), (\u0027Penguin\u0027, \u0027Southern Ocean\u0027, 1200000);", + "sql": "SELECT SUM(population) FROM marine_species WHERE region \u003d \u0027Southern Ocean\u0027;", + "sql_explanation": "This query sums up (SUM(population)) the population counts of marine species in the \u0027Southern Ocean\u0027 from 
the marine_species table." +}, { + "id": "4715", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Identify conservation efforts in the Mediterranean Sea.", + "sql_context": "CREATE TABLE conservation_efforts (effort_id INT, effort_name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO conservation_efforts (effort_id, effort_name, location, start_date, end_date) VALUES (2, \u0027Mediterranean Protection\u0027, \u0027Mediterranean Sea\u0027, \u00272010-01-01\u0027, \u00272030-12-31\u0027);", + "sql": "SELECT * FROM conservation_efforts WHERE location \u003d \u0027Mediterranean Sea\u0027;", + "sql_explanation": "Retrieve all records from the conservation_efforts table for efforts taking place in the Mediterranean Sea." 
+}, { + "id": "4802", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine pollution incidents in the Arctic Ocean?", + "sql_context": "CREATE TABLE marine_pollution (id INT, type TEXT, location TEXT, year INT); INSERT INTO marine_pollution (id, type, location, year) VALUES (1, \u0027Oil spill\u0027, \u0027Arctic Ocean\u0027, 2018), (2, \u0027Plastic waste\u0027, \u0027Pacific Ocean\u0027, 2020), (3, \u0027Chemical spill\u0027, \u0027Atlantic Ocean\u0027, 2019);", + "sql": "SELECT COUNT(*) FROM marine_pollution WHERE location \u003d \u0027Arctic Ocean\u0027;", + "sql_explanation": "This SQL query counts the total number of marine pollution incidents in the Arctic Ocean. It does this by using the COUNT() function with no specified column name (which counts all rows), filtering the rows by the location column and using a WHERE clause to only consider rows where location is equal to \u0027Arctic Ocean\u0027." 
+}, { + "id": "4904", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of all marine protected areas in the Arctic?", + "sql_context": "CREATE TABLE marine_protected_areas (area_name TEXT, region TEXT, area FLOAT); INSERT INTO marine_protected_areas (area_name, region, area) VALUES (\u0027Northwest Passage National Park\u0027, \u0027Arctic\u0027, 42000.0), (\u0027Torngat Mountains National Park\u0027, \u0027Arctic\u0027, 9700.0);", + "sql": "SELECT SUM(area) FROM marine_protected_areas WHERE region \u003d \u0027Arctic\u0027;", + "sql_explanation": "This SQL query calculates the total area of all marine protected areas in the Arctic by taking the sum of the \u0027area\u0027 column in the \u0027marine_protected_areas\u0027 table where the \u0027region\u0027 column is \u0027Arctic\u0027." 
+}, { + "id": "4919", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the record with id 7 in the table \"marine_protected_areas\" to \u0027Coral Reefs\u0027", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(50), size FLOAT, country VARCHAR(50));", + "sql": "UPDATE marine_protected_areas SET name \u003d \u0027Coral Reefs\u0027 WHERE id \u003d 7;", + "sql_explanation": "This query updates the name of the record with id 7 in the \"marine_protected_areas\" table to \u0027Coral Reefs\u0027." +}, { + "id": "4942", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the MarineLife table for a species named \u0027Blue Whale\u0027 with an id of 1.", + "sql_context": "CREATE TABLE marine_life (id INT, species_name VARCHAR(255)); INSERT INTO marine_life (id, species_name) VALUES (1, \u0027Dolphin\u0027), (2, \u0027Shark\u0027), (3, \u0027Tuna\u0027);", + "sql": "INSERT INTO marine_life (id, species_name) VALUES (1, \u0027Blue Whale\u0027);", + "sql_explanation": "This query inserts a new record into the MarineLife table for a species named \u0027Blue Whale\u0027 with an id of 1 by using the INSERT INTO statement and specifying the values for the columns in the VALUES clause." 
+}, { + "id": "5001", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sighting frequency of marine species in the \u0027Pacific Ocean\u0027?", + "sql_context": "CREATE TABLE Sightings (Species VARCHAR(25), Ocean VARCHAR(25), Sightings INT); INSERT INTO Sightings (Species, Ocean, Sightings) VALUES (\u0027Dolphin\u0027, \u0027Atlantic Ocean\u0027, 200), (\u0027Turtle\u0027, \u0027Pacific Ocean\u0027, 350), (\u0027Shark\u0027, \u0027Indian Ocean\u0027, 150), (\u0027Whale\u0027, \u0027Pacific Ocean\u0027, 400);", + "sql": "SELECT AVG(Sightings) FROM Sightings WHERE Ocean \u003d \u0027Pacific Ocean\u0027;", + "sql_explanation": "The SQL query calculates the average sighting frequency of marine species in the \u0027Pacific Ocean\u0027. It filters the rows where the ocean is \u0027Pacific Ocean\u0027, and then calculates the average sightings frequency using the AVG() function." 
+}, { + "id": "5015", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total number of marine protected areas in the Indian Ocean.", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(50), size FLOAT, ocean VARCHAR(20)); INSERT INTO marine_protected_areas (id, name, size, ocean) VALUES (1, \u0027Maldives Atolls\u0027, 90000, \u0027Indian\u0027); INSERT INTO marine_protected_areas (id, name, size, ocean) VALUES (2, \u0027Chagos Marine Protected Area\u0027, 640000, \u0027Indian\u0027);", + "sql": "SELECT COUNT(*) FROM marine_protected_areas WHERE ocean \u003d \u0027Indian\u0027;", + "sql_explanation": "This SQL query calculates the total number of marine protected areas in the Indian Ocean by counting all records in the marine_protected_areas table where the ocean is \u0027Indian\u0027." 
+}, { + "id": "5257", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of the Mediterranean Sea?", + "sql_context": "CREATE TABLE sea_depths (sea VARCHAR(255), depth INT); INSERT INTO sea_depths (sea, depth) VALUES (\u0027Mediterranean\u0027, 1500), (\u0027Caribbean\u0027, 1000), (\u0027Red\u0027, 2000), (\u0027Black\u0027, 2200);", + "sql": "SELECT AVG(depth) FROM sea_depths WHERE sea \u003d \u0027Mediterranean\u0027;", + "sql_explanation": "This query calculates the average depth of the Mediterranean Sea by filtering the sea column with a condition that only includes records from the Mediterranean Sea, and then calculating the average of the depth column." 
+}, { + "id": "5288", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all deep-sea expeditions in the Arctic.", + "sql_context": "CREATE TABLE deep_sea_expeditions (name TEXT, location TEXT, year INT); INSERT INTO deep_sea_expeditions (name, location, year) VALUES (\u0027Arctic Ocean 2020\u0027, \u0027Arctic\u0027, \u00272020\u0027), (\u0027Atlantic Ocean 2019\u0027, \u0027Atlantic\u0027, \u00272019\u0027);", + "sql": "SELECT * FROM deep_sea_expeditions WHERE location \u003d \u0027Arctic\u0027;", + "sql_explanation": "This SQL query selects all rows in the \u0027deep_sea_expeditions\u0027 table with a \u0027location\u0027 of \u0027Arctic\u0027, giving us a list of all deep-sea expeditions in the Arctic." 
+}, { + "id": "5341", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the names of marine protected areas and their corresponding depths in the Pacific Ocean.", + "sql_context": "CREATE TABLE marine_protected_areas_pacific (area_name VARCHAR(255), depth FLOAT); INSERT INTO marine_protected_areas_pacific (area_name, depth) VALUES (\u0027Great Barrier Reef\u0027, 10.0), (\u0027Channel Islands National Park\u0027, 200.0), (\u0027Monterey Bay National Marine Sanctuary\u0027, 400.0);", + "sql": "SELECT area_name, depth FROM marine_protected_areas_pacific;", + "sql_explanation": "The SQL query retrieves the names of marine protected areas and their corresponding depths in the Pacific Ocean by selecting all the records from the marine_protected_areas_pacific table." 
+}, { + "id": "5373", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many countries have a whale sanctuary?", + "sql_context": "CREATE TABLE whale_sanctuaries (country_name TEXT, sanctuary_name TEXT); INSERT INTO whale_sanctuaries (country_name, sanctuary_name) VALUES (\u0027Norway\u0027, \u0027Norwegian Whale Sanctuary\u0027), (\u0027Antarctica\u0027, \u0027Southern Ocean Whale Sanctuary\u0027), (\u0027Australia\u0027, \u0027Australian Whale Sanctuary\u0027);", + "sql": "SELECT COUNT(DISTINCT country_name) FROM whale_sanctuaries;", + "sql_explanation": "This SQL query uses the COUNT function with the DISTINCT keyword to count the unique number of country_name entries in the whale_sanctuaries table, giving us the number of countries with a whale sanctuary." 
+}, { + "id": "5429", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum temperature recorded in the Antarctic Ocean?", + "sql_context": "CREATE TABLE antarctic_ocean_temperature (location TEXT, temperature REAL); INSERT INTO antarctic_ocean_temperature (location, temperature) VALUES (\u0027Antarctic Ocean\u0027, 2.8), (\u0027Weddell Sea\u0027, 1.8), (\u0027Ross Sea\u0027, 0.8);", + "sql": "SELECT MAX(temperature) FROM antarctic_ocean_temperature;", + "sql_explanation": "Find the maximum temperature value in the \u0027temperature\u0027 column of \u0027antarctic_ocean_temperature\u0027 table." +}, { + "id": "5524", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all marine protected areas with a depth greater than 1000 meters.", + "sql_context": "CREATE TABLE marine_protected_areas (name VARCHAR(255), depth FLOAT);", + "sql": "DELETE FROM marine_protected_areas WHERE depth \u003e 1000;", + "sql_explanation": "This query deletes all records from the marine_protected_areas table where the depth is greater than 1000 meters." 
+}, { + "id": "5532", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many species are there in each ocean basin?", + "sql_context": "CREATE TABLE species_count (ocean_basin TEXT, species_number INTEGER); INSERT INTO species_count (ocean_basin, species_number) VALUES (\u0027Atlantic\u0027, 1200), (\u0027Pacific\u0027, 2000), (\u0027Indian\u0027, 1500);", + "sql": "SELECT ocean_basin, species_number FROM species_count;", + "sql_explanation": "This query retrieves the data from the \u0027species_count\u0027 table, displaying each ocean basin and the corresponding number of species." +}, { + "id": "5576", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \"MarineLife\" table where the \"Species\" is \"Blue Whale\"", + "sql_context": "CREATE TABLE MarineLife (Id INT, Species VARCHAR(20), Population INT);", + "sql": "DELETE FROM MarineLife WHERE Species \u003d \u0027Blue Whale\u0027;", + "sql_explanation": "This query deletes all records in the \"MarineLife\" table where the \"Species\" is \"Blue Whale\"." 
+}, { + "id": "5586", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the ocean_acidification table where the acidity level is greater than 8.3", + "sql_context": "CREATE TABLE ocean_acidification (location TEXT, acidity FLOAT); INSERT INTO ocean_acidification (location, acidity) VALUES (\u0027Caribbean Sea\u0027, 8.2), (\u0027Pacific Ocean\u0027, 8.1), (\u0027Atlantic Ocean\u0027, 8.0), (\u0027Arctic Ocean\u0027, 8.4);", + "sql": "DELETE FROM ocean_acidification WHERE acidity \u003e 8.3;", + "sql_explanation": "This query deletes all records in the ocean_acidification table where the acidity level is greater than 8.3." +}, { + "id": "5621", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum safe maritime traffic speed for each shipping lane?\u0027", + "sql_context": "CREATE TABLE shipping_lanes (lane_id INT, name VARCHAR(50), max_safe_speed INT);", + "sql": "SELECT name, max_safe_speed FROM shipping_lanes;", + "sql_explanation": "The SQL query selects the name and max_safe_speed columns from the shipping_lanes table, which contains the maximum safe maritime traffic speed for each shipping lane." 
+}, { + "id": "5651", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth reached during deep-sea exploration missions?", + "sql_context": "CREATE TABLE exploration_missions (name varchar(255), max_depth integer); INSERT INTO exploration_missions (name, max_depth) VALUES (\u0027Challenger Deep\u0027, 10972), (\u0027Mariana Trench\u0027, 10994), (\u0027Molloy Deep\u0027, 10916);", + "sql": "SELECT MAX(max_depth) FROM exploration_missions;", + "sql_explanation": "The SQL query calculates the maximum depth reached during deep-sea exploration missions by creating a common table expression (CTE) for the max_depth column of the exploration_missions table, then calculating the maximum value of the max_depth column in this CTE." 
+}, { + "id": "5680", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum dissolved oxygen level in each ocean?", + "sql_context": "CREATE TABLE dissolved_oxygen (ocean TEXT, level FLOAT); INSERT INTO dissolved_oxygen (ocean, level) VALUES (\u0027Atlantic\u0027, 4.5), (\u0027Pacific\u0027, 4.3), (\u0027Indian\u0027, 4.7);", + "sql": "SELECT ocean, MIN(level) FROM dissolved_oxygen;", + "sql_explanation": "This query calculates the minimum value of the \u0027level\u0027 column in the \u0027dissolved_oxygen\u0027 table for each ocean using the MIN() function." +}, { + "id": "5694", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the \"DiveSite\" record with a name of Blue Hole", + "sql_context": "CREATE TABLE DiveSite (Name VARCHAR(50) PRIMARY KEY, Depth INT); INSERT INTO DiveSite (Name, Depth) VALUES (\u0027Blue Hole\u0027, 120);", + "sql": "DELETE FROM DiveSite WHERE Name \u003d \u0027Blue Hole\u0027;", + "sql_explanation": "* The record with the name \"Blue Hole\" is deleted from the \"DiveSite\" table." 
+}, { + "id": "5744", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidification level recorded in the \u0027acidification_data\u0027 table?", + "sql_context": "CREATE TABLE acidification_data (sample_id INT, location VARCHAR(255), level FLOAT);", + "sql": "SELECT MAX(level) FROM acidification_data;", + "sql_explanation": "This SQL query calculates the maximum value in the \u0027level\u0027 column of the \u0027acidification_data\u0027 table using the MAX() function." +}, { + "id": "5772", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of species at risk in all oceans?", + "sql_context": "CREATE TABLE species_at_risk (ocean VARCHAR(255), count INT); INSERT INTO species_at_risk (ocean, count) VALUES (\u0027Atlantic Ocean\u0027, 1500), (\u0027Pacific Ocean\u0027, 2200), (\u0027Indian Ocean\u0027, 1800), (\u0027Southern Ocean\u0027, 1300), (\u0027Arctic Ocean\u0027, 900);", + "sql": "SELECT SUM(count) FROM species_at_risk;", + "sql_explanation": "This query calculates the total number of species at risk in all oceans by summing up their counts from the species_at_risk table." 
+}, { + "id": "5787", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of vessels in the \u0027vessels_register\u0027 table.", + "sql_context": "CREATE TABLE vessels_register (vessel_id INT, name VARCHAR(255), type VARCHAR(255), flag_state VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM vessels_register;", + "sql_explanation": "This SQL query counts all records in the \u0027vessels_register\u0027 table using the COUNT(*) function." +}, { + "id": "5796", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all marine protected areas", + "sql_context": "CREATE TABLE marine_protected_areas (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), size FLOAT, year_established INT); INSERT INTO marine_protected_areas (id, name, location, size, year_established) VALUES (1, \u0027Great Barrier Reef\u0027, \u0027Australia\u0027, 344400, 1975), (2, \u0027Galapagos Marine Reserve\u0027, \u0027Ecuador\u0027, 133000, 1998);", + "sql": "SELECT * FROM marine_protected_areas;", + "sql_explanation": "1. This statement selects all columns from the \u0027marine_protected_areas\u0027 table. 2. It returns all records in the table, displaying the marine protected areas data." 
+}, { + "id": "5837", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all marine protected areas (\u0027mpa\u0027)?", + "sql_context": "CREATE TABLE mpa (id INT, name VARCHAR(50), area_sqkm FLOAT, avg_depth FLOAT); INSERT INTO mpa (id, name, area_sqkm, avg_depth) VALUES (1, \u0027Great Barrier Reef\u0027, 344400, -2000);", + "sql": "SELECT AVG(avg_depth) FROM mpa;", + "sql_explanation": "This query calculates the average depth of all marine protected areas by taking the average of the \u0027avg_depth\u0027 column in the \u0027mpa\u0027 table." +}, { + "id": "5848", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Delete the \u0027ocean_species\u0027 table", + "sql_context": "DROP TABLE ocean_species;", + "sql": "DROP TABLE ocean_species;", + "sql_explanation": "1. It removes the \u0027ocean_species\u0027 table and all its records permanently from the database." 
+}, { + "id": "5851", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Delete \u0027species\u0027 table", + "sql_context": "CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(255), population INT, conservation_status VARCHAR(255), last_sighting DATE); INSERT INTO species (id, name, population, conservation_status, last_sighting) VALUES (1, \u0027Blue Whale\u0027, 10000, \u0027Critically Endangered\u0027, \u00272020-01-01\u0027);", + "sql": "DROP TABLE species;", + "sql_explanation": "1. This statement drops the \u0027species\u0027 table from the database. 2. All records and columns in the table are deleted." +}, { + "id": "2032", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average CO2 emissions for products made in Germany.", + "sql_context": "CREATE TABLE co2_emissions (product_id INT, product_name TEXT, co2_emissions INT); INSERT INTO co2_emissions (product_id, product_name, co2_emissions) VALUES (1, \u0027Leather Jacket\u0027, 50), (2, \u0027Wool Sweater\u0027, 30);", + "sql": "SELECT AVG(co2_emissions) FROM co2_emissions WHERE product_name IN (\u0027Leather Jacket\u0027, \u0027Wool Sweater\u0027) AND country_of_origin \u003d \u0027Germany\u0027;", + "sql_explanation": "The SQL query calculates the average CO2 
emissions for products made in Germany by filtering the \u0027co2_emissions\u0027 table for product name and country of origin and then applying the AVG() function." +}, { + "id": "2647", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view to display bottom 2 labor practice score manufacturing processes", + "sql_context": "CREATE TABLE manufacturing_processes( process_id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), labor_practice_score INT); INSERT INTO manufacturing_processes (process_id, name, location, labor_practice_score) VALUES (1, \u0027Process 1\u0027, \u0027Location 1\u0027, 80), (2, \u0027Process 2\u0027, \u0027Location 2\u0027, 85), (3, \u0027Process 3\u0027, \u0027Location 3\u0027, 90), (4, \u0027Process 4\u0027, \u0027Location 4\u0027, 95), (5, \u0027Process 5\u0027, \u0027Location 5\u0027, 100); CREATE VIEW bottom_labor_practice_processes AS SELECT * FROM manufacturing_processes WHERE labor_practice_score \u003c\u003d 85;", + "sql": "CREATE VIEW bottom_labor_practice_processes AS SELECT * FROM manufacturing_processes WHERE labor_practice_score \u003c\u003d 85;", + "sql_explanation": "This SQL statement creates a new view named \u0027bottom_labor_practice_processes\u0027 that displays only the records from the \u0027manufacturing_processes\u0027 table where the \u0027labor_practice_score\u0027 is 85 or less." 
+}, { + "id": "2837", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average labor cost for factories in African countries?", + "sql_context": "CREATE TABLE factories (factory_id INT, country VARCHAR(50), labor_cost DECIMAL(10,2)); INSERT INTO factories (factory_id, country, labor_cost) VALUES (1, \u0027Kenya\u0027, 500), (2, \u0027Nigeria\u0027, 450), (3, \u0027South Africa\u0027, 520);", + "sql": "SELECT AVG(factories.labor_cost) FROM factories WHERE factories.country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027, \u0027South Africa\u0027);", + "sql_explanation": "The SQL query calculates the average labor cost for factories in African countries by filtering rows from the factories table where the country column is \u0027Kenya\u0027, \u0027Nigeria\u0027, or \u0027South Africa\u0027. It then calculates the average of the labor_cost column." 
+}, { + "id": "2869", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum quantity of a vegan product in the cosmetics category?", + "sql_context": "CREATE TABLE products (product_id INT, is_vegan BOOLEAN, category VARCHAR(20), quantity INT); INSERT INTO products (product_id, is_vegan, category, quantity) VALUES (1, true, \u0027Cosmetics\u0027, 10), (2, false, \u0027Food\u0027, 20), (3, true, \u0027Cosmetics\u0027, 30);", + "sql": "SELECT MIN(products.quantity) FROM products WHERE products.is_vegan \u003d true AND products.category \u003d \u0027Cosmetics\u0027;", + "sql_explanation": "This query selects the minimum quantity from the products table where the is_vegan column is true and the category column is \u0027Cosmetics\u0027." 
+}, { + "id": "3365", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of products in the \u0027Electronics\u0027 category that are produced using circular supply chains?", + "sql_context": "CREATE TABLE products (product_id INT, product_name TEXT, category TEXT, is_circular_supply_chain BOOLEAN, price DECIMAL); INSERT INTO products (product_id, product_name, category, is_circular_supply_chain, price) VALUES (1, \u0027Refurbished Smartphone\u0027, \u0027Electronics\u0027, TRUE, 299.99), (2, \u0027Reconditioned Laptop\u0027, \u0027Electronics\u0027, TRUE, 599.99), (3, \u0027New Headphones\u0027, \u0027Electronics\u0027, FALSE, 99.99);", + "sql": "SELECT AVG(price) FROM products WHERE category \u003d \u0027Electronics\u0027 AND is_circular_supply_chain \u003d TRUE;", + "sql_explanation": "This query calculates the average price of products in the \u0027Electronics\u0027 category that are produced using circular supply chains by using the AVG() function and filtering for rows where \u0027category\u0027 is \u0027Electronics\u0027 and \u0027is_circular_supply_chain\u0027 is TRUE." 
+}, { + "id": "3602", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many \u0027product_transparency\u0027 records have more than 50% recycled content and are from \u0027China\u0027?", + "sql_context": "CREATE TABLE product_transparency (product_id INT, product_name VARCHAR(50), circular_supply_chain BOOLEAN, recycled_content DECIMAL(4,2), COUNTRY VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM product_transparency WHERE recycled_content \u003e 0.5 AND country \u003d \u0027China\u0027;", + "sql_explanation": "The SQL query filters the \u0027product_transparency\u0027 table based on the conditions \u0027recycled_content\u0027 \u003e 0.5 and \u0027country\u0027 \u003d \u0027China\u0027. It then counts the number of records that meet both conditions using the COUNT() function." 
+}, { + "id": "3627", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new eco-friendly product \u0027Reusable Straws\u0027 into the Inventory table", + "sql_context": "CREATE TABLE Inventory (product_id INT, product_name VARCHAR(50), price DECIMAL(5,2));", + "sql": "INSERT INTO Inventory (product_id, product_name, price) VALUES (4, \u0027Reusable Straws\u0027, 7.99);", + "sql_explanation": "This query inserts a new product \u0027Reusable Straws\u0027 with an ID of 4 and a price of $7.99 into the Inventory table." +}, { + "id": "3657", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new record into \u0027supplier_info\u0027 table for \u0027Supplier B\u0027 and \u0027Brazil\u0027", + "sql_context": "CREATE TABLE supplier_info (supplier_name VARCHAR(50), supplier_country VARCHAR(50));", + "sql": "INSERT INTO supplier_info (supplier_name, supplier_country) VALUES (\u0027Supplier B\u0027, \u0027Brazil\u0027);", + "sql_explanation": "This query inserts a new record into the supplier_info table for the supplier \u0027Supplier B\u0027 and country \u0027Brazil\u0027." 
+}, { + "id": "4087", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which suppliers are based in India and have sustainable practices?", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), sustainable_practices BOOLEAN);", + "sql": "SELECT name FROM suppliers WHERE country \u003d \u0027India\u0027 AND sustainable_practices \u003d TRUE;", + "sql_explanation": "This query selects the name column from the suppliers table where the country is India and the sustainable_practices column is True. It retrieves the names of all suppliers based in India that follow sustainable practices." 
+}, { + "id": "4259", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue of ethical \u0027apparel\u0027 sales?", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, category VARCHAR(20), revenue DECIMAL(5,2), is_ethical BOOLEAN); INSERT INTO sales (sale_id, product_id, category, revenue, is_ethical) VALUES (1, 1, \u0027apparel\u0027, 150.00, true), (2, 2, \u0027apparel\u0027, 120.00, false), (3, 3, \u0027apparel\u0027, 175.00, true);", + "sql": "SELECT AVG(revenue) FROM sales WHERE category \u003d \u0027apparel\u0027 AND is_ethical \u003d true;", + "sql_explanation": "This query calculates the average revenue of ethical \u0027apparel\u0027 sales. It does this by using the AVG function on the \u0027revenue\u0027 column, filtering the rows with a WHERE clause to only consider ethical apparel sales." 
+}, { + "id": "4363", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of unique product categories for suppliers in India.", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT, country TEXT, product_category TEXT); INSERT INTO suppliers (supplier_id, supplier_name, country, product_category) VALUES (101, \u0027Supplier 1\u0027, \u0027India\u0027, \u0027Category 1\u0027), (102, \u0027Supplier 2\u0027, \u0027India\u0027, \u0027Category 2\u0027), (103, \u0027Supplier 3\u0027, \u0027USA\u0027, \u0027Category 1\u0027), (104, \u0027Supplier 4\u0027, \u0027USA\u0027, \u0027Category 3\u0027);", + "sql": "SELECT COUNT(DISTINCT product_category) FROM suppliers WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query counts the number of unique product categories for suppliers in India by filtering the \u0027suppliers\u0027 table using the WHERE clause and using the COUNT and DISTINCT functions to count the unique \u0027product_category\u0027 values." 
+}, { + "id": "4463", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update product transparency for products with a circular supply chain", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), sourcing_type VARCHAR(50), transparency_score INT);", + "sql": "UPDATE products SET transparency_score \u003d 10 WHERE sourcing_type \u003d \u0027Circular\u0027;", + "sql_explanation": "This query updates the transparency_score for all products sourced from a circular supply chain." +}, { + "id": "4804", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum price of products that are part of the circular supply chain?", + "sql_context": "CREATE TABLE products (product_id int, name varchar(255), price decimal(5,2), is_circular_supply_chain boolean); INSERT INTO products (product_id, name, price, is_circular_supply_chain) VALUES (1, \u0027Recycled Paper Notebook\u0027, 12.99, true), (2, \u0027Bamboo Toothbrush\u0027, 7.99, true), (3, \u0027Regular Notebook\u0027, 9.99, false);", + "sql": "SELECT MIN(price) FROM products WHERE is_circular_supply_chain \u003d true;", + "sql_explanation": "This query calculates the minimum price of products that are part of the circular supply chain by filtering the products table where 
is_circular_supply_chain is true and then calculating the minimum price." +}, { + "id": "5205", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of Fair Trade certified products in the inventory?", + "sql_context": "CREATE TABLE products (product_id int, name varchar(255), quantity int, is_fair_trade boolean); INSERT INTO products (product_id, name, quantity, is_fair_trade) VALUES (1, \u0027Organic Cotton T-Shirt\u0027, 100, true), (2, \u0027Regular Cotton T-Shirt\u0027, 150, false), (3, \u0027Reusable Water Bottle\u0027, 200, false), (4, \u0027Fair Trade Coffee\u0027, 50, true);", + "sql": "SELECT AVG(quantity) FROM products WHERE is_fair_trade \u003d true;", + "sql_explanation": "This query calculates the average quantity of Fair Trade certified products in the inventory by filtering the products table where is_fair_trade is true and then calculating the average quantity." 
+}, { + "id": "5475", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of products manufactured in Italy?", + "sql_context": "CREATE TABLE products (product_id INT, product_name TEXT, price DECIMAL(5,2), country TEXT); INSERT INTO products (product_id, product_name, price, country) VALUES (1, \u0027T-Shirt\u0027, 20.99, \u0027Italy\u0027); INSERT INTO products (product_id, product_name, price, country) VALUES (2, \u0027Jeans\u0027, 50.49, \u0027France\u0027); INSERT INTO products (product_id, product_name, price, country) VALUES (3, \u0027Shoes\u0027, 75.99, \u0027Italy\u0027);", + "sql": "SELECT AVG(price) FROM products WHERE country \u003d \u0027Italy\u0027;", + "sql_explanation": "The SQL query calculates the average price of products by filtering the \u0027products\u0027 table for rows with the \u0027country\u0027 value of \u0027Italy\u0027 and then using the AVG() function to find the average \u0027price\u0027." 
+}, { + "id": "5662", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique vendors in the Vendors table?", + "sql_context": "CREATE TABLE Vendors (vendor_id INT, vendor_name TEXT, vendor_location TEXT); INSERT INTO Vendors (vendor_id, vendor_name, vendor_location) VALUES (301, \u0027Green Farms\u0027, \u0027CA\u0027); INSERT INTO Vendors (vendor_id, vendor_name, vendor_location) VALUES (302, \u0027Eco Goods\u0027, \u0027NY\u0027); INSERT INTO Vendors (vendor_id, vendor_name, vendor_location) VALUES (303, \u0027Fair Trade Imports\u0027, \u0027TX\u0027);", + "sql": "SELECT COUNT(DISTINCT vendor_name) FROM Vendors;", + "sql_explanation": "The SQL query retrieves the number of unique vendors in the Vendors table by selecting distinct values from the vendor_name column and computing the count." 
+}, { + "id": "1159", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of medals won by athletes from Japan in the Olympics?", + "sql_context": "CREATE TABLE olympics (athlete TEXT, country TEXT, medal TEXT);", + "sql": "SELECT SUM(CASE WHEN medal \u003d \u0027Gold\u0027 THEN 1 WHEN medal \u003d \u0027Silver\u0027 THEN 0.5 WHEN medal \u003d \u0027Bronze\u0027 THEN 0.25 ELSE 0 END) as total_medals FROM olympics WHERE country \u003d \u0027Japan\u0027;", + "sql_explanation": "This query calculates the total number of medals won by athletes from Japan in the Olympics. It does this by using a CASE statement to assign a value of 1, 0.5, or 0.25 to each gold, silver, or bronze medal respectively. It then sums these values for all rows where the country column is \u0027Japan\u0027." 
+}, { + "id": "1568", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of blocks by the Thunder\u0027s Russell Westbrook in the 2015-2016 season?", + "sql_context": "CREATE TABLE teams (team_name VARCHAR(255), season_start_year INT, season_end_year INT); INSERT INTO teams (team_name, season_start_year, season_end_year) VALUES (\u0027Thunder\u0027, 2015, 2016); CREATE TABLE players (player_name VARCHAR(255), team_name VARCHAR(255), blocks INT);", + "sql": "SELECT SUM(blocks) FROM players WHERE player_name \u003d \u0027Russell Westbrook\u0027 AND team_name \u003d \u0027Thunder\u0027 AND season_start_year \u003d 2015 AND season_end_year \u003d 2016;", + "sql_explanation": "This SQL query calculates the total number of blocks by Russell Westbrook for the Thunder in the 2015-2016 season by selecting the blocks column from the players table and filtering for Russell Westbrook and the specified season range. It then calculates the total using the SUM() function." 
+}, { + "id": "1584", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of rebounds by the Warriors\u0027 Stephen Curry in the 2019-2020 season?", + "sql_context": "CREATE TABLE teams (team_name VARCHAR(255), season_start_year INT, season_end_year INT); INSERT INTO teams (team_name, season_start_year, season_end_year) VALUES (\u0027Warriors\u0027, 2019, 2020); CREATE TABLE players (player_name VARCHAR(255), team_name VARCHAR(255), rebounds INT);", + "sql": "SELECT SUM(rebounds) FROM players WHERE player_name \u003d \u0027Stephen Curry\u0027 AND team_name \u003d \u0027Warriors\u0027 AND season_start_year \u003d 2019 AND season_end_year \u003d 2020;", + "sql_explanation": "This SQL query calculates the total number of rebounds by Stephen Curry for the Warriors in the 2019-2020 season by selecting the rebounds column from the players table and filtering for Stephen Curry and the specified season range. It then calculates the total using the SUM() function." 
+}, { + "id": "1721", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new soccer match to the \u0027matches\u0027 table with the given details.", + "sql_context": "CREATE TABLE matches (match_id INT, home_team TEXT, away_team TEXT, home_goals INT, away_goals INT, match_date DATE);", + "sql": "INSERT INTO matches (match_id, home_team, away_team, home_goals, away_goals, match_date) VALUES (1, \u0027Barcelona\u0027, \u0027Real Madrid\u0027, 3, 2, \u00272022-10-02\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027matches\u0027 table with the given details. It specifies the match ID, home team, away team, home goals, away goals, and match date." 
+}, { + "id": "2408", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many basketball matches took place in Los Angeles in the last month?", + "sql_context": "CREATE TABLE if not exists cities (city_id INT, city VARCHAR(255)); INSERT INTO cities (city_id, city) VALUES (1, \u0027Los Angeles\u0027), (2, \u0027New York\u0027), (3, \u0027Chicago\u0027); CREATE TABLE if not exists matches (match_id INT, city_id INT, sport VARCHAR(255), date DATE); INSERT INTO matches (match_id, city_id, sport, date) VALUES (1, 1, \u0027Basketball\u0027, \u00272022-05-01\u0027), (2, 2, \u0027Baseball\u0027, \u00272022-05-05\u0027), (3, 3, \u0027Soccer\u0027, \u00272022-05-03\u0027), (4, 1, \u0027Basketball\u0027, \u00272022-05-15\u0027), (5, 1, \u0027Basketball\u0027, \u00272022-05-25\u0027);", + "sql": "SELECT COUNT(match_id) FROM matches WHERE city_id \u003d 1 AND sport \u003d \u0027Basketball\u0027 AND date \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH);", + "sql_explanation": "This SQL query filters the matches table to only include basketball matches in Los Angeles (city_id \u003d 1) in the last month (date \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH)) and then calculates the number of matches by using the COUNT() function." 
+}, { + "id": "3085", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sacks did J.J. Watt record in the 2018 NFL season?", + "sql_context": "CREATE TABLE nfl_sacks (player_name VARCHAR(50), team VARCHAR(50), season YEAR, sacks INT); INSERT INTO nfl_sacks (player_name, team, season, sacks) VALUES (\u0027J.J. Watt\u0027, \u0027Houston Texans\u0027, 2018, 16);", + "sql": "SELECT sacks FROM nfl_sacks WHERE player_name \u003d \u0027J.J. Watt\u0027 AND team \u003d \u0027Houston Texans\u0027 AND season \u003d 2018;", + "sql_explanation": "The SQL query filters the data for J.J. Watt\u0027s sack statistics with the Houston Texans in the 2018 NFL season and returns the number of sacks." 
+}, { + "id": "3599", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 goal scorers in the 2022 FIFA World Cup?", + "sql_context": "CREATE TABLE world_cup_goals (id INT, player VARCHAR(100), team VARCHAR(50), goals INT, world_cup BOOLEAN); INSERT INTO world_cup_goals (id, player, team, goals, world_cup) VALUES (1, \u0027Messi\u0027, \u0027Argentina\u0027, 5, true), (2, \u0027Ronaldo\u0027, \u0027Portugal\u0027, 4, true), (3, \u0027Neymar\u0027, \u0027Brazil\u0027, 6, true);", + "sql": "SELECT player, goals FROM world_cup_goals WHERE world_cup \u003d true ORDER BY goals DESC LIMIT 5;", + "sql_explanation": "This query lists the top 5 goal scorers in the 2022 FIFA World Cup. It does so by selecting the player and goals columns, filtering the rows with the world_cup column set to true, ordering the rows by the goals column in descending order, and limiting the result to 5 rows." 
+}, { + "id": "3718", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference between the most and least goals conceded by each team in the 2022-2023 EPL season?", + "sql_context": "CREATE TABLE epl_matches (match_id INT, team_a TEXT, team_b TEXT, goals_scored_a INT, goals_scored_b INT); INSERT INTO epl_matches (match_id, team_a, team_b, goals_scored_a, goals_scored_b) VALUES (1, \u0027Manchester City\u0027, \u0027West Ham United\u0027, 2, 0), (2, \u0027Liverpool\u0027, \u0027Fulham\u0027, 2, 2);", + "sql": "SELECT team_a, team_b, ABS(goals_scored_a - goals_scored_b) AS difference FROM epl_matches;", + "sql_explanation": "Find the difference between the goals conceded by each team by calculating the absolute value of the difference between the goals scored by team A and team B." 
+}, { + "id": "4272", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records for a new team, \u0027Seattle Kraken\u0027", + "sql_context": "CREATE TABLE players (player_id INT, name VARCHAR(100), position VARCHAR(50), team_id INT); CREATE TABLE teams (team_id INT, name VARCHAR(100), city VARCHAR(100)); INSERT INTO players (player_id, name, position, team_id) VALUES (1, \u0027John Doe\u0027, \u0027Forward\u0027, 1), (2, \u0027Jane Smith\u0027, \u0027Goalie\u0027, 2); INSERT INTO teams (team_id, name, city) VALUES (1, \u0027Boston Bruins\u0027, \u0027Boston\u0027), (2, \u0027New York Rangers\u0027, \u0027New York\u0027);", + "sql": "INSERT INTO teams (team_id, name, city) VALUES (3, \u0027Seattle Kraken\u0027, \u0027Seattle\u0027);", + "sql_explanation": "This query inserts a new record into the teams table for the Seattle Kraken, assigning them a team_id of 3." 
+}, { + "id": "4407", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the end_time for all records in \u0027facility_schedule\u0027 table to 1 hour after the start_time", + "sql_context": "CREATE TABLE facility_schedule (id INT, facility_id INT, start_time TIMESTAMP, end_time TIMESTAMP); INSERT INTO facility_schedule (id, facility_id, start_time, end_time) VALUES (1, 1, \u00272022-01-01 09:00:00\u0027, \u00272022-01-01 11:00:00\u0027);", + "sql": "UPDATE facility_schedule SET end_time \u003d DATE_ADD(start_time, INTERVAL 1 HOUR);", + "sql_explanation": "We update the end_time of all records in the facility_schedule table to 1 hour after the start_time. We use the DATE_ADD function to add 1 hour to the start_time." 
+}, { + "id": "4795", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of tries scored by rugby players in the Six Nations Championship?", + "sql_context": "CREATE TABLE rugby (id INT, player VARCHAR(50), team VARCHAR(50), league VARCHAR(50), tries INT); INSERT INTO rugby (id, player, team, league, tries) VALUES (1, \u0027Jonny May\u0027, \u0027England\u0027, \u0027Six Nations Championship\u0027, 5); INSERT INTO rugby (id, player, team, league, tries) VALUES (2, \u0027Jacob Stockdale\u0027, \u0027Ireland\u0027, \u0027Six Nations Championship\u0027, 4);", + "sql": "SELECT SUM(tries) FROM rugby WHERE league \u003d \u0027Six Nations Championship\u0027;", + "sql_explanation": "This query calculates the total number of tries scored by rugby players in the Six Nations Championship by using the SUM function on the tries column, filtering by the league column with the value \u0027Six Nations Championship\u0027." 
+}, { + "id": "4905", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average number of assists per game for Team D in the 2017 season?", + "sql_context": "CREATE TABLE games (id INT, team TEXT, location TEXT, assists INT); INSERT INTO games (id, team, location, assists) VALUES (1, \u0027Team D\u0027, \u0027Home\u0027, 12);", + "sql": "SELECT AVG(assists) FROM games WHERE team \u003d \u0027Team D\u0027 AND year \u003d 2017;", + "sql_explanation": "This query calculates the average number of assists per game for Team D in the 2017 season. It does this by selecting the average value of the assists column for rows where the team is \u0027Team D\u0027 and the year is 2017." 
+}, { + "id": "4948", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of matches played by cricket team 504?", + "sql_context": "CREATE TABLE matches (match_id INT, team1_id INT, team2_id INT, winner INT, score INT); INSERT INTO matches (match_id, team1_id, team2_id, winner, score) VALUES (1, 504, 505, 505, 200);", + "sql": "SELECT COUNT(*) FROM matches WHERE team1_id \u003d 504 OR team2_id \u003d 504;", + "sql_explanation": "This query counts the number of records in the \u0027matches\u0027 table where the \u0027team1_id\u0027 or \u0027team2_id\u0027 is equal to 504." +}, { + "id": "5005", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and salaries of baseball players earning more than $100,000 in the \u0027baseball_players\u0027 table.", + "sql_context": "CREATE TABLE baseball_players (player_id INT, name VARCHAR(50), salary DECIMAL(10, 2)); INSERT INTO baseball_players (player_id, name, salary) VALUES (1, \u0027Jim Brown\u0027, 120000.00); INSERT INTO baseball_players (player_id, name, salary) VALUES (2, \u0027Mike Johnson\u0027, 90000.00);", + "sql": "SELECT name, salary FROM baseball_players WHERE salary \u003e 100000.00;", + "sql_explanation": "This query 
retrieves the names and salaries of baseball players from the \u0027baseball_players\u0027 table who earn more than $100,000 by using the WHERE clause to filter the results based on the \u0027salary\u0027 column." +}, { + "id": "5137", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum height of soccer players in the \u0027Premier League\u0027?", + "sql_context": "CREATE TABLE players (player_id INT, name VARCHAR(50), position VARCHAR(50), height FLOAT, weight INT, team_id INT, league VARCHAR(50)); INSERT INTO players (player_id, name, position, height, weight, team_id, league) VALUES (3, \u0027Carol\u0027, \u0027Forward\u0027, 1.68, 65, 301, \u0027Premier League\u0027);", + "sql": "SELECT MIN(height) FROM players WHERE league \u003d \u0027Premier League\u0027;", + "sql_explanation": "Filter the records to only include players in the Premier League, then calculate the minimum height of those records." 
+}, { + "id": "5236", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of goals scored by a hockey player in a single game in the \u0027hockey_games\u0027 table?", + "sql_context": "CREATE TABLE hockey_games (game_id INT, date DATE, home_team_id INT, away_team_id INT, goals_home INT, goals_away INT);", + "sql": "SELECT MAX(goals_home) FROM hockey_games WHERE goals_home \u003e 0;", + "sql_explanation": "This SQL query finds the maximum number of goals scored by a hockey player in a single game by using the MAX function on the \u0027goals_home\u0027 column in the \u0027hockey_games\u0027 table, where the \u0027goals_home\u0027 column has a value greater than 0." 
+}, { + "id": "5291", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many matches did each soccer team in the English Premier League play in the 2021-2022 season?", + "sql_context": "CREATE TABLE english_premier_league (team_id INT, team_name VARCHAR(50), matches_played INT); INSERT INTO english_premier_league (team_id, team_name, matches_played) VALUES (1, \u0027Manchester City\u0027, 38), (2, \u0027Liverpool\u0027, 38), (3, \u0027Chelsea\u0027, 38);", + "sql": "SELECT team_name, matches_played FROM english_premier_league;", + "sql_explanation": "This SQL query retrieves the team_name and matches_played from the english_premier_league table." 
+}, { + "id": "5396", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the hockey players and their respective positions.", + "sql_context": "CREATE TABLE players (id INT, name VARCHAR(50), position VARCHAR(20), sport VARCHAR(20)); INSERT INTO players (id, name, position, sport) VALUES (1, \u0027Sidney Crosby\u0027, \u0027Center\u0027, \u0027Hockey\u0027); INSERT INTO players (id, name, position, sport) VALUES (2, \u0027Alex Ovechkin\u0027, \u0027Left Wing\u0027, \u0027Hockey\u0027);", + "sql": "SELECT name, position FROM players WHERE sport \u003d \u0027Hockey\u0027;", + "sql_explanation": "This SQL query retrieves the names and positions of all hockey players in the \u0027players\u0027 table by using the WHERE clause to filter for the sport \u0027Hockey\u0027." 
+}, { + "id": "5470", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all the teams in the \u0027soccer_teams\u0027 table that have played more than 20 games?", + "sql_context": "CREATE TABLE soccer_teams (team_id INT, team_name VARCHAR(100), num_games INT);", + "sql": "SELECT team_name FROM soccer_teams WHERE num_games \u003e 20;", + "sql_explanation": "This SQL query returns the \u0027team_name\u0027 column for all rows in the \u0027soccer_teams\u0027 table where the \u0027num_games\u0027 column is greater than 20." +}, { + "id": "5482", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027tenure\u0027 of the coach of the \u0027Patriots\u0027 team to 25 years.", + "sql_context": "CREATE TABLE coaches (id INT PRIMARY KEY, name VARCHAR(50), team VARCHAR(30), tenure INT); INSERT INTO coaches (id, name, team, tenure) VALUES (1, \u0027Bill Belichick\u0027, \u0027Patriots\u0027, 20);", + "sql": "UPDATE coaches SET tenure \u003d 25 WHERE team \u003d \u0027Patriots\u0027;", + "sql_explanation": "This query updates the \u0027tenure\u0027 column in the \u0027coaches\u0027 table to 25 where the \u0027team\u0027 is \u0027Patriots\u0027." 
+}, { + "id": "5494", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many matches were won by the New York Yankees in the 2022 MLB season?", + "sql_context": "CREATE TABLE teams (team TEXT, wins INT, losses INT); INSERT INTO teams (team, wins, losses) VALUES (\u0027New York Yankees\u0027, 87, 68), (\u0027Houston Astros\u0027, 86, 70), (\u0027Los Angeles Dodgers\u0027, 84, 71), (\u0027Atlanta Braves\u0027, 81, 73), (\u0027Toronto Blue Jays\u0027, 79, 75);", + "sql": "SELECT wins FROM teams WHERE team \u003d \u0027New York Yankees\u0027;", + "sql_explanation": "1. Select the wins column from the teams table where the team is the New York Yankees. This will return the number of matches won by the New York Yankees in the 2022 MLB season." 
+}, { + "id": "5596", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record of the athlete with \u0027athlete_id\u0027 152 from the \u0027baseball_players\u0027 table", + "sql_context": "CREATE TABLE baseball_players (player_id INT, player_name VARCHAR(50), position VARCHAR(50), team VARCHAR(50));", + "sql": "DELETE FROM baseball_players WHERE player_id \u003d 152;", + "sql_explanation": "This SQL query deletes the record from the \u0027baseball_players\u0027 table where the \u0027player_id\u0027 column has a value of 152." +}, { + "id": "5641", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many penalties were given in Match 4?", + "sql_context": "CREATE TABLE Match_4 (penalties INT); INSERT INTO Match_4 (penalties) VALUES (5), (4), (6);", + "sql": "SELECT penalties FROM Match_4 WHERE match_id \u003d 4;", + "sql_explanation": "This query calculates the number of penalties given in Match 4 by selecting the \u0027penalties\u0027 column, filtered by the \u0027match_id\u0027 being 4." 
+}, { + "id": "5647", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest score in a cricket match in the IPL?", + "sql_context": "CREATE TABLE ipl_matches (match_id INT, team1 VARCHAR(50), team2 VARCHAR(50), score1 INT, score2 INT); INSERT INTO ipl_matches (match_id, team1, team2, score1, score2) VALUES (1, \u0027Mumbai Indians\u0027, \u0027Chennai Super Kings\u0027, 200, 150); INSERT INTO ipl_matches (match_id, team1, team2, score1, score2) VALUES (2, \u0027Royal Challengers Bangalore\u0027, \u0027Kolkata Knight Riders\u0027, 220, 210);", + "sql": "SELECT GREATEST(score1, score2) FROM ipl_matches;", + "sql_explanation": "The SQL query returns the highest score in a cricket match in the IPL by selecting the greatest value between the score1 and score2 columns from the ipl_matches table." 
+}, { + "id": "5746", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of fans attending each rugby match?", + "sql_context": "CREATE TABLE RugbyMatches (match_id INTEGER, home_team TEXT, away_team TEXT, attendance INTEGER); INSERT INTO RugbyMatches (match_id, home_team, away_team, attendance) VALUES (1, \u0027Team A\u0027, \u0027Team B\u0027, 5000), (2, \u0027Team C\u0027, \u0027Team A\u0027, 7000), (3, \u0027Team B\u0027, \u0027Team C\u0027, 6000);", + "sql": "SELECT AVG(attendance) FROM RugbyMatches;", + "sql_explanation": "The SQL query calculates the average number of fans attending each rugby match by summing up the attendance for each match and dividing by the number of matches." 
+}, { + "id": "5783", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of assists by a player in a single season?", + "sql_context": "CREATE TABLE season_stats (id INT, player TEXT, assists INT, season INT); INSERT INTO season_stats (id, player, assists, season) VALUES (1, \u0027Xavi\u0027, 20, 2010), (2, \u0027Iniesta\u0027, 19, 2011), (3, \u0027Messi\u0027, 18, 2012);", + "sql": "SELECT MAX(assists) FROM season_stats;", + "sql_explanation": "This SQL query calculates the maximum number of assists by a player in a single season by using the MAX() aggregation function on the assists column. The query does not require any filtering or grouping because it is looking for the maximum value in the entire table." 
+}, { + "id": "5801", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete team 2", + "sql_context": "CREATE TABLE players (player_id INT, name VARCHAR(100), position VARCHAR(50), team_id INT); INSERT INTO players (player_id, name, position, team_id) VALUES (1, \u0027John Doe\u0027, \u0027Forward\u0027, 1), (2, \u0027Jane Smith\u0027, \u0027Goalie\u0027, 2); CREATE TABLE teams (team_id INT, name VARCHAR(100), city VARCHAR(100)); INSERT INTO teams (team_id, name, city) VALUES (1, \u0027Boston Bruins\u0027, \u0027Boston\u0027), (2, \u0027New York Rangers\u0027, \u0027New York\u0027);", + "sql": "DELETE FROM teams WHERE team_id \u003d 2;", + "sql_explanation": "This query deletes the record for team_id 2 from the teams table." +}, { + "id": "5841", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of players in the \u0027players\u0027 table?", + "sql_context": "CREATE TABLE players (player_id INT, name VARCHAR(50), age INT, position VARCHAR(20), team_id INT);", + "sql": "SELECT AVG(age) FROM players;", + "sql_explanation": "This SQL query calculates the average age of all players in the \u0027players\u0027 table by using the AVG function on the \u0027age\u0027 column." 
+}, { + "id": "889", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many defense diplomacy events took place between India and Pakistan from 2015 to 2019?", + "sql_context": "CREATE TABLE DiplomacyEvents(Country1 NVARCHAR(50), Country2 NVARCHAR(50), EventType VARCHAR(50), Year INT);INSERT INTO DiplomacyEvents(Country1, Country2, EventType, Year) VALUES (\u0027India\u0027, \u0027Pakistan\u0027, \u0027Defense Talks\u0027, 2015), (\u0027Pakistan\u0027, \u0027India\u0027, \u0027Military Exercise\u0027, 2016), (\u0027India\u0027, \u0027Pakistan\u0027, \u0027Joint Military Training\u0027, 2017), (\u0027Pakistan\u0027, \u0027India\u0027, \u0027Defense Talks\u0027, 2018), (\u0027India\u0027, \u0027Pakistan\u0027, \u0027Military Exercise\u0027, 2019);", + "sql": "SELECT COUNT(*) AS Total_Events FROM DiplomacyEvents WHERE (Country1 \u003d \u0027India\u0027 AND Country2 \u003d \u0027Pakistan\u0027) OR (Country1 \u003d \u0027Pakistan\u0027 AND Country2 \u003d \u0027India\u0027) AND Year BETWEEN 2015 AND 2019;", + "sql_explanation": "The query counts the number of defense diplomacy events between India and Pakistan from 2015 to 2019 by using the COUNT(*) function. It selects only the records where the country pair is \u0027India\u0027 and \u0027Pakistan\u0027 or \u0027Pakistan\u0027 and \u0027India\u0027 and the year is between 2015 and 2019." 
+}, { + "id": "1537", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of humanitarian assistance provided by France and Germany combined in the Middle East since 2010?", + "sql_context": "CREATE TABLE humanitarian_assistance (donor VARCHAR(255), region VARCHAR(255), cost DECIMAL(10, 2), assistance_date DATE);", + "sql": "SELECT SUM(cost) FROM humanitarian_assistance WHERE (donor \u003d \u0027France\u0027 OR donor \u003d \u0027Germany\u0027) AND region \u003d \u0027Middle East\u0027 AND assistance_date \u003e\u003d \u00272010-01-01\u0027;", + "sql_explanation": "This query calculates the total cost of humanitarian assistance provided by France and Germany combined in the Middle East since 2010 by summing the cost of assistance where the donor is either France or Germany, the region is the Middle East, and the assistance date is since 2010." 
+}, { + "id": "1830", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated for military innovation by countries in Europe?", + "sql_context": "CREATE TABLE MilitaryInnovation (id INT, country VARCHAR(50), budget DECIMAL(10,2), year INT); INSERT INTO MilitaryInnovation (id, country, budget, year) VALUES (1, \u0027France\u0027, 5000000, 2020), (2, \u0027Germany\u0027, 6000000, 2020), (3, \u0027Italy\u0027, 4000000, 2020);", + "sql": "SELECT AVG(budget) FROM MilitaryInnovation WHERE country IN (\u0027France\u0027, \u0027Germany\u0027, \u0027Italy\u0027) AND year \u003d 2020 AND category \u003d \u0027military innovation\u0027;", + "sql_explanation": "This SQL query calculates the average budget allocated for military innovation by European countries in 2020. It filters the MilitaryInnovation table based on the country and year, and then calculates the average budget allocated using the AVG() function." 
+}, { + "id": "1891", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the defense diplomacy events in 2022 with the highest number of participating countries?", + "sql_context": "CREATE TABLE defense_diplomacy (id INT, event VARCHAR(50), country VARCHAR(50), num_participating_countries INT, year INT); INSERT INTO defense_diplomacy (id, event, country, num_participating_countries, year) VALUES (1, \u0027Joint Military Exercise\u0027, \u0027India\u0027, 15, 2022); INSERT INTO defense_diplomacy (id, event, country, num_participating_countries, year) VALUES (2, \u0027Military AttachÊ Visit\u0027, \u0027Japan\u0027, 5, 2022); INSERT INTO defense_diplomacy (id, event, country, num_participating_countries, year) VALUES (3, \u0027Defense Minister Summit\u0027, \u0027Australia\u0027, 10, 2022);", + "sql": "SELECT event, country, num_participating_countries FROM defense_diplomacy WHERE year \u003d 2022 ORDER BY num_participating_countries DESC LIMIT 1;", + "sql_explanation": "This SQL query filters the defense_diplomacy table for the year 2022, orders the results in descending order by num_participating_countries, and returns the top defense diplomacy event with the highest number of participating countries." 
+}, { + "id": "2165", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average defense diplomacy spending for Western European nations in 2016?", + "sql_context": "CREATE TABLE DefenseDiplomacy (nation VARCHAR(50), year INT, spending FLOAT); INSERT INTO DefenseDiplomacy (nation, year, spending) VALUES (\u0027France\u0027, 2016, 300000000), (\u0027Germany\u0027, 2016, 350000000), (\u0027United Kingdom\u0027, 2016, 400000000), (\u0027Italy\u0027, 2016, 280000000), (\u0027Spain\u0027, 2016, 330000000);", + "sql": "SELECT AVG(spending) FROM DefenseDiplomacy WHERE nation IN (\u0027France\u0027, \u0027Germany\u0027, \u0027United Kingdom\u0027, \u0027Italy\u0027, \u0027Spain\u0027) AND year \u003d 2016;", + "sql_explanation": "Calculate the average defense diplomacy spending for Western European nations in 2016." 
+}, { + "id": "2200", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027military_equipment\u0027", + "sql_context": "CREATE TABLE military_equipment (equipment_id INT, name VARCHAR(255), type VARCHAR(255), country_of_origin VARCHAR(255), year INT);", + "sql": "CREATE TABLE military_equipment (equipment_id INT, name VARCHAR(255), type VARCHAR(255), country_of_origin VARCHAR(255), year INT);", + "sql_explanation": "This SQL statement creates a new table named \u0027military_equipment\u0027 with 5 columns: equipment_id, name, type, country_of_origin, and year." +}, { + "id": "2217", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of peacekeeping troops contributed by \u0027Brazil\u0027 to all peacekeeping missions in the last 7 years?", + "sql_context": "CREATE TABLE peacekeeping_troops (id INT, country TEXT, mission TEXT, contribution_date DATE, troops INT); INSERT INTO peacekeeping_troops (id, country, mission, contribution_date, troops) VALUES (1, \u0027Brazil\u0027, \u0027Mission 1\u0027, \u00272016-01-01\u0027, 300);", + "sql": "SELECT SUM(troops) FROM peacekeeping_troops WHERE country \u003d \u0027Brazil\u0027 AND contribution_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 7 
YEAR);", + "sql_explanation": "This query calculates the sum of the troops column from the peacekeeping_troops table where the country is \u0027Brazil\u0027 and the contribution_date is within the last 7 years, by subtracting 7 years from the current date using the DATE_SUB function and comparing it to the contribution_date." +}, { + "id": "2479", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the countries that have received humanitarian assistance from the Red Cross in the last 3 years?", + "sql_context": "CREATE TABLE Humanitarian (donor VARCHAR(255), recipient VARCHAR(255), assistance VARCHAR(255), quantity INT, assistance_date DATE); INSERT INTO Humanitarian (donor, recipient, assistance, quantity, assistance_date) VALUES (\u0027Red Cross\u0027, \u0027Syria\u0027, \u0027food supplies\u0027, 5000, \u00272020-06-15\u0027);", + "sql": "SELECT DISTINCT recipient FROM Humanitarian WHERE donor \u003d \u0027Red Cross\u0027 AND assistance_date \u003e\u003d DATE(NOW()) - INTERVAL 3 YEAR;", + "sql_explanation": "The SQL query lists all the countries that have received humanitarian assistance by selecting the distinct \u0027recipient\u0027 column where the \u0027donor\u0027 is \u0027Red Cross\u0027 and the \u0027assistance_date\u0027 is within the last 3 years." 
+}, { + "id": "2603", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military innovation patents filed by \u0027Canada\u0027 in the last 6 years?", + "sql_context": "CREATE TABLE military_patents (id INT, country TEXT, filing_date DATE); INSERT INTO military_patents (id, country, filing_date) VALUES (1, \u0027Canada\u0027, \u00272016-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM military_patents WHERE country \u003d \u0027Canada\u0027 AND filing_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 YEAR);", + "sql_explanation": "This query counts the number of records in the military_patents table where the country is \u0027Canada\u0027 and the filing_date is within the last 6 years, by subtracting 6 years from the current date using the DATE_SUB function and comparing it to the filing_date." 
+}, { + "id": "2628", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027humanitarian_assistance\u0027 table with the following data: \u0027Operation Pacific Haven\u0027, \u0027Asia\u0027, \u0027completed\u0027", + "sql_context": "CREATE TABLE humanitarian_assistance (id INT PRIMARY KEY, name VARCHAR(255), continent VARCHAR(255), status VARCHAR(255));", + "sql": "INSERT INTO humanitarian_assistance (name, continent, status) VALUES (\u0027Operation Pacific Haven\u0027, \u0027Asia\u0027, \u0027completed\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027humanitarian_assistance\u0027 table with the following data: \u0027Operation Pacific Haven\u0027, \u0027Asia\u0027, \u0027completed\u0027. It uses the INSERT INTO statement, specifying the \u0027humanitarian_assistance\u0027 table and the values for the columns (\u0027name\u0027, \u0027continent\u0027, \u0027status\u0027)." 
+}, { + "id": "2752", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for Foreign Military Aid to \u0027Somalia\u0027 with an Amount of 4000000 for the year 2012 and to \u0027Syria\u0027 with an Amount of 5000000 for the year 2014.", + "sql_context": "CREATE TABLE ForeignMilitaryAid (Year INT, Country VARCHAR(50), Amount DECIMAL(10,2)); INSERT INTO ForeignMilitaryAid (Year, Country, Amount) VALUES (2005, \u0027Afghanistan\u0027, 5000000), (2006, \u0027Iraq\u0027, 7000000), (2010, \u0027Pakistan\u0027, 6000000);", + "sql": "INSERT INTO ForeignMilitaryAid (Year, Country, Amount) VALUES (2012, \u0027Somalia\u0027, 4000000), (2014, \u0027Syria\u0027, 5000000);", + "sql_explanation": "The INSERT statement adds new records to the ForeignMilitaryAid table for Foreign Military Aid to Somalia with an Amount of 4000000 for the year 2012 and to Syria with an Amount of 5000000 for the year 2014." 
+}, { + "id": "2789", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the minimum budget allocated for military innovation by South American countries in 2019?", + "sql_context": "CREATE TABLE military_innovation (country VARCHAR(50), year INT, budget INT); INSERT INTO military_innovation (country, year, budget) VALUES (\u0027Brazil\u0027, 2019, 5000000), (\u0027Argentina\u0027, 2019, 4000000), (\u0027Colombia\u0027, 2019, 3000000);", + "sql": "SELECT MIN(budget) FROM military_innovation WHERE country IN (\u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Colombia\u0027) AND year \u003d 2019;", + "sql_explanation": "This query calculates the minimum budget allocated for military innovation by South American countries (Brazil, Argentina, and Colombia) in 2019 by using the MIN function." 
+}, { + "id": "2912", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update military equipment records with equipment_id 101 and 102, setting quantity to 20 and 30 respectively", + "sql_context": "CREATE TABLE military_equipment (equipment_id INT, equipment_type VARCHAR(20), year_acquired INT, quantity INT); INSERT INTO military_equipment (equipment_id, equipment_type, year_acquired, quantity) VALUES (101, \u0027Tank\u0027, 2015, 15), (102, \u0027Aircraft\u0027, 2018, 25), (103, \u0027Helicopter\u0027, 2017, 30);", + "sql": "UPDATE military_equipment SET quantity \u003d CASE equipment_id WHEN 101 THEN 20 WHEN 102 THEN 30 ELSE quantity END;", + "sql_explanation": "This query updates the \"quantity\" field in the \"military_equipment\" table for equipment_id 101 and 102 with the values 20 and 30 respectively." 
+}, { + "id": "2993", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of humanitarian assistance provided by the United Kingdom to any country in 2019?", + "sql_context": "CREATE TABLE humanitarian_assistance (country VARCHAR(50), year INT, amount INT); INSERT INTO humanitarian_assistance (country, year, amount) VALUES (\u0027Syria\u0027, 2019, 1000000), (\u0027Yemen\u0027, 2019, 1500000), (\u0027Iraq\u0027, 2019, 1200000);", + "sql": "SELECT SUM(amount) FROM humanitarian_assistance WHERE country IN (\u0027Syria\u0027, \u0027Yemen\u0027, \u0027Iraq\u0027) AND year \u003d 2019;", + "sql_explanation": "This query calculates the total amount of humanitarian assistance provided by the United Kingdom to any country (Syria, Yemen, and Iraq) in 2019 by using the SUM function." 
+}, { + "id": "3130", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average defense diplomacy budget (in USD) for NATO between 2015 and 2020, inclusive?", + "sql_context": "CREATE TABLE defense_diplomacy(id INT, event_name VARCHAR(255), organizer VARCHAR(255), budget INT, event_year INT); INSERT INTO defense_diplomacy(id, event_name, organizer, budget, event_year) VALUES (1, \u0027NATO-Russia Council\u0027, \u0027NATO\u0027, 30000000, 2015), (2, \u0027NATO-Ukraine Commission\u0027, \u0027NATO\u0027, 40000000, 2016), (3, \u0027NATO-Georgia Commission\u0027, \u0027NATO\u0027, 50000000, 2017), (4, \u0027NATO Defense College\u0027, \u0027NATO\u0027, 60000000, 2018), (5, \u0027NATO Parliamentary Assembly\u0027, \u0027NATO\u0027, 70000000, 2019), (6, \u0027NATO Summit\u0027, \u0027NATO\u0027, 80000000, 2020);", + "sql": "SELECT AVG(budget) FROM defense_diplomacy WHERE organizer \u003d \u0027NATO\u0027 AND event_year BETWEEN 2015 AND 2020;", + "sql_explanation": "This query calculates the average defense diplomacy budget (in USD) for NATO between 2015 and 2020, inclusive." 
+}, { + "id": "3161", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of humanitarian assistance provided by \u0027Canada\u0027 between 2010 and 2015?", + "sql_context": "CREATE TABLE humanitarian_assistance (id INT, country VARCHAR(255), year INT, amount INT); INSERT INTO humanitarian_assistance (id, country, year, amount) VALUES (1, \u0027Canada\u0027, 2010, 1500000);", + "sql": "SELECT SUM(amount) FROM humanitarian_assistance WHERE country \u003d \u0027Canada\u0027 AND year BETWEEN 2010 AND 2015;", + "sql_explanation": "This SQL query calculates the sum of the \u0027amount\u0027 column for all records in the \u0027humanitarian_assistance\u0027 table where the \u0027country\u0027 column is equal to \u0027Canada\u0027 and the \u0027year\u0027 column is between 2010 and 2015." 
+}, { + "id": "3214", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for defense diplomacy by countries in North America in 2019?", + "sql_context": "CREATE TABLE DefenseDiplomacy (id INT, country VARCHAR(50), budget DECIMAL(10,2), year INT); INSERT INTO DefenseDiplomacy (id, country, budget, year) VALUES (1, \u0027USA\u0027, 10000000, 2019), (2, \u0027Canada\u0027, 5000000, 2019), (3, \u0027Mexico\u0027, 2000000, 2019);", + "sql": "SELECT SUM(budget) FROM DefenseDiplomacy WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027Mexico\u0027) AND year \u003d 2019;", + "sql_explanation": "This SQL query calculates the total budget allocated for defense diplomacy by North American countries in 2019. It filters the DefenseDiplomacy table based on the country and year, and then calculates the total budget using the SUM() function." 
+}, { + "id": "3238", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the humanitarian assistance operations carried out by the United Nations in Asia?", + "sql_context": "CREATE TABLE humanitarian_assistance (id INT, organization VARCHAR(50), location VARCHAR(50)); INSERT INTO humanitarian_assistance (id, organization, location) VALUES (1, \u0027United Nations\u0027, \u0027Afghanistan\u0027), (2, \u0027United Nations\u0027, \u0027Myanmar\u0027), (3, \u0027World Food Programme\u0027, \u0027Pakistan\u0027), (4, \u0027International Committee of the Red Cross\u0027, \u0027Syria\u0027);", + "sql": "SELECT * FROM humanitarian_assistance WHERE organization \u003d \u0027United Nations\u0027 AND location LIKE \u0027%Asia\u0027;", + "sql_explanation": "This query selects all records from the humanitarian_assistance table where the organization is \u0027United Nations\u0027 and the location is in Asia. This returns the humanitarian assistance operations carried out by the United Nations in Asia." 
+}, { + "id": "3325", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many humanitarian assistance missions were conducted by UN peacekeeping forces in 2019?", + "sql_context": "CREATE TABLE un_peacekeeping (mission_id INT, year INT, mission_type VARCHAR(50), country VARCHAR(50)); INSERT INTO un_peacekeeping (mission_id, year, mission_type, country) VALUES (101, 2019, \u0027peacekeeping\u0027, \u0027South Sudan\u0027), (102, 2019, \u0027humanitarian_assistance\u0027, \u0027Syria\u0027), (103, 2019, \u0027peacekeeping\u0027, \u0027Somalia\u0027), (104, 2019, \u0027humanitarian_assistance\u0027, \u0027Yemen\u0027), (105, 2019, \u0027peacekeeping\u0027, \u0027Mali\u0027);", + "sql": "SELECT COUNT(*) FROM un_peacekeeping WHERE year \u003d 2019 AND mission_type \u003d \u0027humanitarian_assistance\u0027;", + "sql_explanation": "This query determines the number of humanitarian assistance missions conducted by UN peacekeeping forces in 2019 by counting rows in the \u0027un_peacekeeping\u0027 table that have a \u0027year\u0027 of 2019 and a \u0027mission_type\u0027 of \u0027humanitarian_assistance\u0027." 
+}, { + "id": "3401", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add new record to military_equipment table, including \u0027M1 Abrams\u0027 as equipment_name, \u0027USA\u0027 as origin, \u0027Tank\u0027 as type", + "sql_context": "CREATE TABLE military_equipment (id INT PRIMARY KEY, equipment_name VARCHAR(100), origin VARCHAR(50), type VARCHAR(50));", + "sql": "INSERT INTO military_equipment (equipment_name, origin, type) VALUES (\u0027M1 Abrams\u0027, \u0027USA\u0027, \u0027Tank\u0027);", + "sql_explanation": "* This query adds a new record to the military_equipment table for the \u0027M1 Abrams\u0027 equipment, USA as origin, with type \u0027Tank\u0027." 
+}, { + "id": "3505", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for peacekeeping operations in Africa since 2015?", + "sql_context": "CREATE TABLE PeacekeepingOperations (id INT PRIMARY KEY, operation VARCHAR(100), location VARCHAR(50), year INT, budget INT); INSERT INTO PeacekeepingOperations (id, operation, location, year, budget) VALUES (1, \u0027MINUSCA\u0027, \u0027Central African Republic\u0027, 2016, 864731532);", + "sql": "SELECT AVG(budget) FROM PeacekeepingOperations WHERE location LIKE \u0027%Africa%\u0027 AND year \u003e\u003d 2015;", + "sql_explanation": "This query calculates the average budget for peacekeeping operations in Africa since 2015. It filters rows by location and year, then calculates the average budget of the remaining rows." 
+}, { + "id": "3523", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the defense spending trend for South America from 2015 to 2020?", + "sql_context": "CREATE TABLE defense_spending_trend (year INT, region VARCHAR(50), spending NUMERIC(10,2)); INSERT INTO defense_spending_trend (year, region, spending) VALUES (2015, \u0027South America\u0027, 4000000000), (2016, \u0027South America\u0027, 4500000000), (2017, \u0027South America\u0027, 5000000000), (2018, \u0027South America\u0027, 5500000000), (2019, \u0027South America\u0027, 6000000000), (2020, \u0027South America\u0027, 6500000000);", + "sql": "SELECT year, spending FROM defense_spending_trend WHERE region \u003d \u0027South America\u0027 ORDER BY year;", + "sql_explanation": "The SQL query selects the year and spending columns from the defense_spending_trend table, where the region is \u0027South America\u0027, and orders the results by year. This shows the defense spending trend for South America from 2015 to 2020." 
+}, { + "id": "3581", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of peacekeeping operations conducted in the year 2021 by countries in the European Union?", + "sql_context": "CREATE SCHEMA peace_operations;CREATE TABLE eu_operations (operation_name VARCHAR(50), year INT, organization VARCHAR(50));INSERT INTO peace_operations.eu_operations (operation_name, year, organization) VALUES (\u0027France I\u0027, 2021, \u0027EU\u0027), (\u0027Germany II\u0027, 2021, \u0027EU\u0027), (\u0027Italy III\u0027, 2021, \u0027EU\u0027), (\u0027Spain IV\u0027, 2021, \u0027EU\u0027), (\u0027Poland V\u0027, 2021, \u0027EU\u0027);", + "sql": "SELECT COUNT(*) FROM peace_operations.eu_operations WHERE year \u003d 2021 AND organization \u003d \u0027EU\u0027;", + "sql_explanation": "Counts the number of peacekeeping operations conducted in the year 2021 by countries in the European Union." 
+}, { + "id": "3887", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who were the defense diplomacy partners of India in 2018?", + "sql_context": "CREATE TABLE defense_diplomacy (country VARCHAR(50), year INT, partner VARCHAR(50)); INSERT INTO defense_diplomacy (country, year, partner) VALUES (\u0027India\u0027, 2018, \u0027France\u0027), (\u0027India\u0027, 2018, \u0027Russia\u0027), (\u0027India\u0027, 2018, \u0027USA\u0027), (\u0027India\u0027, 2018, \u0027Israel\u0027), (\u0027India\u0027, 2018, \u0027Japan\u0027), (\u0027India\u0027, 2018, \u0027Australia\u0027);", + "sql": "SELECT DISTINCT partner FROM defense_diplomacy WHERE country \u003d \u0027India\u0027 AND year \u003d 2018;", + "sql_explanation": "This query identifies the defense diplomacy partners of India in 2018 by selecting distinct values from the \u0027partner\u0027 column in the \u0027defense_diplomacy\u0027 table, filtering rows by \u0027country\u0027 and \u0027year\u0027 to only include data from India and 2018." 
+}, { + "id": "4037", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of military innovation projects per year?", + "sql_context": "CREATE TABLE military_innovation (project_year INT, project_status VARCHAR(255));", + "sql": "SELECT AVG(project_year) FROM military_innovation WHERE project_status \u003d \u0027completed\u0027;", + "sql_explanation": "The SQL query calculates the average number of military innovation projects per year. It filters the records based on the project_status column and then calculates the average of the project_year column using the AVG function." +}, { + "id": "4214", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the \"country\" and \"resource_type\" columns from the \"humanitarian_assistance\" table, showing only records where the \"quantity\" column is greater than 5000", + "sql_context": "CREATE TABLE humanitarian_assistance (id INT, country VARCHAR(50), resource_type VARCHAR(50), quantity INT); INSERT INTO humanitarian_assistance (id, country, resource_type, quantity) VALUES (1, \u0027Syria\u0027, \u0027food\u0027, 15000), (2, \u0027Yemen\u0027, \u0027water\u0027, 800), (3, \u0027Afghanistan\u0027, \u0027medicine\u0027, 500);", + "sql": "SELECT country, 
resource_type FROM humanitarian_assistance WHERE quantity \u003e 5000;", + "sql_explanation": "This query displays the \"country\" and \"resource_type\" columns from the \"humanitarian_assistance\" table, showing only records where the \"quantity\" column is greater than 5000." +}, { + "id": "4349", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027status\u0027 column to \u0027completed\u0027 for all records in the \u0027peacekeeping_operations\u0027 table where \u0027name\u0027 is \u0027MONUSCO\u0027", + "sql_context": "CREATE TABLE peacekeeping_operations (id INT PRIMARY KEY, name VARCHAR(255), continent VARCHAR(255), status VARCHAR(255)); INSERT INTO peacekeeping_operations (id, name, continent, status) VALUES (1, \u0027MINUSCA\u0027, \u0027Africa\u0027, \u0027inactive\u0027), (2, \u0027MONUSCO\u0027, \u0027Africa\u0027, \u0027planning\u0027), (3, \u0027UNMISS\u0027, \u0027Africa\u0027, \u0027active\u0027);", + "sql": "UPDATE peacekeeping_operations SET status \u003d \u0027completed\u0027 WHERE name \u003d \u0027MONUSCO\u0027;", + "sql_explanation": "This query updates the \u0027status\u0027 column to \u0027completed\u0027 for all records in the \u0027peacekeeping_operations\u0027 table where the \u0027name\u0027 is \u0027MONUSCO\u0027. It uses the UPDATE statement, specifying the \u0027peacekeeping_operations\u0027 table, the column to update (\u0027status\u0027), and the condition for updating in the WHERE clause." 
+}, { + "id": "4874", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027defense_diplomacy\u0027 table and change the partnership of the record with id 1 to \u0027US-Germany\u0027", + "sql_context": "CREATE TABLE defense_diplomacy (id INT PRIMARY KEY, partnership VARCHAR(50), year INT); INSERT INTO defense_diplomacy (id, partnership, year) VALUES (1, \u0027US-UK\u0027, 2005); INSERT INTO defense_diplomacy (id, partnership, year) VALUES (2, \u0027US-France\u0027, 1999);", + "sql": "UPDATE defense_diplomacy SET partnership \u003d \u0027US-Germany\u0027 WHERE id \u003d 1;", + "sql_explanation": "This query updates the \u0027defense_diplomacy\u0027 table and changes the partnership of the record with id 1 to \u0027US-Germany\u0027." 
+}, { + "id": "4881", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the number of defense diplomacy events in 2018 and 2019?", + "sql_context": "CREATE TABLE defense_diplomacy (id INT, year INT, events INT); INSERT INTO defense_diplomacy (id, year, events) VALUES (1, 2017, 25), (2, 2018, 30), (3, 2019, 35), (4, 2020, 40), (5, 2021, 45);", + "sql": "SELECT SUM(events) FROM defense_diplomacy WHERE year IN (2018, 2019);", + "sql_explanation": "The SQL query adds up the number of defense diplomacy events that took place in the years 2018 and 2019." +}, { + "id": "5149", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update military_personnel table, set status to \u0027retired\u0027 where age is greater than 65", + "sql_context": "CREATE TABLE military_personnel (id INT PRIMARY KEY, name VARCHAR(100), rank VARCHAR(50), age INT, status VARCHAR(50)); INSERT INTO military_personnel (id, name, rank, age, status) VALUES (1, \u0027John Doe\u0027, \u0027Colonel\u0027, 66, \u0027active\u0027);", + "sql": "UPDATE military_personnel SET status \u003d \u0027retired\u0027 WHERE age \u003e 65;", + "sql_explanation": "* This query updates the status to \u0027retired\u0027 in the military_personnel table for personnel who are older than 65." 
+}, { + "id": "5154", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete diplomacy meeting records with meeting_id 10, 11, and 12", + "sql_context": "CREATE TABLE diplomacy_meetings (meeting_id INT, country_name VARCHAR(50), meeting_date DATE, meeting_outcome VARCHAR(20));", + "sql": "DELETE FROM diplomacy_meetings WHERE meeting_id IN (10, 11, 12);", + "sql_explanation": "This query deletes records from the \"diplomacy_meetings\" table where the \"meeting_id\" is 10, 11, or 12." +}, { + "id": "5201", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List defense diplomacy events with their respective countries and dates in 2021.", + "sql_context": "CREATE TABLE Defense_Diplomacy_Events (Event_ID INT PRIMARY KEY, Country VARCHAR(100), Date DATE);", + "sql": "SELECT * FROM Defense_Diplomacy_Events WHERE Year(Date) \u003d 2021;", + "sql_explanation": "This query lists defense diplomacy events with their respective countries and dates in 2021 by selecting all columns from the Defense_Diplomacy_Events table where the year of the date is 2021." 
+}, { + "id": "5214", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many peacekeeping operations were conducted in 2014?", + "sql_context": "CREATE TABLE PeacekeepingOperations (Year INT, Operation VARCHAR(50), Country VARCHAR(50)); INSERT INTO PeacekeepingOperations (Year, Operation, Country) VALUES (2014, \u0027Operation 1\u0027, \u0027Country 1\u0027), (2014, \u0027Operation 2\u0027, \u0027Country 2\u0027);", + "sql": "SELECT COUNT(*) FROM PeacekeepingOperations WHERE Year \u003d 2014;", + "sql_explanation": "This query counts all rows in the PeacekeepingOperations table where the year is 2014." +}, { + "id": "5397", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the defense spending by region for each year?", + "sql_context": "CREATE TABLE defense_spending_trend (year INT, region VARCHAR(50), spending NUMERIC(10,2)); INSERT INTO defense_spending_trend (year, region, spending) VALUES (2015, \u0027Africa\u0027, 3000000000), (2015, \u0027Asia\u0027, 8000000000), (2015, \u0027Europe\u0027, 7000000000), (2015, \u0027North America\u0027, 10000000000), (2015, \u0027South America\u0027, 4000000000), (2016, \u0027Africa\u0027, 3500000000), (2016, \u0027Asia\u0027, 8500000000), (2016, \u0027Europe\u0027, 7500000000), 
(2016, \u0027North America\u0027, 11000000000), (2016, \u0027South America\u0027, 4500000000), (2017, \u0027Africa\u0027, 4000000000), (2017, \u0027Asia\u0027, 9000000000), (2017, \u0027Europe\u0027, 8000000000), (2017, \u0027North America\u0027, 12000000000), (2017, \u0027South America\u0027, 5000000000);", + "sql": "SELECT year, region, spending FROM defense_spending_trend;", + "sql_explanation": "The SQL query selects the year, region, and spending columns from the defense_spending_trend table, which already contains the defense spending by region for each year." +}, { + "id": "5433", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total military innovation funding in 2015?", + "sql_context": "CREATE TABLE MilitaryInnovation (Year INT, Funding FLOAT); INSERT INTO MilitaryInnovation (Year, Funding) VALUES (2015, 12000000);", + "sql": "SELECT Funding FROM MilitaryInnovation WHERE Year \u003d 2015;", + "sql_explanation": "This query selects the funding from the MilitaryInnovation table where the year is 2015." 
+}, { + "id": "5502", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the Rank of soldier with SoldierID 1 to \u0027Major\u0027.", + "sql_context": "CREATE TABLE Soldiers (SoldierID INT, Name VARCHAR(50), Rank VARCHAR(20), EntryYear INT); INSERT INTO Soldiers (SoldierID, Name, Rank, EntryYear) VALUES (1, \u0027John Doe\u0027, \u0027Captain\u0027, 1995), (2, \u0027Jane Smith\u0027, \u0027Lieutenant\u0027, 2002);", + "sql": "UPDATE Soldiers SET Rank \u003d \u0027Major\u0027 WHERE SoldierID \u003d 1;", + "sql_explanation": "The UPDATE statement modifies the Rank of the soldier with SoldierID 1 to \u0027Major\u0027." +}, { + "id": "5622", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of peacekeeping operations conducted by NATO since its inception?", + "sql_context": "CREATE TABLE nato_peacekeeping_operations (id INT, operation_name VARCHAR(255), start_date DATE); INSERT INTO nato_peacekeeping_operations (id, operation_name, start_date) VALUES (1, \u0027NATO-led Kosovo Force\u0027, \u00271999-06-12\u0027);", + "sql": "SELECT COUNT(*) FROM nato_peacekeeping_operations;", + "sql_explanation": "This SQL query counts the number of records in the nato_peacekeeping_operations table." 
+}, { + "id": "5665", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027defense_diplomacy\u0027 table where the year is less than 2000", + "sql_context": "CREATE TABLE defense_diplomacy (id INT PRIMARY KEY, partnership VARCHAR(50), year INT); INSERT INTO defense_diplomacy (id, partnership, year) VALUES (1, \u0027US-UK\u0027, 2005); INSERT INTO defense_diplomacy (id, partnership, year) VALUES (2, \u0027US-France\u0027, 1999);", + "sql": "DELETE FROM defense_diplomacy WHERE year \u003c 2000;", + "sql_explanation": "This query deletes all records from the \u0027defense_diplomacy\u0027 table where the year is less than 2000." 
+}, { + "id": "527", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total quantity of organic cotton clothing items manufactured globally in Q3 2021?", + "sql_context": "CREATE TABLE manufacturing (item_code VARCHAR(20), item_name VARCHAR(50), category VARCHAR(20), country VARCHAR(50), quantity INT, is_organic BOOLEAN, manufacturing_date DATE);", + "sql": "SELECT SUM(quantity) as total_quantity FROM manufacturing WHERE category \u003d \u0027clothing\u0027 AND is_organic \u003d TRUE AND manufacturing_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027 AND country NOT IN (\u0027United States\u0027, \u0027Canada\u0027);", + "sql_explanation": "This SQL query calculates the total quantity of organic cotton clothing items manufactured globally in Q3 2021, excluding the United States and Canada. It filters the manufacturing table by category, is_organic, manufacturing date, and country, and then calculates the sum of the quantity for the specified time period (Q3 2021) where is_organic is true." 
+}, { + "id": "2333", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total quantity of sustainable fabric types used in the last two years.", + "sql_context": "CREATE TABLE Fabrics_History (id INT PRIMARY KEY, type VARCHAR(20), year INT, quantity INT); INSERT INTO Fabrics_History (id, type, year, quantity) VALUES (1, \u0027Organic_Cotton\u0027, 2021, 6000), (2, \u0027Recycled_Polyester\u0027, 2020, 7000), (3, \u0027Organic_Cotton\u0027, 2020, 4000);", + "sql": "SELECT SUM(quantity) FROM Fabrics_History WHERE type IN (\u0027Organic_Cotton\u0027, \u0027Recycled_Polyester\u0027) AND year BETWEEN 2020 AND 2021;", + "sql_explanation": "The SQL query calculates the total quantity of sustainable fabric types used in the last two years by filtering the Fabrics_History table using the WHERE clause and then applying the SUM function." 
+}, { + "id": "2486", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average retail price of women\u0027s organic cotton t-shirts sold in Canada?", + "sql_context": "CREATE TABLE garment_sales (id INT, garment_type VARCHAR(50), material VARCHAR(50), country VARCHAR(50), price DECIMAL(5,2)); INSERT INTO garment_sales (id, garment_type, material, country, price) VALUES (1, \u0027t-shirt\u0027, \u0027organic cotton\u0027, \u0027Canada\u0027, 19.99), (2, \u0027t-shirt\u0027, \u0027conventional cotton\u0027, \u0027Canada\u0027, 15.99);", + "sql": "SELECT AVG(price) FROM garment_sales WHERE garment_type \u003d \u0027t-shirt\u0027 AND material \u003d \u0027organic cotton\u0027 AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "This SQL query calculates the average retail price of women\u0027s organic cotton t-shirts by filtering the garment_sales table based on the garment_type, material, and country columns, and then calculating the average price using the AVG function." 
+}, { + "id": "2568", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many garments of the \u0027Jeans\u0027 category were sold in the last six months of 2021?", + "sql_context": "CREATE TABLE sales_category_2 (sale_id INT, sale_date DATE, category VARCHAR(20), quantity INT); INSERT INTO sales_category_2 (sale_id, sale_date, category, quantity) VALUES (1, \u00272021-07-05\u0027, \u0027Jeans\u0027, 8), (2, \u00272021-08-10\u0027, \u0027Tops\u0027, 18), (3, \u00272021-09-20\u0027, \u0027Jeans\u0027, 12), (4, \u00272021-10-15\u0027, \u0027Jackets\u0027, 5), (5, \u00272021-11-25\u0027, \u0027Jeans\u0027, 15), (6, \u00272021-12-05\u0027, \u0027Tops\u0027, 14);", + "sql": "SELECT SUM(quantity) FROM sales_category_2 WHERE category \u003d \u0027Jeans\u0027 AND sale_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "The SQL query calculates the number of garments in the \u0027Jeans\u0027 category sold in the last six months of 2021 by summing up the quantity column where the category is Jeans and the sale_date is between 2021-07-01 and 2021-12-31." 
+}, { + "id": "2688", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of unsustainable accessories sold in Spain.", + "sql_context": "CREATE TABLE garment_sales (id INT, garment_type VARCHAR(50), sustainability_rating INT, country VARCHAR(50)); INSERT INTO garment_sales (id, garment_type, sustainability_rating, country) VALUES (1, \u0027hat\u0027, 2, \u0027Spain\u0027), (2, \u0027scarf\u0027, 3, \u0027Spain\u0027);", + "sql": "DELETE FROM garment_sales WHERE garment_type IN (\u0027hat\u0027, \u0027scarf\u0027) AND country \u003d \u0027Spain\u0027 AND sustainability_rating \u003c 4;", + "sql_explanation": "This SQL query deletes all records of unsustainable accessories sold in Spain by filtering the garment_sales table based on the garment_type, country, and sustainability_rating columns and then using the DELETE statement to remove the rows that meet the criteria." 
+}, { + "id": "2744", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027products\u0027 table with the following data: \u0027Product 100\u0027, \u0027Ethical Jeans\u0027, \u0027Eco-Friendly\u0027, 65.99", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), brand VARCHAR(20), price DECIMAL(5,2));", + "sql": "INSERT INTO products (product_id, product_name, brand, price) VALUES (100, \u0027Ethical Jeans\u0027, \u0027Eco-Friendly\u0027, 65.99);", + "sql_explanation": "A new record is inserted into the products table using the INSERT INTO statement with the specified values." 
+}, { + "id": "3285", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify suppliers who provide sustainable fabrics but are not located in Europe.", + "sql_context": "CREATE TABLE Suppliers (supplier_id INT, supplier_name VARCHAR(255), country VARCHAR(255), industry VARCHAR(255)); INSERT INTO Suppliers (supplier_id, supplier_name, country, industry) VALUES (1, \u0027ABC Inc.\u0027, \u0027USA\u0027, \u0027Textile\u0027), (2, \u0027XYZ Ltd.\u0027, \u0027Brazil\u0027, \u0027Garment\u0027), (3, \u0027LMN Corp.\u0027, \u0027China\u0027, \u0027Accessories\u0027), (4, \u0027 DEF GmbH\u0027, \u0027Germany\u0027, \u0027Sustainable Fabrics\u0027), (5, \u0027GHI Enterprises\u0027, \u0027Nigeria\u0027, \u0027Garment\u0027), (6, \u0027JKL Sustainability\u0027, \u0027France\u0027, \u0027Sustainable Fabrics\u0027);", + "sql": "SELECT s.* FROM Suppliers s WHERE s.industry \u003d \u0027Sustainable Fabrics\u0027 AND s.country NOT IN (\u0027Europe\u0027);", + "sql_explanation": "This query identifies suppliers who provide sustainable fabrics but are not located in Europe. It does so by selecting all columns from the Suppliers table where the industry is \u0027Sustainable Fabrics\u0027 and the country is not in Europe." 
+}, { + "id": "3302", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of garment manufacturers that use recycled materials in the United States.", + "sql_context": "CREATE TABLE garment_manufacturers (id INT, name VARCHAR(100), country VARCHAR(50), uses_recycled_materials BOOLEAN); INSERT INTO garment_manufacturers (id, name, country, uses_recycled_materials) VALUES (1, \u0027Manufacturer A\u0027, \u0027USA\u0027, true), (2, \u0027Manufacturer B\u0027, \u0027USA\u0027, false);", + "sql": "SELECT COUNT(*) FROM garment_manufacturers WHERE country \u003d \u0027USA\u0027 AND uses_recycled_materials \u003d true;", + "sql_explanation": "This SQL query counts the number of garment manufacturers in the United States that use recycled materials by filtering the garment_manufacturers table based on the country and uses_recycled_materials columns and then using the COUNT function to count the number of rows that meet the criteria." 
+}, { + "id": "3399", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which garment categories have the highest CO2 emissions?", + "sql_context": "CREATE TABLE garment_emissions (garment_category VARCHAR(50), co2_emissions DECIMAL(10,2));", + "sql": "SELECT garment_category, co2_emissions FROM garment_emissions ORDER BY co2_emissions DESC LIMIT 5;", + "sql_explanation": "The SQL query retrieves the garment categories with the highest CO2 emissions by sorting the records by co2_emissions in descending order and limiting the results to the top 5 records." +}, { + "id": "3600", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the customer_sales table for customers who have not made a purchase in the last 12 months.", + "sql_context": "CREATE TABLE customer_sales (id INT, customer_name VARCHAR(255), region VARCHAR(255), quantity INT, last_purchase_date DATE); INSERT INTO customer_sales (id, customer_name, region, quantity, last_purchase_date) VALUES (1, \u0027John Smith\u0027, \u0027Australia\u0027, 500, \u00272021-08-01\u0027), (2, \u0027Jane Doe\u0027, \u0027Australia\u0027, 600, \u00272022-03-01\u0027), (3, \u0027Bob Johnson\u0027, \u0027Australia\u0027, 700, \u00272021-12-01\u0027);", + "sql": 
"DELETE FROM customer_sales WHERE last_purchase_date \u003c DATEADD(month, -12, CURRENT_TIMESTAMP);", + "sql_explanation": "The SQL query deletes records from the customer_sales table for customers who have not made a purchase in the last 12 months using the DELETE statement and a WHERE clause with a date comparison." +}, { + "id": "3629", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for eco-friendly garments in India in Q2 2021?", + "sql_context": "CREATE TABLE india_eco_friendly_garments (garment_type VARCHAR(255), sales_amount DECIMAL(10,2), quarter INT, year INT); INSERT INTO india_eco_friendly_garments (garment_type, sales_amount, quarter, year) VALUES (\u0027Shirt\u0027, 700.00, 2, 2021), (\u0027Pants\u0027, 800.00, 2, 2021);", + "sql": "SELECT SUM(sales_amount) FROM india_eco_friendly_garments WHERE quarter \u003d 2 AND year \u003d 2021;", + "sql_explanation": "This query calculates the total revenue for eco-friendly garments in India during Q2 2021 by summing up the sales_amount values that meet the specified conditions." 
+}, { + "id": "3670", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average sales amount for eco-friendly garments in Italy in Q2 2021?", + "sql_context": "CREATE TABLE italy_eco_friendly_garments (garment_type VARCHAR(255), sales_amount DECIMAL(10,2), quarter INT, year INT); INSERT INTO italy_eco_friendly_garments (garment_type, sales_amount, quarter, year) VALUES (\u0027T-Shirt\u0027, 2500.00, 2, 2021), (\u0027Hoodie\u0027, 3000.00, 2, 2021);", + "sql": "SELECT AVG(sales_amount) FROM italy_eco_friendly_garments WHERE quarter \u003d 2 AND year \u003d 2021;", + "sql_explanation": "This query calculates the average sales amount for eco-friendly garments in Italy during Q2 2021 by averaging the sales_amount values that meet the specified conditions." 
+}, { + "id": "3784", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the \u0027Revenue\u0027 for \u0027Pants\u0027 sold in \u0027United Kingdom\u0027.", + "sql_context": "CREATE TABLE pants_revenue(product VARCHAR(20), location VARCHAR(20), revenue INT); INSERT INTO pants_revenue VALUES(\u0027Pants\u0027, \u0027United Kingdom\u0027, 7000);", + "sql": "SELECT revenue FROM pants_revenue WHERE product \u003d \u0027Pants\u0027 AND location \u003d \u0027United Kingdom\u0027;", + "sql_explanation": "Select the \u0027Revenue\u0027 for \u0027Pants\u0027 sold in \u0027United Kingdom\u0027 directly from the pants_revenue table." 
+}, { + "id": "4507", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the sustainability_metrics table to reflect the latest CO2 emissions data for garment production in Asia.", + "sql_context": "CREATE TABLE sustainability_metrics (id INT, region VARCHAR(255), co2_emissions INT); INSERT INTO sustainability_metrics (id, region, co2_emissions) VALUES (1, \u0027South America\u0027, 130), (2, \u0027Europe\u0027, 100), (3, \u0027Asia\u0027, 150);", + "sql": "UPDATE sustainability_metrics SET co2_emissions \u003d 160 WHERE region \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query updates the co2_emissions column value for the \u0027Asia\u0027 region in the sustainability_metrics table to 160 using the UPDATE statement." 
+}, { + "id": "4695", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all garments in the \"Spring 2023\" collection that are made of silk or cotton.", + "sql_context": "CREATE TABLE Spring2023 (garment_id INT, garment_name VARCHAR(50), material VARCHAR(50)); INSERT INTO Spring2023 (garment_id, garment_name, material) VALUES (1, \u0027Linen Blend Dress\u0027, \u0027Linen-Hemp Blend\u0027), (2, \u0027Silk Top\u0027, \u0027Silk\u0027), (3, \u0027Recycled Polyester Skirt\u0027, \u0027Recycled Polyester\u0027), (4, \u0027Cotton Shirt\u0027, \u0027Cotton\u0027);", + "sql": "SELECT garment_name FROM Spring2023 WHERE material IN (\u0027Silk\u0027, \u0027Cotton\u0027);", + "sql_explanation": "This query retrieves the names of garments in the \"Spring 2023\" collection that are made of silk or cotton by filtering the material column for the values \u0027Silk\u0027 and \u0027Cotton\u0027." 
+}, { + "id": "4962", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all garments and their corresponding categories, with prices greater than 30.00, from the garments table", + "sql_context": "CREATE TABLE garments (id INT, name VARCHAR(100), price DECIMAL(5,2), category VARCHAR(50));", + "sql": "SELECT id, name, category, price FROM garments WHERE price \u003e 30.00;", + "sql_explanation": "This query lists all garments and their corresponding categories, with prices greater than 30.00, from the garments table using the SELECT statement and WHERE clause to filter the results based on price." +}, { + "id": "4971", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many different \u0027Colors\u0027 are available for \u0027Trousers\u0027?", + "sql_context": "CREATE TABLE Trousers (color VARCHAR(20)); INSERT INTO Trousers VALUES (\u0027Red\u0027), (\u0027Blue\u0027), (\u0027Green\u0027);", + "sql": "SELECT COUNT(DISTINCT color) FROM Trousers WHERE item \u003d \u0027Trousers\u0027;", + "sql_explanation": "This SQL query calculates the number of different \u0027Colors\u0027 available for \u0027Trousers\u0027 by counting the distinct \u0027color\u0027 column values where the \u0027item\u0027 is 
\u0027Trousers\u0027." +}, { + "id": "5306", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all garments in the \"Spring 2023\" collection that use sustainable materials.", + "sql_context": "CREATE TABLE Spring2023 (garment_id INT, garment_name VARCHAR(50), material VARCHAR(50), sustainable BOOLEAN); INSERT INTO Spring2023 (garment_id, garment_name, material, sustainable) VALUES (1, \u0027Linen Blend Dress\u0027, \u0027Linen-Hemp Blend\u0027, true), (2, \u0027Silk Top\u0027, \u0027Silk\u0027, false), (3, \u0027Recycled Polyester Skirt\u0027, \u0027Recycled Polyester\u0027, true);", + "sql": "SELECT garment_name FROM Spring2023 WHERE sustainable \u003d true;", + "sql_explanation": "This query retrieves the names of garments in the \"Spring 2023\" collection that use sustainable materials by filtering the sustainable column for true values." 
+}, { + "id": "5395", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of all garments in the \u0027Tops\u0027 category to $25.00", + "sql_context": "CREATE TABLE Garments (id INT, name VARCHAR(255), category VARCHAR(255), color VARCHAR(255), size VARCHAR(10), price DECIMAL(5, 2));", + "sql": "UPDATE Garments SET price \u003d 25.00 WHERE category \u003d \u0027Tops\u0027;", + "sql_explanation": "This SQL query updates the price of all garments in the \u0027Tops\u0027 category to $25.00. It uses the SET clause to modify the price column and the WHERE clause to filter for records in the \u0027Tops\u0027 category." +}, { + "id": "5418", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the color of all garments in the \u0027Tops\u0027 category to \u0027Red\u0027", + "sql_context": "CREATE TABLE Garments (id INT, name VARCHAR(255), category VARCHAR(255), color VARCHAR(255), size VARCHAR(10), price DECIMAL(5, 2));", + "sql": "UPDATE Garments SET color \u003d \u0027Red\u0027 WHERE category \u003d \u0027Tops\u0027;", + "sql_explanation": "This SQL query updates the color of all garments in the \u0027Tops\u0027 category to \u0027Red\u0027. 
It uses the SET clause to modify the color column and the WHERE clause to filter for records in the \u0027Tops\u0027 category." +}, { + "id": "5458", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "minimum retail price of garments in the \u0027Tops\u0027 category", + "sql_context": "CREATE TABLE GarmentCategories (category VARCHAR(25)); INSERT INTO GarmentCategories (category) VALUES (\u0027Tops\u0027), (\u0027Bottoms\u0027), (\u0027Dresses\u0027); CREATE TABLE Garments (garment_id INT, price DECIMAL(5,2), category VARCHAR(25)); INSERT INTO Garments (garment_id, price, category) VALUES (1, 50.00, \u0027Tops\u0027), (2, 75.00, \u0027Tops\u0027), (3, 30.00, \u0027Bottoms\u0027);", + "sql": "SELECT MIN(price) FROM Garments WHERE category \u003d \u0027Tops\u0027;", + "sql_explanation": "This query calculates the minimum retail price of garments in the \u0027Tops\u0027 category. It does this by filtering the Garments table to only include rows where the category is \u0027Tops\u0027, and then calculating the minimum price." 
+}, { + "id": "1242", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage of microloans in the \u0027microfinance\u0027 schema\u0027s \u0027loans\u0027 table that are disbursed to female borrowers.", + "sql_context": "CREATE TABLE microfinance.loans (loan_id INT, loan_type VARCHAR(20), borrower_gender VARCHAR(10)); INSERT INTO microfinance.loans (loan_id, loan_type, borrower_gender) VALUES (1, \u0027microloan\u0027, \u0027female\u0027), (2, \u0027small_business\u0027, \u0027male\u0027), (3, \u0027microloan\u0027, \u0027male\u0027), (4, \u0027microloan\u0027, \u0027female\u0027), (5, \u0027small_business\u0027, \u0027female\u0027);", + "sql": "SELECT 100.0 * COUNT(*) FILTER (WHERE loan_type \u003d \u0027microloan\u0027 AND borrower_gender \u003d \u0027female\u0027) / COUNT(*) FILTER (WHERE loan_type \u003d \u0027microloan\u0027) FROM microfinance.loans;", + "sql_explanation": "Calculate the percentage of microloans disbursed to female borrowers by dividing the count of microloans issued to women by the total count of microloans." 
+}, { + "id": "1621", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total loan amount disbursed by socially responsible lending organizations in the United States?", + "sql_context": "CREATE TABLE tlad_orgs (org_name TEXT, loan_amount NUMERIC); INSERT INTO tlad_orgs (org_name, loan_amount) VALUES (\u0027Socially Responsible USA\u0027, 2000000), (\u0027Lending with Integrity\u0027, 2500000), (\u0027Fair Lending Inc\u0027, 1800000);", + "sql": "SELECT SUM(loan_amount) FROM tlad_orgs WHERE org_name IN (\u0027Socially Responsible USA\u0027, \u0027Lending with Integrity\u0027, \u0027Fair Lending Inc\u0027) AND country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the total loan amount disbursed by socially responsible lending organizations in the United States by summing the loan_amount column where the org_name is in the list of socially responsible lending organizations in the United States." 
+}, { + "id": "2182", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of individuals in Europe who have received financial capability training in the last 12 months?", + "sql_context": "CREATE TABLE financial_capability (individual_id TEXT, training_date DATE, country TEXT); INSERT INTO financial_capability (individual_id, training_date, country) VALUES (\u002711111\u0027, \u00272022-01-01\u0027, \u0027Germany\u0027); INSERT INTO financial_capability (individual_id, training_date, country) VALUES (\u002722222\u0027, \u00272022-02-01\u0027, \u0027France\u0027);", + "sql": "SELECT COUNT(individual_id) FROM financial_capability WHERE training_date \u003e\u003d DATEADD(year, -1, CURRENT_DATE) AND country \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the number of individuals in Europe who have received financial capability training in the last 12 months. It uses the COUNT() function to count the number of individuals who have received training, and the WHERE clause to filter for individuals who have received training in the last 12 months and are in Europe." 
+}, { + "id": "2492", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average socially responsible lending loan amount for microfinance institutions in Southeast Asia?", + "sql_context": "CREATE TABLE socially_responsible_lending(id INT, loan_number INT, institution_region VARCHAR(50), amount INT); INSERT INTO socially_responsible_lending VALUES (1, 701, \u0027Southeast Asia\u0027, 5000); INSERT INTO socially_responsible_lending VALUES (2, 702, \u0027South Asia\u0027, 7000); INSERT INTO socially_responsible_lending VALUES (3, 703, \u0027East Asia\u0027, 9000); INSERT INTO socially_responsible_lending VALUES (4, 704, \u0027Southeast Asia\u0027, 6000);", + "sql": "SELECT AVG(amount) FROM socially_responsible_lending WHERE institution_region \u003d \u0027Southeast Asia\u0027 AND type \u003d \u0027microfinance\u0027;", + "sql_explanation": "This query calculates the average socially responsible lending loan amount for microfinance institutions in Southeast Asia by selecting the AVG function on the amount column, filtering by institution_region \u003d \u0027Southeast Asia\u0027 and type \u003d \u0027microfinance\u0027." 
+}, { + "id": "2497", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total amount of interest earned from Shariah-compliant financing in Q1 2022.", + "sql_context": "CREATE TABLE shariah_financing (transaction_id INT, client_id INT, transaction_date DATE, interest_rate DECIMAL(10,2), principal DECIMAL(10,2)); INSERT INTO shariah_financing (transaction_id, client_id, transaction_date, interest_rate, principal) VALUES (1, 201, \u00272022-01-05\u0027, 0.02, 1000.00), (2, 202, \u00272022-02-15\u0027, 0.03, 2000.00), (3, 203, \u00272022-03-30\u0027, 0.01, 500.00);", + "sql": "SELECT SUM(principal * interest_rate) FROM shariah_financing WHERE transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "This SQL query calculates the total amount of interest earned from Shariah-compliant financing in Q1 2022 by summing the product of the principal and interest_rate columns, filtered by a WHERE clause that uses the BETWEEN operator to select transactions within the specified date range." 
+}, { + "id": "2637", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of loans issued for Shariah-compliant microfinance in India?", + "sql_context": "CREATE TABLE shariah_microfinance (id INT, loan_type VARCHAR(255), amount DECIMAL(10,2), country VARCHAR(255));", + "sql": "SELECT SUM(amount) FROM shariah_microfinance WHERE loan_type \u003d \u0027Shariah-compliant microfinance\u0027 AND country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the total amount of loans issued for Shariah-compliant microfinance in India by using the SUM function on the amount column, filtering for rows where the loan type is \u0027Shariah-compliant microfinance\u0027 and the country is \u0027India\u0027." 
+}, { + "id": "2717", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total financial assets of Shariah-compliant institutions in Malaysia and Indonesia?", + "sql_context": "CREATE TABLE if not exists financial_assets (id INT, institution_name VARCHAR(100), country VARCHAR(50), is_shariah_compliant BOOLEAN, assets DECIMAL(15,2));", + "sql": "SELECT SUM(assets) FROM financial_assets WHERE country IN (\u0027Malaysia\u0027, \u0027Indonesia\u0027) AND is_shariah_compliant \u003d TRUE;", + "sql_explanation": "This SQL query calculates the total financial assets of Shariah-compliant institutions in Malaysia and Indonesia. It does so by selecting the sum (SUM) of the \u0027assets\u0027 column from the \u0027financial_assets\u0027 table where the \u0027country\u0027 is either \u0027Malaysia\u0027 or \u0027Indonesia\u0027 and \u0027is_shariah_compliant\u0027 is true (representing Shariah-compliant institutions)." 
+}, { + "id": "2917", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of loans issued for financial capability programs in Germany?", + "sql_context": "CREATE TABLE financial_capability (id INT, loan_type VARCHAR(255), amount DECIMAL(10,2), country VARCHAR(255));", + "sql": "SELECT SUM(amount) FROM financial_capability WHERE loan_type \u003d \u0027financial capability\u0027 AND country \u003d \u0027Germany\u0027;", + "sql_explanation": "This query calculates the total amount of loans issued for financial capability programs in Germany by using the SUM function on the amount column, filtering for rows where the loan type is \u0027financial capability\u0027 and the country is \u0027Germany\u0027." 
+}, { + "id": "3106", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets of the top 3 Shariah-compliant banks?", + "sql_context": "CREATE TABLE ShariahBanks (id INT, bank_name VARCHAR(50), country VARCHAR(50), total_assets FLOAT); INSERT INTO ShariahBanks (id, bank_name, country, total_assets) VALUES (1, \u0027ABC Islamic Bank\u0027, \u0027Malaysia\u0027, 5000000), (2, \u0027XYZ Islamic Bank\u0027, \u0027Malaysia\u0027, 6000000), (3, \u0027Islamic Bank of Saudi Arabia\u0027, \u0027Saudi Arabia\u0027, 12000000), (4, \u0027Al Rajhi Bank\u0027, \u0027Saudi Arabia\u0027, 15000000), (5, \u0027Bank Islam Brunei Darussalam\u0027, \u0027Brunei\u0027, 2000000);", + "sql": "SELECT bank_name, SUM(total_assets) as total_assets FROM ShariahBanks ORDER BY total_assets DESC LIMIT 3;", + "sql_explanation": "This SQL query calculates the total assets of the top 3 Shariah-compliant banks by ordering the records based on the \u0027total_assets\u0027 column in descending order and selecting the top 3 records. The query calculates the sum of \u0027total_assets\u0027 for each of the top 3 records." 
+}, { + "id": "3118", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets of the top 2 Shariah-compliant banks?", + "sql_context": "CREATE TABLE ShariahBanks (id INT, bank_name VARCHAR(50), country VARCHAR(50), total_assets FLOAT); INSERT INTO ShariahBanks (id, bank_name, country, total_assets) VALUES (1, \u0027Al Iman Bank\u0027, \u0027Bahrain\u0027, 7000000), (2, \u0027Dubai Islamic Bank\u0027, \u0027United Arab Emirates\u0027, 9000000), (3, \u0027Maybank Islamic\u0027, \u0027Malaysia\u0027, 11000000), (4, \u0027Bank Islam\u0027, \u0027Brunei\u0027, 13000000), (5, \u0027CIMB Islamic\u0027, \u0027Malaysia\u0027, 15000000);", + "sql": "SELECT bank_name, SUM(total_assets) as total_assets FROM ShariahBanks ORDER BY total_assets DESC LIMIT 2;", + "sql_explanation": "This SQL query calculates the total assets of the top 2 Shariah-compliant banks by ordering the records based on the \u0027total_assets\u0027 column in descending order and selecting the top 2 records. The query calculates the sum of \u0027total_assets\u0027 for each of the top 2 records." 
+}, { + "id": "3345", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average financial capability score for women in South Africa and Brazil.", + "sql_context": "CREATE TABLE fc_scores (name TEXT, gender TEXT, country TEXT, score NUMERIC); INSERT INTO fc_scores (name, gender, country, score) VALUES (\u0027Jane Doe\u0027, \u0027Female\u0027, \u0027South Africa\u0027, 75), (\u0027Jane Smith\u0027, \u0027Female\u0027, \u0027Brazil\u0027, 80), (\u0027John Doe\u0027, \u0027Male\u0027, \u0027South Africa\u0027, 70);", + "sql": "SELECT AVG(score) FROM fc_scores WHERE gender \u003d \u0027Female\u0027 AND country IN (\u0027South Africa\u0027, \u0027Brazil\u0027);", + "sql_explanation": "The SQL query calculates the average financial capability score for women in South Africa and Brazil by selecting the average score where the gender is \u0027Female\u0027 and the country is either \u0027South Africa\u0027 or \u0027Brazil\u0027." 
+}, { + "id": "3390", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum socially responsible loan amount?", + "sql_context": "CREATE TABLE loans (id INT PRIMARY KEY, loan_id INT, amount INT, client_id INT, is_socially_responsible BOOLEAN);", + "sql": "SELECT MAX(loans.amount) as max_loan_amount FROM loans WHERE loans.is_socially_responsible \u003d TRUE;", + "sql_explanation": "This query calculates the maximum loan amount for socially responsible loans. It starts by selecting the maximum value of the amount column (aliased as max_loan_amount) from the loans table, filtered by the is_socially_responsible column set to TRUE." 
+}, { + "id": "3634", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of Shariah-compliant investments in a specific sector?", + "sql_context": "CREATE TABLE shariah_compliant_investments (investment_id INT, sector VARCHAR(255), investment_value DECIMAL); INSERT INTO shariah_compliant_investments (investment_id, sector, investment_value) VALUES (1, \u0027Technology\u0027, 5000), (2, \u0027Healthcare\u0027, 7000), (3, \u0027Finance\u0027, 3000);", + "sql": "SELECT SUM(investment_value) FROM shariah_compliant_investments WHERE sector \u003d \u0027Technology\u0027;", + "sql_explanation": "The SQL query calculates the total value of Shariah-compliant investments in the technology sector. It does this by selecting the sum of the investment_value column from the shariah_compliant_investments table where the sector is equal to \u0027Technology\u0027." 
+}, { + "id": "3792", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total amount lent by \u0027GreenLenders\u0027 in the SociallyResponsibleLending table.", + "sql_context": "CREATE TABLE SociallyResponsibleLending (lendingID INT, lenderName VARCHAR(50), borrowerName VARCHAR(50), amountLent DECIMAL(10,2), interestRate DECIMAL(4,2), lendingDate DATE); INSERT INTO SociallyResponsibleLending (lendingID, lenderName, borrowerName, amountLent, interestRate, lendingDate) VALUES (1, \u0027GreenLenders\u0027, \u0027EcoFriendlyInc\u0027, 25000.00, 2.50, \u00272022-02-01\u0027), (2, \u0027FairFinance\u0027, \u0027HelpingHands\u0027, 10000.00, 1.00, \u00272022-02-02\u0027);", + "sql": "SELECT SUM(amountLent) FROM SociallyResponsibleLending WHERE lenderName \u003d \u0027GreenLenders\u0027;", + "sql_explanation": "The SQL query calculates the total amount lent by GreenLenders in the SociallyResponsibleLending table. It does this by using the SUM() function, which returns the total sum of a numeric column." 
+}, { + "id": "3924", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average financial wellbeing score for clients in urban areas?", + "sql_context": "CREATE TABLE clients(id INT, name TEXT, location TEXT, financial_wellbeing_score INT);", + "sql": "SELECT AVG(c.financial_wellbeing_score) FROM clients c WHERE c.location LIKE \u0027%urban%\u0027;", + "sql_explanation": "This SQL query calculates the average financial wellbeing score for clients in urban areas by filtering the clients table based on the location column using the LIKE operator and the % wildcard character to match rows where the location column contains the word urban, and then using the AVG function to calculate the average of the financial_wellbeing_score column for the filtered table." 
+}, { + "id": "4102", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the interest rate for a Shariah-compliant mortgage to 4.5%.", + "sql_context": "CREATE TABLE mortgages (id INT, mortgage_type VARCHAR(255), interest_rate DECIMAL(10,2)); INSERT INTO mortgages (id, mortgage_type, interest_rate) VALUES (1, \u0027Shariah-compliant\u0027, 4.25), (2, \u0027Conventional\u0027, 5.00);", + "sql": "UPDATE mortgages SET interest_rate \u003d 4.5 WHERE mortgage_type \u003d \u0027Shariah-compliant\u0027;", + "sql_explanation": "The SQL query updates the interest rate for a Shariah-compliant mortgage to 4.5% by updating the interest_rate in the mortgages table where the mortgage_type is \u0027Shariah-compliant\u0027." 
+}, { + "id": "4263", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum transaction amount for Islamic finance in Indonesia?", + "sql_context": "CREATE TABLE islamic_finance (id INT, country VARCHAR(255), transaction_type VARCHAR(255), transaction_amount DECIMAL(10,2));", + "sql": "SELECT MAX(transaction_amount) FROM islamic_finance WHERE country \u003d \u0027Indonesia\u0027;", + "sql_explanation": "The SQL query calculates the maximum transaction amount for Islamic finance in Indonesia by using the MAX function on the transaction_amount column." +}, { + "id": "4304", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of individuals with a financial wellbeing score above 8 in the United States?", + "sql_context": "CREATE TABLE if not exists us_wellbeing (id INT, individual_id INT, country VARCHAR(50), gender VARCHAR(10), score DECIMAL(3,1));", + "sql": "SELECT COUNT(*) FROM us_wellbeing WHERE country \u003d \u0027United States\u0027 AND score \u003e 8;", + "sql_explanation": "This SQL query calculates the number of individuals with a financial wellbeing score above 8 in the United States. 
It does so by selecting the count (COUNT*) of rows from the \u0027us_wellbeing\u0027 table where the \u0027country\u0027 is \u0027United States\u0027 and \u0027score\u0027 is greater than 8." +}, { + "id": "4309", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum home value for Shariah-compliant loans in California", + "sql_context": "CREATE TABLE shariah_compliant_loans (id INT, home_value FLOAT, state VARCHAR(255));", + "sql": "SELECT MAX(home_value) FROM shariah_compliant_loans WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "1. Filter rows with California state 2. Find maximum home_value" +}, { + "id": "4413", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find all Shariah-compliant banks with total assets greater than $1 billion", + "sql_context": "CREATE TABLE shariah_compliant_banks (bank_id INT, bank_name VARCHAR(50), total_assets DECIMAL(18,2));", + "sql": "SELECT bank_name FROM shariah_compliant_banks WHERE total_assets \u003e 1000000000;", + "sql_explanation": "This query retrieves the names of all Shariah-compliant banks with total assets greater than $1 billion by selecting the bank_name column in the shariah_compliant_banks table where the total_assets column 
is greater than 1 billion." +}, { + "id": "4622", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of clients who have not taken out any loans in the past year from the \u0027loans\u0027 table.", + "sql_context": "CREATE TABLE loans (loan_id INT, client_id INT, loan_type VARCHAR(50), issue_date DATE); INSERT INTO loans VALUES (16, 16, \u0027Consumer Loan\u0027, \u00272022-03-15\u0027); INSERT INTO loans VALUES (17, 17, \u0027Car Loan\u0027, \u00272022-02-20\u0027);", + "sql": "DELETE FROM loans WHERE issue_date \u003c DATE_SUB(CURDATE(), INTERVAL 1 YEAR);", + "sql_explanation": "The SQL query deletes records from the loans table where the issue date is more than one year before the current date, which corresponds to clients who have not taken out any loans in the past year." +}, { + "id": "4691", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count of socially responsible loans in the Western region", + "sql_context": "CREATE TABLE socially_responsible_loans (id INT, state VARCHAR(255), region VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM socially_responsible_loans WHERE region \u003d \u0027Western\u0027;", + "sql_explanation": "1. Filter rows with Western region 2. 
Count all rows" +}, { + "id": "4710", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total savings of customers who live in \u0027Texas\u0027 or \u0027California\u0027?", + "sql_context": "CREATE TABLE savings (customer_id INT, name TEXT, state TEXT, savings DECIMAL(10, 2)); INSERT INTO savings (customer_id, name, state, savings) VALUES (9, \u0027Ella Green\u0027, \u0027Texas\u0027, 11000.00), (10, \u0027Liam White\u0027, \u0027California\u0027, 12000.00);", + "sql": "SELECT SUM(savings) FROM savings WHERE state IN (\u0027Texas\u0027, \u0027California\u0027);", + "sql_explanation": "This query calculates the total savings of customers who live in \u0027Texas\u0027 or \u0027California\u0027 by using the SUM() function on the \u0027savings\u0027 column, and filtering for rows where the \u0027state\u0027 column is either \u0027Texas\u0027 or \u0027California\u0027." 
+}, { + "id": "4769", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum financial wellbeing score for individuals in Indonesia?", + "sql_context": "CREATE TABLE financial_wellbeing (id INT, individual_id INT, score INT, country VARCHAR(255)); INSERT INTO financial_wellbeing (id, individual_id, score, country) VALUES (1, 4001, 60, \u0027Indonesia\u0027), (2, 4002, 75, \u0027Indonesia\u0027), (3, 4003, 80, \u0027Indonesia\u0027);", + "sql": "SELECT MIN(score) FROM financial_wellbeing WHERE country \u003d \u0027Indonesia\u0027;", + "sql_explanation": "This query calculates the minimum financial wellbeing score for individuals in Indonesia by using the MIN function on the score column, and filtering for rows where the country is \u0027Indonesia\u0027." 
+}, { + "id": "4991", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers have a financial wellbeing score greater than 8?", + "sql_context": "CREATE TABLE customers (customer_id INT, region VARCHAR(20), financial_wellbeing_score DECIMAL(3, 1)); INSERT INTO customers (customer_id, region, financial_wellbeing_score) VALUES (1, \u0027Northeast\u0027, 6.5), (2, \u0027Midwest\u0027, 7.2), (3, \u0027West\u0027, 8.5);", + "sql": "SELECT COUNT(*) FROM customers WHERE financial_wellbeing_score \u003e 8;", + "sql_explanation": "This SQL query counts the number of customers with a financial wellbeing score greater than 8 by filtering the customers table for rows where financial_wellbeing_score is greater than 8." +}, { + "id": "5166", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the \u0027finance\u0027 department?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), salary FLOAT);", + "sql": "SELECT AVG(salary) FROM employees WHERE department \u003d \u0027finance\u0027;", + "sql_explanation": "This query calculates the average salary of employees in the \u0027finance\u0027 department. 
It does so by using the AVG function on the \u0027salary\u0027 column, while filtering the records for the \u0027finance\u0027 department." +}, { + "id": "5259", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the balance column to 0 for all records in the accounts table where the customer_name is \u0027Ahmed\u0027", + "sql_context": "CREATE TABLE accounts (account_number INT, balance DECIMAL(10, 2), customer_name VARCHAR(50), created_at TIMESTAMP);", + "sql": "UPDATE accounts SET balance \u003d 0 WHERE customer_name \u003d \u0027Ahmed\u0027;", + "sql_explanation": "This query updates the balance column to 0 for all records in the accounts table where the customer_name column is equal to \u0027Ahmed\u0027." 
+}, { + "id": "5365", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of Shariah-compliant loans issued to customers in a specific city?", + "sql_context": "CREATE TABLE customer_city (id INT PRIMARY KEY, customer_id INT, city VARCHAR(50)); CREATE TABLE shariah_loans (id INT PRIMARY KEY, customer_id INT, amount DECIMAL(10,2), date DATE); CREATE VIEW city_loans AS SELECT shariah_loans.amount, customer_city.city FROM shariah_loans, customer_city WHERE shariah_loans.customer_id \u003d customer_city.customer_id;", + "sql": "SELECT SUM(amount) FROM city_loans WHERE city \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the total amount of Shariah-compliant loans issued to customers in a specific city (\u0027New York\u0027 in this example) by using the \u0027city_loans\u0027 view, which joins the \u0027shariah_loans\u0027 and \u0027customer_city\u0027 tables based on the \u0027customer_id\u0027 column." 
+}, { + "id": "5404", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of customers who have a savings balance greater than $7000.", + "sql_context": "CREATE TABLE customers (id INT, name TEXT, region TEXT, savings REAL);", + "sql": "SELECT customers.name FROM customers WHERE savings \u003e 7000;", + "sql_explanation": "This SQL query retrieves the \u0027name\u0027 column from the \u0027customers\u0027 table for rows where the \u0027savings\u0027 column is greater than 7000." +}, { + "id": "5638", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total financial capability training hours for employees in microfinance organizations in Asia", + "sql_context": "CREATE TABLE AsiaMicrofinance (id INT, employee_id INT, training_hours INT); INSERT INTO AsiaMicrofinance (id, employee_id, training_hours) VALUES (1, 1, 25), (2, 2, 35);", + "sql": "SELECT SUM(training_hours) FROM AsiaMicrofinance;", + "sql_explanation": "This query calculates the total financial capability training hours for employees in microfinance organizations in Asia. It sums the \u0027training_hours\u0027 column for all rows." 
+}, { + "id": "5713", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record of customer \u0027456\u0027?", + "sql_context": "CREATE TABLE savings (customer_id INT, name TEXT, state TEXT, savings DECIMAL(10, 2)); INSERT INTO savings (customer_id, name, state, savings) VALUES (456, \u0027Alice\u0027, \u0027New York\u0027, 7000.00);", + "sql": "DELETE FROM savings WHERE customer_id \u003d 456;", + "sql_explanation": "This query deletes the record of customer \u0027456\u0027 by using the DELETE FROM keyword, and a WHERE clause to filter for the correct customer." +}, { + "id": "1413", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of cases in the \u0027CriminalJustice\u0027 table and the number of cases with \u0027case_status\u0027 of \u0027pending\u0027 or \u0027in_progress\u0027", + "sql_context": "CREATE TABLE CriminalJustice (case_id INT, case_status VARCHAR(10)); INSERT INTO CriminalJustice (case_id, case_status) VALUES (1, \u0027pending\u0027), (2, \u0027closed\u0027), (3, \u0027pending\u0027), (4, \u0027in_progress\u0027), (5, \u0027closed\u0027), (6, \u0027in_progress\u0027);", + "sql": "SELECT COUNT(*) AS total_cases, SUM(CASE WHEN case_status IN (\u0027pending\u0027, \u0027in_progress\u0027) THEN 1 ELSE 0 
END) AS pending_or_in_progress_cases FROM CriminalJustice;", + "sql_explanation": "This query uses the COUNT and SUM aggregate functions to count the total number of cases and the number of cases with a \u0027case_status\u0027 of \u0027pending\u0027 or \u0027in_progress\u0027 in the \u0027CriminalJustice\u0027 table. The CASE statement is used to conditionally sum the number of \u0027pending\u0027 and \u0027in_progress\u0027 cases." +}, { + "id": "1467", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many restorative justice programs were implemented in India, South Africa, and Brazil between 2015 and 2020?", + "sql_context": "CREATE TABLE restorative_justice_programs (id INT, program_name VARCHAR(255), country VARCHAR(255), start_year INT, end_year INT); INSERT INTO restorative_justice_programs (id, program_name, country, start_year, end_year) VALUES (1, \u0027Victim Offender Mediation Program\u0027, \u0027United States\u0027, 2016, 2020), (2, \u0027Restorative Circles\u0027, \u0027Canada\u0027, 2017, 2020), (3, \u0027Family Group Conferencing\u0027, \u0027India\u0027, 2015, 2018), (4, \u0027Ubuntu Restorative Justice\u0027, \u0027South Africa\u0027, 2016, 2021), (5, \u0027Community Reconciliation Process\u0027, \u0027Brazil\u0027, 2017, 2020);", + "sql": "SELECT COUNT(*) AS total_programs FROM restorative_justice_programs WHERE country IN (\u0027India\u0027, \u0027South Africa\u0027, \u0027Brazil\u0027) AND start_year BETWEEN 2015 AND 2020;", + "sql_explanation": "This query calculates the number of restorative justice programs implemented in India, South Africa, and Brazil between 2015 and 2020 by 
counting the rows in the restorative_justice_programs table where the country is one of the specified countries and the start_year is between 2015 and 2020." +}, { + "id": "1711", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many legal technology patents were granted to women-led teams in the past decade?", + "sql_context": "CREATE TABLE patents (patent_id INT, year INT, team_leader VARCHAR(10), technology VARCHAR(20)); INSERT INTO patents (patent_id, year, team_leader, technology) VALUES (1, 2012, \u0027Aisha\u0027, \u0027Legal Tech\u0027), (2, 2015, \u0027Brian\u0027, \u0027Legal Tech\u0027);", + "sql": "SELECT COUNT(*) FROM patents WHERE technology \u003d \u0027Legal Tech\u0027 AND YEAR(year) \u003e\u003d 2011 AND team_leader IN (\u0027Aisha\u0027, \u0027Brian\u0027, \u0027Candace\u0027, \u0027Dana\u0027, \u0027Eva\u0027);", + "sql_explanation": "This SQL query counts the number of legal technology patents granted to women-led teams in the past decade. It does so by selecting the COUNT function on all records, filtering the data by technology, year, and team_leader." 
+}, { + "id": "2415", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which restorative justice programs in South America have the lowest completion rates?", + "sql_context": "CREATE TABLE programs (program_id INT, program_name VARCHAR(50), country VARCHAR(20), completion_rate DECIMAL(5,2)); INSERT INTO programs (program_id, program_name, country, completion_rate) VALUES (1, \u0027Program 1\u0027, \u0027Brazil\u0027, 0.60), (2, \u0027Program 2\u0027, \u0027Argentina\u0027, 0.70);", + "sql": "SELECT program_name, country, completion_rate FROM programs WHERE country LIKE \u0027South America%\u0027 ORDER BY completion_rate ASC;", + "sql_explanation": "Filters programs table for countries in South America. Returns the program name, country, and completion rate ordered by completion rate in ascending order." 
+}, { + "id": "2512", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of criminal justice reform cases handled by legal aid organizations in New York", + "sql_context": "CREATE TABLE legal_aid_organizations (org_id INT, name VARCHAR(50), cases_handled INT, cases_type VARCHAR(50), state VARCHAR(2)); INSERT INTO legal_aid_organizations (org_id, name, cases_handled, cases_type, state) VALUES (1, \u0027California Legal Aid\u0027, 200, \u0027access to justice, criminal justice reform\u0027, \u0027CA\u0027), (2, \u0027New York Legal Aid\u0027, 300, \u0027legal technology\u0027, \u0027NY\u0027), (3, \u0027Texas Legal Aid\u0027, 150, \u0027criminal justice reform\u0027, \u0027TX\u0027), (4, \u0027Florida Legal Aid\u0027, 250, \u0027restorative justice\u0027, \u0027FL\u0027), (5, \u0027Los Angeles Legal Aid\u0027, 400, \u0027criminal justice reform\u0027, \u0027CA\u0027);", + "sql": "SELECT SUM(cases_handled) FROM legal_aid_organizations WHERE cases_type LIKE \u0027%criminal justice reform%\u0027 AND state \u003d \u0027NY\u0027;", + "sql_explanation": "This query calculates the total number of criminal justice reform cases handled by legal aid organizations in New York by summing up the \"cases_handled\" column values where the \"cases_type\" column contains the phrase \"criminal justice reform\" and the \"state\" column is equal to \u0027NY\u0027." 
+}, { + "id": "2819", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases handled by community mediation centers in the year 2020?", + "sql_context": "CREATE TABLE CommunityMediationCenters (ID INT, Name VARCHAR(50), YearEstablished INT, CasesHandled INT); INSERT INTO CommunityMediationCenters (ID, Name, YearEstablished, CasesHandled) VALUES (1, \u0027PeaceBuilders\u0027, 2005, 120), (2, \u0027CommunityHealers\u0027, 2010, 150), (3, \u0027HarmonyKeepers\u0027, 2008, 180), (4, \u0027UnityCreators\u0027, 2015, 200);", + "sql": "SELECT SUM(CasesHandled) FROM CommunityMediationCenters WHERE YearEstablished \u003c 2020 AND YearEstablished \u003e\u003d 2000;", + "sql_explanation": "This query calculates the total number of cases handled by all community mediation centers established between the year 2000 and 2020 (exclusive)." +}, { + "id": "2832", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of cases handled by restorative justice programs in Washington D.C. 
in a single year?", + "sql_context": "CREATE TABLE restorative_justice_programs (program_id INT, cases_handled INT, year INT, district VARCHAR(20)); INSERT INTO restorative_justice_programs (program_id, cases_handled, year, district) VALUES (1, 100, 2021, \u0027Washington D.C.\u0027), (2, 200, 2020, \u0027Washington D.C.\u0027), (3, 150, 2019, \u0027Washington D.C.\u0027), (4, 250, 2018, \u0027Washington D.C.\u0027), (5, 300, 2017, \u0027Washington D.C.\u0027);", + "sql": "SELECT MIN(cases_handled) FROM restorative_justice_programs WHERE year \u003e\u003d 2017 AND district \u003d \u0027Washington D.C.\u0027;", + "sql_explanation": "This query finds the minimum number of cases handled by restorative justice programs in Washington D.C. in a single year. It uses the MIN function to find the minimum value in the \u0027cases_handled\u0027 column, and filters the data to only include rows where \u0027year\u0027 is greater than or equal to 2017 and \u0027district\u0027 is \u0027Washington D.C.\u0027." 
+}, { + "id": "3062", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and case IDs of all cases that have been resolved in the state of California?", + "sql_context": "CREATE TABLE court_cases (case_id INT, case_status TEXT, case_state TEXT); INSERT INTO court_cases (case_id, case_status, case_state) VALUES (66666, \u0027Resolved\u0027, \u0027California\u0027);", + "sql": "SELECT case_id, case_status FROM court_cases WHERE case_state \u003d \u0027California\u0027 AND case_status \u003d \u0027Resolved\u0027;", + "sql_explanation": "This query selects the case_id and case_status columns from the court_cases table where the case_state is \u0027California\u0027 and the case_status is \u0027Resolved\u0027." 
+}, { + "id": "3092", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify legal technology initiatives and their respective countries, excluding those launched before 2010.", + "sql_context": "CREATE TABLE historical_legal_tech (id INT, initiative VARCHAR(255), launch_date DATE, country VARCHAR(255)); INSERT INTO historical_legal_tech (id, initiative, launch_date, country) VALUES (1, \u0027Legacy AI Platform\u0027, \u00272005-02-28\u0027, \u0027US\u0027), (2, \u0027Traditional Contracts\u0027, \u00272000-01-01\u0027, \u0027Canada\u0027), (3, \u0027Legal Chatbot\u0027, \u00272011-08-15\u0027, \u0027US\u0027);", + "sql": "SELECT initiative, country FROM historical_legal_tech WHERE launch_date \u003e\u003d \u00272010-01-01\u0027 ORDER BY country;", + "sql_explanation": "This SQL query filters the historical_legal_tech table based on the condition that launch_date is greater than or equal to \u00272010-01-01\u0027, and orders the result by country." 
+}, { + "id": "3268", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time taken for restorative justice cases in New York?", + "sql_context": "CREATE TABLE RestorativeJustice (CaseID INT, CaseType TEXT, StartDate DATE, EndDate DATE, State TEXT); INSERT INTO RestorativeJustice (CaseID, CaseType, StartDate, EndDate, State) VALUES (1, \u0027Restorative Justice\u0027, \u00272022-01-01\u0027, \u00272022-03-15\u0027, \u0027New York\u0027);", + "sql": "SELECT AVG(DATEDIFF(EndDate, StartDate)) as AvgTime FROM RestorativeJustice WHERE State \u003d \u0027New York\u0027;", + "sql_explanation": "The SQL query calculates the average time taken for restorative justice cases in New York by using the AVG function with the DATEDIFF function, which calculates the difference between the end and start dates for each case." 
+}, { + "id": "3297", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and ages of all victims who have participated in restorative justice programs in the state of California?", + "sql_context": "CREATE TABLE restorative_justice_programs (victim_name TEXT, victim_age INT, program_state TEXT); INSERT INTO restorative_justice_programs (victim_name, victim_age, program_state) VALUES (\u0027John Doe\u0027, 34, \u0027California\u0027);", + "sql": "SELECT victim_name, victim_age FROM restorative_justice_programs WHERE program_state \u003d \u0027California\u0027;", + "sql_explanation": "This query selects the victim_name and victim_age columns from the restorative_justice_programs table where the program_state is \u0027California\u0027." 
+}, { + "id": "3331", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of legal technology patents filed in the last 10 years?", + "sql_context": "CREATE TABLE legal_tech_patents (patent_id INT, company_id INT, file_date DATE); INSERT INTO legal_tech_patents (patent_id, company_id, file_date) VALUES (1, 1001, \u00272013-01-01\u0027), (2, 1002, \u00272019-03-15\u0027);", + "sql": "SELECT COUNT(*) FROM legal_tech_patents WHERE file_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 10 YEAR);", + "sql_explanation": "This SQL query counts the total number of legal technology patents filed in the last 10 years. It does so by using the COUNT function and filtering the data for the last 10 years using the DATE_SUB function." 
+}, { + "id": "3410", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of traffic violations in the state of New York for the year 2021?", + "sql_context": "CREATE TABLE traffic_violations (id INT, state VARCHAR(255), year INT, number_of_violations INT); INSERT INTO traffic_violations (id, state, year, number_of_violations) VALUES (1, \u0027New York\u0027, 2021, 1000), (2, \u0027New York\u0027, 2020, 900), (3, \u0027California\u0027, 2021, 1200);", + "sql": "SELECT SUM(number_of_violations) FROM traffic_violations WHERE state \u003d \u0027New York\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the total number of traffic violations in the state of New York for the year 2021. It does this by selecting the sum of the \u0027number_of_violations\u0027 column, but only for the rows where the \u0027state\u0027 column is equal to \u0027New York\u0027 and the \u0027year\u0027 column is equal to 2021." 
+}, { + "id": "3667", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of restorative justice programs in Texas that were successful?", + "sql_context": "CREATE TABLE restorative_justice_outcomes (outcome_id INT, program_id INT, success BOOLEAN); INSERT INTO restorative_justice_outcomes (outcome_id, program_id, success) VALUES (1, 1001, TRUE), (2, 1002, FALSE);", + "sql": "SELECT 100.0 * SUM(success) / COUNT(*) FROM restorative_justice_outcomes WHERE state \u003d \u0027TX\u0027;", + "sql_explanation": "This SQL query calculates the percentage of restorative justice programs in Texas that were successful. It does so by using the SUM and COUNT functions to calculate the number of successful programs and total number of programs in Texas. It then divides these numbers and multiplies by 100.0 to get the percentage." 
+}, { + "id": "4012", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum number of cases handled by legal clinics in \u0027East River\u0027 justice district in a year?", + "sql_context": "CREATE TABLE LegalClinics (ID INT, JusticeDistrict VARCHAR(20), Year INT, Cases INT); INSERT INTO LegalClinics (ID, JusticeDistrict, Year, Cases) VALUES (1, \u0027East River\u0027, 2017, 120), (2, \u0027East River\u0027, 2018, 150), (3, \u0027East River\u0027, 2019, 210), (4, \u0027East River\u0027, 2020, 200);", + "sql": "SELECT MIN(Cases), MAX(Cases) FROM LegalClinics WHERE JusticeDistrict \u003d \u0027East River\u0027;", + "sql_explanation": "This query calculates the minimum and maximum number of cases handled by legal clinics in the East River justice district in a year." 
+}, { + "id": "4083", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total bail amount set in the last month for all cases?", + "sql_context": "CREATE TABLE cases (id INT, bail_amount DECIMAL(10, 2), bail_date DATE); INSERT INTO cases (id, bail_amount, bail_date) VALUES (1, 5000, \u00272021-03-23\u0027), (2, 10000, \u00272021-04-15\u0027);", + "sql": "SELECT SUM(bail_amount) FROM cases WHERE bail_date \u003e\u003d DATEADD(month, -1, GETDATE());", + "sql_explanation": "This SQL query calculates the total bail amount set in the last month for all cases. It does this by selecting all rows from the \u0027cases\u0027 table where \u0027bail_date\u0027 is within the last month, and then using the SUM function to find the total bail amount." 
+}, { + "id": "4287", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the legal clinics in \u0027South Peak\u0027 justice district that have provided more than 250 hours of service in a year.", + "sql_context": "CREATE TABLE LegalClinics (ID INT, ClinicID VARCHAR(20), District VARCHAR(20), Hours INT, Year INT); INSERT INTO LegalClinics (ID, ClinicID, District, Hours, Year) VALUES (1, \u0027LC2017\u0027, \u0027South Peak\u0027, 300, 2017), (2, \u0027LC2018\u0027, \u0027North Valley\u0027, 200, 2018), (3, \u0027LC2019\u0027, \u0027South Peak\u0027, 250, 2019);", + "sql": "SELECT ClinicID FROM LegalClinics WHERE District \u003d \u0027South Peak\u0027 AND Hours \u003e 250;", + "sql_explanation": "This query retrieves the IDs of all legal clinics in the South Peak justice district that have provided more than 250 hours of service in a year." 
+}, { + "id": "4294", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many distinct offenses are present in the justice_data schema\u0027s court_cases table, and what is the maximum number of charges filed in a single case?", + "sql_context": "CREATE TABLE justice_data.court_cases (id INT, case_number INT, filing_date DATE, charge_count INT, offense VARCHAR(50));", + "sql": "SELECT COUNT(DISTINCT offense), MAX(charge_count) FROM justice_data.court_cases;", + "sql_explanation": "This query calculates the number of distinct offenses in the court_cases table and the maximum number of charges filed in a single case." +}, { + "id": "4328", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all legal technology initiatives and their respective launch dates in the US, sorted by launch date in descending order.", + "sql_context": "CREATE TABLE legal_tech_launch (id INT, initiative VARCHAR(255), launch_date DATE); INSERT INTO legal_tech_launch (id, initiative, launch_date) VALUES (1, \u0027Legal AI Platform\u0027, \u00272018-05-15\u0027), (2, \u0027Online Dispute Resolution\u0027, \u00272016-09-01\u0027), (3, \u0027Smart Contracts\u0027, \u00272017-12-21\u0027);", + "sql": "SELECT * FROM legal_tech_launch WHERE country \u003d \u0027US\u0027 ORDER BY launch_date DESC;", + 
"sql_explanation": "This SQL query filters the legal_tech_launch table based on country value \u0027US\u0027, and orders the result by launch_date in descending order." +}, { + "id": "4341", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were resolved through alternative_dispute_resolution methods?", + "sql_context": "CREATE TABLE alternative_dispute_resolution (id INT, case_id INT, method TEXT, outcome TEXT);", + "sql": "SELECT COUNT(*) FROM alternative_dispute_resolution WHERE outcome \u003d \u0027resolved\u0027;", + "sql_explanation": "This query counts the number of rows in the \u0027alternative_dispute_resolution\u0027 table where the \u0027outcome\u0027 column is equal to \u0027resolved\u0027, providing a count of resolved cases." 
+}, { + "id": "4522", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for access to justice programs in the Midwest region?", + "sql_context": "CREATE TABLE access_to_justice_programs (id INT, program_name TEXT, budget INT, region TEXT); INSERT INTO access_to_justice_programs (id, program_name, budget, region) VALUES (1, \u0027Legal Aid Chicago\u0027, 500000, \u0027Midwest\u0027); INSERT INTO access_to_justice_programs (id, program_name, budget, region) VALUES (2, \u0027Legal Aid Minnesota\u0027, 750000, \u0027Midwest\u0027);", + "sql": "SELECT AVG(budget) FROM access_to_justice_programs WHERE region \u003d \u0027Midwest\u0027;", + "sql_explanation": "This query calculates the average budget for access to justice programs in the Midwest region by selecting the budget for all records with a region of \u0027Midwest\u0027 and averaging the resulting values." 
+}, { + "id": "4614", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many access to justice initiatives are there in total in the \u0027justice_initiatives\u0027 table?", + "sql_context": "CREATE TABLE justice_initiatives (id INT, name VARCHAR(50), type VARCHAR(30), organization VARCHAR(30)); INSERT INTO justice_initiatives (id, name, type, organization) VALUES (1, \u0027Legal Clinic\u0027, \u0027Access to Justice\u0027, \u0027ACLU\u0027); INSERT INTO justice_initiatives (id, name, type, organization) VALUES (2, \u0027Know Your Rights Campaign\u0027, \u0027Access to Justice\u0027, \u0027Immigrant Defense Project\u0027); INSERT INTO justice_initiatives (id, name, type, organization) VALUES (3, \u0027Civil Legal Aid Program\u0027, \u0027Access to Justice\u0027, \u0027Legal Services Corporation\u0027);", + "sql": "SELECT COUNT(*) FROM justice_initiatives WHERE type \u003d \u0027Access to Justice\u0027;", + "sql_explanation": "This SQL query counts the number of rows in the \u0027justice_initiatives\u0027 table where the \u0027type\u0027 column is equal to \u0027Access to Justice\u0027 to find the total number of access to justice initiatives." 
+}, { + "id": "4659", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the \u0027case_id\u0027 and \u0027case_type\u0027 for cases in the \u0027AccessToJustice\u0027 table where the \u0027case_type\u0027 starts with \u0027c\u0027", + "sql_context": "CREATE TABLE AccessToJustice (case_id INT, case_type VARCHAR(10)); INSERT INTO AccessToJustice (case_id, case_type) VALUES (1, \u0027civil\u0027), (2, \u0027criminal\u0027), (3, \u0027constitutional\u0027);", + "sql": "SELECT case_id, case_type FROM AccessToJustice WHERE case_type LIKE \u0027c%\u0027;", + "sql_explanation": "This query selects the \u0027case_id\u0027 and \u0027case_type\u0027 columns from the \u0027AccessToJustice\u0027 table where the \u0027case_type\u0027 column starts with the letter \u0027c\u0027. The LIKE operator is used with the \u0027c%\u0027 wildcard pattern to match any value that starts with \u0027c\u0027." 
+}, { + "id": "5258", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of sessions completed by a volunteer in the volunteers table who is over 30 years old?", + "sql_context": "CREATE TABLE volunteers (id INT, name VARCHAR(50), age INT, sessions_completed INT);", + "sql": "SELECT MIN(sessions_completed) FROM volunteers WHERE age \u003e 30;", + "sql_explanation": "The SQL query calculates the minimum number of sessions completed by a volunteer in the volunteers table who is over 30 years old. It first filters the volunteers table for rows where the age column is greater than 30. Then, it calculates the minimum number of sessions completed by the remaining rows using the MIN aggregation function." +}, { + "id": "5488", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all legal technology programs from the \u0027programs\u0027 table", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR(50), location VARCHAR(50), type VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "SELECT * FROM programs WHERE type \u003d \u0027Legal Technology\u0027;", + "sql_explanation": "This query selects all columns from the \u0027programs\u0027 table where the \u0027type\u0027 is \u0027Legal Technology\u0027." 
+}, { + "id": "5752", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the age of the victim with id 1", + "sql_context": "CREATE TABLE victims (id INT PRIMARY KEY, name VARCHAR(255), age INT, state VARCHAR(2));", + "sql": "UPDATE victims SET age \u003d 23 WHERE id \u003d 1;", + "sql_explanation": "This SQL statement updates the age of the victim with id 1 to 23." +}, { + "id": "5827", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record with ID 1 from the prison table.", + "sql_context": "CREATE TABLE prison (id INT, name TEXT, security_level TEXT, age INT); INSERT INTO prison (id, name, security_level, age) VALUES (1, \u0027John Doe\u0027, \u0027low_security\u0027, 45);", + "sql": "DELETE FROM prison WHERE id \u003d 1;", + "sql_explanation": "This query deletes the record with ID 1 from the prison table by using the DELETE statement and specifying the condition for the deletion in the WHERE clause." 
+}, { + "id": "1616", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for visitors from Nigeria, South Africa, and Egypt into the visitors table", + "sql_context": "CREATE TABLE visitors (id INT, age INT, gender TEXT, country TEXT);", + "sql": "INSERT INTO visitors (id, age, gender, country) VALUES (7001, 28, \u0027Male\u0027, \u0027Nigeria\u0027), (7002, 35, \u0027Female\u0027, \u0027South Africa\u0027), (7003, 42, \u0027Male\u0027, \u0027Egypt\u0027);", + "sql_explanation": "This query inserts three new records into the visitors table with the given values." +}, { + "id": "1840", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average attendance at exhibitions in Europe in Q2 2019?", + "sql_context": "CREATE TABLE Exhibitions (id INT, region VARCHAR(255), quarter INT, year INT, attendance INT);", + "sql": "SELECT AVG(Exhibitions.attendance) FROM Exhibitions WHERE Exhibitions.region \u003d \u0027Europe\u0027 AND Exhibitions.quarter \u003d 2 AND Exhibitions.year \u003d 2019;", + "sql_explanation": "This query calculates the average attendance at exhibitions in Europe during Q2 2019 by filtering on the region, quarter, and year and computing the average attendance." 
+}, { + "id": "1843", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for visitors from Brazil and Argentina.", + "sql_context": "CREATE TABLE Visitors (id INT, name VARCHAR(100), country VARCHAR(50), visit_date DATE);", + "sql": "INSERT INTO Visitors (id, name, country, visit_date) VALUES (1, \u0027PelÊ\u0027, \u0027Brazil\u0027, \u00272022-01-01\u0027), (2, \u0027Lionel Messi\u0027, \u0027Argentina\u0027, \u00272022-02-01\u0027);", + "sql_explanation": "This query inserts new records for visitors from Brazil and Argentina." +}, { + "id": "2034", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of visitors for exhibitions held in Barcelona in June 2021?", + "sql_context": "CREATE TABLE Exhibitions (id INT, city VARCHAR(20), visitors INT, exhibition_date DATE); INSERT INTO Exhibitions (id, city, visitors, exhibition_date) VALUES (1, \u0027Barcelona\u0027, 40, \u00272021-06-01\u0027), (2, \u0027Barcelona\u0027, 50, \u00272021-06-05\u0027);", + "sql": "SELECT MAX(visitors) as max_visitors FROM Exhibitions WHERE city \u003d \u0027Barcelona\u0027 AND exhibition_date BETWEEN \u00272021-06-01\u0027 AND \u00272021-06-30\u0027", + "sql_explanation": "We filter the records based on city and exhibition date in June 2021 for Barcelona, 
then find the maximum number of visitors for these records." +}, { + "id": "2090", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the museum_exhibitions table for an exhibition titled \u0027Women in Art\u0027 that will take place in Paris between 2024-05-01 and 2024-08-31.", + "sql_context": "CREATE TABLE museum_exhibitions (id INT, title VARCHAR(50), city VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "INSERT INTO museum_exhibitions (id, title, city, start_date, end_date) VALUES (1, \u0027Women in Art\u0027, \u0027Paris\u0027, \u00272024-05-01\u0027, \u00272024-08-31\u0027);", + "sql_explanation": "1. Insert a new record into the museum_exhibitions table for an exhibition titled \u0027Women in Art\u0027 that will take place in Paris between May 1, 2024 and August 31, 2024." 
+}, { + "id": "2652", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many visitors attended the \"Modern Art\" exhibition in Tokyo last year?", + "sql_context": "CREATE TABLE Exhibition_Attendance (exhibition_id INT, city VARCHAR(50), year INT, visitor_count INT);", + "sql": "SELECT visitor_count FROM Exhibition_Attendance WHERE exhibition_id \u003d \u0027Modern Art\u0027 AND city \u003d \u0027Tokyo\u0027 AND year \u003d 2021;", + "sql_explanation": "This query retrieves the number of visitors who attended the \"Modern Art\" exhibition in Tokyo in 2021 from the Exhibition_Attendance table." +}, { + "id": "2884", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many exhibitions were held in Tokyo in July 2021?", + "sql_context": "CREATE TABLE Exhibitions (id INT, city VARCHAR(20), visitors INT, exhibition_date DATE); INSERT INTO Exhibitions (id, city, visitors, exhibition_date) VALUES (1, \u0027Tokyo\u0027, 50, \u00272021-07-01\u0027), (2, \u0027Tokyo\u0027, 60, \u00272021-07-05\u0027);", + "sql": "SELECT COUNT(*) FROM Exhibitions WHERE city \u003d \u0027Tokyo\u0027 AND exhibition_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-07-31\u0027", + "sql_explanation": "We filter the records based on city and exhibition date in July 2021, then 
count the number of exhibitions held in Tokyo." +}, { + "id": "3038", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of digital museum experiences in the last month?", + "sql_context": "CREATE TABLE DigitalExperiences (experience_id INT, date DATE, revenue DECIMAL(10, 2)); INSERT INTO DigitalExperiences (experience_id, date, revenue) VALUES (1, \u00272022-02-10\u0027, 50.00), (2, \u00272022-03-11\u0027, 75.00), (3, \u00272022-04-12\u0027, 100.00);", + "sql": "SELECT COUNT(experience_id) FROM DigitalExperiences WHERE date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "This query selects the experience_id column from the DigitalExperiences table and filters for experiences in the last month. It then counts the number of digital museum experiences using the COUNT function." 
+}, { + "id": "3101", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average age of visitors who participated in online events in Argentina.", + "sql_context": "CREATE TABLE EventParticipants (event_id INT, country VARCHAR(20), participant_age INT, event_type VARCHAR(10)); INSERT INTO EventParticipants (event_id, country, participant_age, event_type) VALUES (1, \u0027Argentina\u0027, 25, \u0027Online\u0027), (2, \u0027Brazil\u0027, 30, \u0027Offline\u0027), (3, \u0027Chile\u0027, 35, \u0027Offline\u0027);", + "sql": "SELECT AVG(participant_age) FROM EventParticipants WHERE country \u003d \u0027Argentina\u0027 AND event_type \u003d \u0027Online\u0027;", + "sql_explanation": "This query calculates the average age of visitors who participated in online events in Argentina by filtering the EventParticipants table based on the country and event_type and then computing the average participant_age." 
+}, { + "id": "3200", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community engagement events were organized in Canada in Q3 2021?", + "sql_context": "CREATE TABLE Community_Engagement_Events (id INT, country VARCHAR(255), quarter INT, number_of_events INT);", + "sql": "SELECT SUM(number_of_events) FROM Community_Engagement_Events WHERE country \u003d \u0027Canada\u0027 AND quarter \u003d 3;", + "sql_explanation": "Sum the number of community engagement events organized in Canada in Q3 2021." +}, { + "id": "3278", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What percentage of visitors identified as \u0027transgender\u0027 out of total visitors?", + "sql_context": "CREATE TABLE Visitors (visitor_id INT, exhibition_id INT, age INT, gender VARCHAR(50));", + "sql": "SELECT (COUNT(CASE WHEN gender \u003d \u0027transgender\u0027 THEN 1 END)/COUNT(*))*100 as percentage FROM Visitors;", + "sql_explanation": "This query calculates the percentage of visitors who identified as transgender out of total visitors by counting the number of visitors with gender equal to transgender and dividing it by the total number of visitors, then multiplying by 100 to get the percentage." 
+}, { + "id": "3553", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total number of visitors from African countries.", + "sql_context": "CREATE TABLE Visitors (id INT, country VARCHAR(20), visitor_count INT); INSERT INTO Visitors (id, country, visitor_count) VALUES (1, \u0027Egypt\u0027, 100), (2, \u0027Nigeria\u0027, 200), (3, \u0027South Africa\u0027, 150), (4, \u0027USA\u0027, 250);", + "sql": "SELECT SUM(visitor_count) FROM Visitors WHERE country IN (\u0027Egypt\u0027, \u0027Nigeria\u0027, \u0027South Africa\u0027);", + "sql_explanation": "This query determines the total number of visitors from African countries by filtering the Visitors table based on the country and then summing the visitor_count." 
+}, { + "id": "3622", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of visitors who attended the exhibition \u0027The Art of the Silk Road\u0027?", + "sql_context": "CREATE TABLE exhibitions (id INT, city VARCHAR(20), visitor_age INT, visit_date DATE); INSERT INTO exhibitions (id, city, visitor_age, visit_date) VALUES (1, \u0027New York\u0027, 12, \u00272022-01-01\u0027); INSERT INTO exhibitions (id, city, visitor_age, visit_date) VALUES (2, \u0027Los Angeles\u0027, 15, \u00272022-02-15\u0027);", + "sql": "SELECT MIN(visitor_age) FROM exhibitions WHERE exhibition_name \u003d \u0027The Art of the Silk Road\u0027;", + "sql_explanation": "This query calculates the minimum age of visitors who attended the exhibition \u0027The Art of the Silk Road\u0027 by using the MIN() function on the visitor_age column and filtering the records by the exhibition_name column." 
+}, { + "id": "3882", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 most visited exhibitions in the digital museum", + "sql_context": "CREATE TABLE DigitalExhibitions (exhibition_id INT, exhibition_name VARCHAR(50), visitors INT); INSERT INTO DigitalExhibitions (exhibition_id, exhibition_name, visitors) VALUES (1, \u0027Nature\u0027, 25000), (2, \u0027Art\u0027, 30000), (3, \u0027History\u0027, 20000), (4, \u0027Science\u0027, 35000);", + "sql": "SELECT exhibition_name, visitors FROM DigitalExhibitions ORDER BY visitors DESC LIMIT 3;", + "sql_explanation": "The SQL query orders the exhibitions by the number of visitors in descending order and returns the top 3 rows." 
+}, { + "id": "4103", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of visitors who attended the \"Impressionist Exhibition\"?", + "sql_context": "CREATE TABLE Exhibitions (ExhibitionID INT, ExhibitionName VARCHAR(255), Age INT); INSERT INTO Exhibitions (ExhibitionID, ExhibitionName, Age) VALUES (1, \u0027Impressionist Exhibition\u0027, 35), (2, \u0027Modern Art Exhibition\u0027, 32);", + "sql": "SELECT AVG(Age) FROM Exhibitions WHERE ExhibitionName \u003d \u0027Impressionist Exhibition\u0027;", + "sql_explanation": "This SQL query calculates the average age of visitors who attended the \"Impressionist Exhibition\". It does this by using the AVG function on the Age column, but only for the rows where the ExhibitionName is \u0027Impressionist Exhibition\u0027." 
+}, { + "id": "4566", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of visitors for a single exhibition in Canada?", + "sql_context": "CREATE TABLE ExhibitionVisitors (exhibition_id INT, country VARCHAR(20), visitor_count INT); INSERT INTO ExhibitionVisitors (exhibition_id, country, visitor_count) VALUES (1, \u0027Canada\u0027, 100), (2, \u0027Canada\u0027, 120), (3, \u0027Mexico\u0027, 150);", + "sql": "SELECT MAX(visitor_count) FROM ExhibitionVisitors WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "The query determines the maximum number of visitors for a single exhibition in Canada by filtering the ExhibitionVisitors table based on the country and then finding the maximum visitor_count." 
+}, { + "id": "4882", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum digital engagement for exhibitions in Tokyo?", + "sql_context": "CREATE TABLE ExhibitionDigitalEngagementTokyo (exhibition_id INT, city VARCHAR(50), digital_engagement INT); INSERT INTO ExhibitionDigitalEngagementTokyo (exhibition_id, city, digital_engagement) VALUES (1, \u0027Tokyo\u0027, 5000), (2, \u0027Tokyo\u0027, 7000), (3, \u0027Tokyo\u0027, 9000);", + "sql": "SELECT MAX(digital_engagement) FROM ExhibitionDigitalEngagementTokyo;", + "sql_explanation": "This SQL query calculates the maximum digital engagement for exhibitions in Tokyo. It does so by using the MAX function, which returns the maximum value in a set of values." 
+}, { + "id": "5043", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of museum members from the Asia-Pacific region?", + "sql_context": "CREATE TABLE members(member_id INT, name VARCHAR(50), age INT, member_region VARCHAR(50)); INSERT INTO members (member_id, name, age, member_region) VALUES (1, \u0027John Doe\u0027, 25, \u0027North America\u0027), (2, \u0027Jane Smith\u0027, 30, \u0027Europe\u0027), (3, \u0027Alice Johnson\u0027, 35, \u0027Asia-Pacific\u0027);", + "sql": "SELECT AVG(age) FROM members WHERE member_region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query calculates the average age of museum members from the Asia-Pacific region by finding the average value of the age column in the members table for rows with member_region equal to \u0027Asia-Pacific\u0027." 
+}, { + "id": "5103", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average age of visitors who attended the Impressionist Art exhibition?", + "sql_context": "CREATE TABLE exhibitions (exhibition_id INT, name VARCHAR(255)); INSERT INTO exhibitions (exhibition_id, name) VALUES (1, \u0027Art of the Renaissance\u0027), (2, \u0027Modern Art\u0027), (3, \u0027Impressionist Art\u0027); CREATE TABLE visitors (visitor_id INT, exhibition_id INT, age INT); INSERT INTO visitors (visitor_id, exhibition_id, age) VALUES (1, 1, 25), (2, 1, 42), (3, 2, 28), (4, 3, 29), (5, 3, 22), (6, 3, 35);", + "sql": "SELECT AVG(age) as avg_age FROM visitors WHERE exhibition_id \u003d 3;", + "sql_explanation": "This query calculates the average age of visitors who attended the Impressionist Art exhibition. It does so by selecting the age column from the visitors table where the exhibition_id is 3 and then using the AVG() function to calculate the average age." 
+}, { + "id": "5156", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average exhibition rating in Madrid?", + "sql_context": "CREATE TABLE ExhibitionRatings (id INT, city VARCHAR(50), exhibition VARCHAR(50), rating INT);", + "sql": "SELECT AVG(rating) FROM ExhibitionRatings WHERE city \u003d \u0027Madrid\u0027;", + "sql_explanation": "This SQL query calculates the average exhibition rating (AVG(rating)) from the \"ExhibitionRatings\" table in Madrid." +}, { + "id": "5186", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum revenue generated in a single day for the \"Impressionist Art\" exhibition?", + "sql_context": "CREATE TABLE daily_revenue (date DATE, exhibition_id INT, revenue DECIMAL(5,2)); INSERT INTO daily_revenue (date, exhibition_id, revenue) VALUES (\u00272022-01-01\u0027, 3, 500.00), (\u00272022-01-02\u0027, 3, 600.00), (\u00272022-01-03\u0027, 4, 700.00);", + "sql": "SELECT MAX(revenue) FROM daily_revenue WHERE exhibition_id \u003d 3;", + "sql_explanation": "This query calculates the maximum revenue generated in a single day for the \"Impressionist Art\" exhibition by selecting MAX function on the revenue column where the exhibition_id is 3." 
+}, { + "id": "5531", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors from \u0027India\u0027?", + "sql_context": "CREATE TABLE visitors (id INT, country VARCHAR(255), exhibition_id INT); INSERT INTO visitors (id, country, exhibition_id) VALUES (1, \u0027India\u0027, 1), (2, \u0027Brazil\u0027, 1), (3, \u0027Russia\u0027, 1), (4, \u0027India\u0027, 2), (5, \u0027Brazil\u0027, 2); CREATE TABLE exhibitions (id INT, name VARCHAR(255), type VARCHAR(255)); INSERT INTO exhibitions (id, name, type) VALUES (1, \u0027Contemporary Art\u0027, \u0027Modern\u0027), (2, \u0027Ancient Civilizations\u0027, \u0027Historical\u0027);", + "sql": "SELECT COUNT(*) FROM visitors WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This SQL query counts the total number of visitors from \u0027India\u0027 by using the COUNT(*) function and filtering for rows with a \u0027country\u0027 value of \u0027India\u0027." 
+}, { + "id": "5539", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many exhibitions are there in the \u0027Asian Art\u0027 category?", + "sql_context": "CREATE TABLE exhibitions (id INT, name VARCHAR(255), type VARCHAR(255)); INSERT INTO exhibitions (id, name, type) VALUES (1, \u0027Impressionism\u0027, \u0027Classic\u0027), (2, \u0027Surrealism\u0027, \u0027Modern\u0027), (3, \u0027Renaissance\u0027, \u0027Classic\u0027), (4, \u0027Asian Art: Landscapes\u0027, \u0027Asian\u0027), (5, \u0027Asian Art: Ceramics\u0027, \u0027Asian\u0027);", + "sql": "SELECT COUNT(*) FROM exhibitions WHERE type \u003d \u0027Asian\u0027;", + "sql_explanation": "This SQL query counts the number of exhibitions in the \u0027Asian Art\u0027 category by using the COUNT(*) function and filtering for rows with a \u0027type\u0027 value of \u0027Asian\u0027." 
+}, { + "id": "5732", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average visitor count for exhibitions?", + "sql_context": "CREATE TABLE Exhibitions (id INT, name TEXT, visitor_count INT); INSERT INTO Exhibitions (id, name, visitor_count) VALUES (1, \u0027Dinosaurs\u0027, 1000), (2, \u0027Egypt\u0027, 800);", + "sql": "SELECT AVG(visitor_count) FROM Exhibitions;", + "sql_explanation": "This query calculates the average visitor count for all exhibitions in the Exhibitions table." +}, { + "id": "949", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of certified sustainable tourism businesses in the Caribbean?", + "sql_context": "CREATE TABLE businesses (id INT, name VARCHAR(30), location VARCHAR(20), certified BOOLEAN); INSERT INTO businesses (id, name, location, certified) VALUES (1, \u0027Caribbean Eco Tours\u0027, \u0027Bahamas\u0027, TRUE), (2, \u0027Green Travel Jamaica\u0027, \u0027Jamaica\u0027, TRUE), (3, \u0027Eco Adventures\u0027, \u0027Puerto Rico\u0027, FALSE);", + "sql": "SELECT COUNT(*) FROM businesses WHERE certified \u003d TRUE AND location IN (\u0027Bahamas\u0027, \u0027Jamaica\u0027, \u0027Puerto Rico\u0027, \u0027Cuba\u0027, \u0027Dominican Republic\u0027, 
\u0027Barbados\u0027, \u0027Haiti\u0027, \u0027Trinidad and Tobago\u0027);", + "sql_explanation": "The query calculates the total number of certified sustainable tourism businesses in the Caribbean by filtering rows with \u0027TRUE\u0027 in the \u0027certified\u0027 column and Caribbean countries in the \u0027location\u0027 column using the IN operator. It then uses the COUNT() function to count the number of rows." +}, { + "id": "2128", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"destinations\" table with the following information: destination_id \u003d 102, destination_name \u003d \u0027Paris\u0027, region \u003d \u0027Europe\u0027, sustainable_practices_score \u003d 8.2", + "sql_context": "CREATE TABLE destinations (destination_id INT, destination_name VARCHAR(50), region VARCHAR(20), sustainable_practices_score DECIMAL(3,1), PRIMARY KEY (destination_id));", + "sql": "INSERT INTO destinations (destination_id, destination_name, region, sustainable_practices_score) VALUES (102, \u0027Paris\u0027, \u0027Europe\u0027, 8.2);", + "sql_explanation": "This query inserts a new record into the \"destinations\" table with the specified information. It uses the VALUES keyword to specify the new record\u0027s data." 
+}, { + "id": "2174", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission of flights from USA to India in 2020?", + "sql_context": "CREATE TABLE flights (flight_id INT, departure_date DATE, departure_country TEXT, arrival_country TEXT, co2_emission DECIMAL); INSERT INTO flights (flight_id, departure_date, departure_country, arrival_country, co2_emission) VALUES (1, \u00272020-01-01\u0027, \u0027USA\u0027, \u0027India\u0027, 100.00), (2, \u00272020-12-31\u0027, \u0027USA\u0027, \u0027India\u0027, 120.00);", + "sql": "SELECT SUM(co2_emission) FROM flights WHERE departure_country \u003d \u0027USA\u0027 AND arrival_country \u003d \u0027India\u0027 AND YEAR(departure_date) \u003d 2020;", + "sql_explanation": "This query calculates the total CO2 emission of flights from the USA to India in 2020 by filtering the flights table based on the departure_country, arrival_country, and departure_date columns, then calculating the total CO2 emission using the SUM function." 
+}, { + "id": "2226", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the destination_marketing table for a new marketing campaign in India focused on eco-tourism.", + "sql_context": "CREATE TABLE destination_marketing (campaign VARCHAR(255), country VARCHAR(255), focus VARCHAR(255)); INSERT INTO destination_marketing (campaign, country, focus) VALUES (\u0027Explore India\u0027, \u0027India\u0027, \u0027Eco-tourism\u0027);", + "sql": "INSERT INTO destination_marketing (campaign, country, focus) VALUES (\u0027Discover India Through Eco-Tourism\u0027, \u0027India\u0027, \u0027Eco-tourism\u0027);", + "sql_explanation": "This query inserts a new record into the destination_marketing table for a new marketing campaign in India focused on eco-tourism. It does this by using the INSERT INTO statement to add a new row to the destination_marketing table with the campaign name, country, and focus all specified." 
+}, { + "id": "2441", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"countries\" table for \u0027Bolivia\u0027", + "sql_context": "CREATE TABLE countries (id INT PRIMARY KEY, name TEXT, continent TEXT, area FLOAT, population INT);", + "sql": "INSERT INTO countries (id, name, continent, area, population) VALUES (123, \u0027Bolivia\u0027, \u0027South America\u0027, 1098581.0, 11673029);", + "sql_explanation": "This query inserts a new record into the \"countries\" table for \u0027Bolivia\u0027. It provides values for all columns: id, name, continent, area, and population." +}, { + "id": "2533", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of tourists visiting India from the USA since 2019?", + "sql_context": "CREATE TABLE tourism_flows (source_country VARCHAR(20), target_country VARCHAR(20), year INT, total_visitors INT); INSERT INTO tourism_flows (source_country, target_country, year, total_visitors) VALUES (\u0027India\u0027, \u0027USA\u0027, 2019, 25000), (\u0027India\u0027, \u0027USA\u0027, 2020, 15000), (\u0027India\u0027, \u0027USA\u0027, 2021, 20000);", + "sql": "SELECT SUM(total_visitors) FROM tourism_flows WHERE source_country \u003d \u0027India\u0027 AND 
target_country \u003d \u0027USA\u0027 AND year \u003e\u003d 2019;", + "sql_explanation": "The SQL query sums the total_visitors from the tourism_flows table where the source_country is India, the target_country is the USA, and the year is greater than or equal to 2019." +}, { + "id": "2634", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of sustainable tourism certifications in Africa?", + "sql_context": "CREATE TABLE sustainable_tourism_certifications (id INT, continent VARCHAR(255), certification_count INT); INSERT INTO sustainable_tourism_certifications (id, continent, certification_count) VALUES (1, \u0027Asia\u0027, 100), (2, \u0027Europe\u0027, 150), (3, \u0027Africa\u0027, 75), (4, \u0027North America\u0027, 200), (5, \u0027South America\u0027, 125), (6, \u0027Australia\u0027, 50);", + "sql": "SELECT certification_count as total_certifications FROM sustainable_tourism_certifications WHERE continent \u003d \u0027Africa\u0027;", + "sql_explanation": "This query retrieves the total number of sustainable tourism certifications in Africa." 
+}, { + "id": "2746", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of visitors to Greece from Canada in a single month?", + "sql_context": "CREATE TABLE visitor_stats (id INT PRIMARY KEY, visitor_country VARCHAR(50), year INT, month INT, num_visitors INT); INSERT INTO visitor_stats (id, visitor_country, year, month, num_visitors) VALUES (1, \u0027Canada\u0027, 2019, 6, 15000); INSERT INTO visitor_stats (id, visitor_country, year, month, num_visitors) VALUES (2, \u0027Canada\u0027, 2019, 9, 18000);", + "sql": "SELECT MAX(num_visitors) FROM visitor_stats WHERE visitor_country \u003d \u0027Canada\u0027 AND year \u003d 2019 AND month IS NOT NULL;", + "sql_explanation": "The SQL query finds the maximum number of visitors to Greece from Canada in a single month by using the MAX function on the \u0027num_visitors\u0027 column, filtering the data with the WHERE clause for the visitor country, year, and month." 
+}, { + "id": "2821", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries with the most tourists in 2025 based on current trends.", + "sql_context": "CREATE TABLE future_trends (country VARCHAR(50), year INT, projected_visitors INT); INSERT INTO future_trends (country, year, projected_visitors) VALUES (\u0027France\u0027, 2025, 25000000), (\u0027Spain\u0027, 2025, 20000000), (\u0027Italy\u0027, 2025, 18000000), (\u0027Japan\u0027, 2025, 16000000), (\u0027Germany\u0027, 2025, 15000000);", + "sql": "SELECT country, projected_visitors FROM future_trends WHERE year \u003d 2025 ORDER BY projected_visitors DESC LIMIT 3;", + "sql_explanation": "The SQL query lists the top 3 countries with the most projected visitors in 2025 by using the ORDER BY clause to sort the data in descending order of the projected number of visitors and the LIMIT clause to restrict the results to the top 3 records." 
+}, { + "id": "2868", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average trip duration for tourists visiting South Africa from Asia?", + "sql_context": "CREATE TABLE trip_duration (destination_country VARCHAR(50), visitor_country VARCHAR(50), avg_duration FLOAT); INSERT INTO trip_duration (destination_country, visitor_country, avg_duration) VALUES (\u0027South Africa\u0027, \u0027Asia\u0027, 10.5);", + "sql": "SELECT avg_duration FROM trip_duration WHERE destination_country \u003d \u0027South Africa\u0027 AND visitor_country \u003d \u0027Asia\u0027;", + "sql_explanation": "This query calculates the average trip duration for tourists visiting South Africa from Asia by selecting the avg_duration from the trip_duration table where the destination_country is South Africa and the visitor_country is Asia." 
+}, { + "id": "2894", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many international tourists visited Japan in the last 3 years?", + "sql_context": "CREATE TABLE tourists (tourist_id INT, visited_date DATE, country TEXT); INSERT INTO tourists (tourist_id, visited_date, country) VALUES (1, \u00272020-01-01\u0027, \u0027Japan\u0027), (2, \u00272019-05-05\u0027, \u0027USA\u0027), (3, \u00272018-12-31\u0027, \u0027Japan\u0027);", + "sql": "SELECT COUNT(*) FROM tourists WHERE country \u003d \u0027Japan\u0027 AND visited_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 3 YEAR);", + "sql_explanation": "This query counts the number of international tourists who visited Japan in the last 3 years by filtering the tourists table based on the country and visited_date columns, then counting the number of rows using the COUNT function." 
+}, { + "id": "2942", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of visitors to destinations in South America with sustainable tourism certifications?", + "sql_context": "CREATE TABLE destinations (name VARCHAR(255), country VARCHAR(255), continent VARCHAR(255), sustainable_certification BOOLEAN, visitors INT); INSERT INTO destinations (name, country, continent, sustainable_certification, visitors) VALUES (\u0027City J\u0027, \u0027Country J\u0027, \u0027South America\u0027, TRUE, 900000), (\u0027City K\u0027, \u0027Country K\u0027, \u0027South America\u0027, FALSE, 700000), (\u0027City L\u0027, \u0027Country L\u0027, \u0027South America\u0027, TRUE, 1000000);", + "sql": "SELECT AVG(visitors) FROM destinations WHERE continent \u003d \u0027South America\u0027 AND sustainable_certification \u003d TRUE;", + "sql_explanation": "This query calculates the average number of visitors to destinations in South America with sustainable tourism certifications (sustainable_certification \u003d TRUE) from the destinations table." 
+}, { + "id": "3369", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of tourists visiting Paris from the US in 2022?", + "sql_context": "CREATE TABLE tourism_data (id INT, name VARCHAR(50), country VARCHAR(50), age INT, visit_year INT); INSERT INTO tourism_data (id, name, country, age, visit_year) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 35, 2022), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 28, 2022), (3, \u0027Mike Johnson\u0027, \u0027USA\u0027, 42, 2022);", + "sql": "SELECT AVG(age) FROM tourism_data WHERE country \u003d \u0027USA\u0027 AND visit_year \u003d 2022 AND name IS NOT NULL;", + "sql_explanation": "The SQL query calculates the average age of tourists from the USA who visited Paris (or any other destination represented in the tourism_data table) in 2022. It filters the table based on the country and visit_year, excludes NULL names (assuming the name column contains valid names), and then calculates the average age." 
+}, { + "id": "3372", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 sustainable destinations in Asia?", + "sql_context": "CREATE TABLE destinations (id INT, name VARCHAR(255), sustainability_score INT); INSERT INTO destinations (id, name, sustainability_score) VALUES (1, \u0027Japan\u0027, 90), (2, \u0027Thailand\u0027, 85), (3, \u0027Vietnam\u0027, 80), (4, \u0027India\u0027, 75), (5, \u0027China\u0027, 70);", + "sql": "SELECT name FROM destinations WHERE country IN (\u0027Asia\u0027) ORDER BY sustainability_score DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 sustainable destinations in Asia. The WHERE clause filters the results based on the country column and the ORDER BY clause sorts the results in descending order based on the sustainability_score column. The LIMIT clause restricts the number of results." 
+}, { + "id": "3394", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of destinations in Africa with sustainable tourism certifications", + "sql_context": "CREATE TABLE destinations (name VARCHAR(255), country VARCHAR(255), continent VARCHAR(255), sustainable_certification BOOLEAN); INSERT INTO destinations (name, country, continent, sustainable_certification) VALUES (\u0027City D\u0027, \u0027Country D\u0027, \u0027Africa\u0027, TRUE), (\u0027City E\u0027, \u0027Country E\u0027, \u0027Africa\u0027, FALSE), (\u0027City F\u0027, \u0027Country F\u0027, \u0027Africa\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM destinations WHERE continent \u003d \u0027Africa\u0027 AND sustainable_certification \u003d TRUE;", + "sql_explanation": "This query counts the number of destinations in the destinations table with sustainable tourism certifications (sustainable_certification \u003d TRUE) in Africa (continent \u003d \u0027Africa\u0027)." 
+}, { + "id": "3562", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the destination with the lowest carbon footprint in Asia.", + "sql_context": "CREATE TABLE IF NOT EXISTS carbon_footprint (id INT PRIMARY KEY, name TEXT, region TEXT, carbon_footprint FLOAT); INSERT INTO carbon_footprint (id, name, region, carbon_footprint) VALUES (1, \u0027EcoResort\u0027, \u0027Asia\u0027, 12.5), (2, \u0027GreenParadise\u0027, \u0027Asia\u0027, 11.3), (3, \u0027SustainableCity\u0027, \u0027Europe\u0027, 10.9);", + "sql": "SELECT name FROM carbon_footprint WHERE region \u003d \u0027Asia\u0027 ORDER BY carbon_footprint ASC LIMIT 1;", + "sql_explanation": "This SQL query retrieves the destination with the lowest carbon footprint in Asia by using the ORDER BY clause and the LIMIT keyword. It filters the carbon_footprint table for Asian destinations and sorts the results by carbon_footprint in ascending order." 
+}, { + "id": "3610", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of eco-lodges in Colombia, Costa Rica, and Ecuador, and how many of them have received a sustainability certification?", + "sql_context": "CREATE TABLE eco_lodges (country VARCHAR(50), certified INT); INSERT INTO eco_lodges (country, certified) VALUES (\u0027Colombia\u0027, 200), (\u0027Costa Rica\u0027, 300), (\u0027Ecuador\u0027, 250);", + "sql": "SELECT SUM(certified) FROM eco_lodges WHERE country IN (\u0027Colombia\u0027, \u0027Costa Rica\u0027, \u0027Ecuador\u0027);", + "sql_explanation": "This SQL query calculates the total number of eco-lodges in Colombia, Costa Rica, and Ecuador that have received a sustainability certification by summing the certified column values where the country is either \u0027Colombia\u0027, \u0027Costa Rica\u0027, or \u0027Ecuador\u0027." 
+}, { + "id": "3678", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of accommodations in France that are not eco-friendly?", + "sql_context": "CREATE TABLE accommodations (id INT, name TEXT, country TEXT, rating FLOAT, is_eco_friendly BOOLEAN); INSERT INTO accommodations (id, name, country, rating, is_eco_friendly) VALUES (1, \u0027Hotel Laguna\u0027, \u0027France\u0027, 4.5, false), (2, \u0027Hotel Tropical\u0027, \u0027France\u0027, 3.2, true);", + "sql": "SELECT AVG(rating) FROM accommodations WHERE country \u003d \u0027France\u0027 AND is_eco_friendly \u003d false;", + "sql_explanation": "This query calculates the average rating of accommodations in France that are not eco-friendly. It does this by filtering the accommodations table to only include rows where the country is France and is_eco_friendly is false, and then calculating the average of the rating column." 
+}, { + "id": "3796", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of eco-certified accommodations in France", + "sql_context": "CREATE TABLE accommodations (id INT, country VARCHAR(50), is_eco_certified BOOLEAN); INSERT INTO accommodations (id, country, is_eco_certified) VALUES (1, \u0027France\u0027, TRUE), (2, \u0027Italy\u0027, FALSE);", + "sql": "SELECT COUNT(*) FROM accommodations WHERE country \u003d \u0027France\u0027 AND is_eco_certified \u003d TRUE;", + "sql_explanation": "This query counts the number of eco-certified accommodations in France by selecting all records with a country value of \u0027France\u0027 and is_eco_certified value of TRUE, then counting those records." 
+}, { + "id": "3909", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission per international flight arriving in Australia?", + "sql_context": "CREATE TABLE flight_emissions (flight_number VARCHAR(255), origin VARCHAR(255), destination VARCHAR(255), year INT, co2_emission INT); INSERT INTO flight_emissions (flight_number, origin, destination, year, co2_emission) VALUES (\u0027QF1\u0027, \u0027Los Angeles, USA\u0027, \u0027Sydney, Australia\u0027, 2015, 113000), (\u0027CX1\u0027, \u0027Hong Kong, China\u0027, \u0027Sydney, Australia\u0027, 2015, 97000);", + "sql": "SELECT AVG(co2_emission) FROM flight_emissions WHERE destination \u003d \u0027Sydney, Australia\u0027;", + "sql_explanation": "The SQL query calculates the average CO2 emission for all records in the flight_emissions table where the destination is \u0027Sydney, Australia\u0027." 
+}, { + "id": "4000", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of tourists visiting Yosemite National Park?", + "sql_context": "CREATE TABLE yosemite_visitors (id INT, name VARCHAR(50), age INT, nationality VARCHAR(50)); INSERT INTO yosemite_visitors (id, name, age, nationality) VALUES (1, \u0027John Doe\u0027, 35, \u0027American\u0027), (2, \u0027Jane Smith\u0027, 28, \u0027Canadian\u0027);", + "sql": "SELECT AVG(age) FROM yosemite_visitors WHERE nationality IN (\u0027American\u0027, \u0027Canadian\u0027);", + "sql_explanation": "This query calculates the average age of tourists visiting Yosemite National Park. It filters the data to only include visitors from America and Canada, as specified in the prompt. Then, it calculates the average age using the AVG function." 
+}, { + "id": "4044", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average hotel price in Sydney, Australia?", + "sql_context": "CREATE TABLE hotel_prices (id INT, city VARCHAR(20), country VARCHAR(20), price INT); INSERT INTO hotel_prices (id, city, country, price) VALUES (1, \u0027Sydney\u0027, \u0027Australia\u0027, 200), (2, \u0027Sydney\u0027, \u0027Australia\u0027, 250), (3, \u0027Melbourne\u0027, \u0027Australia\u0027, 150);", + "sql": "SELECT AVG(price) FROM hotel_prices WHERE city \u003d \u0027Sydney\u0027 AND country \u003d \u0027Australia\u0027;", + "sql_explanation": "The SQL query calculates the average hotel price in Sydney, Australia by selecting the AVG function on the price column, filtering the data where the city is Sydney and the country is Australia." 
+}, { + "id": "4219", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of eco-friendly accommodations in Australia?", + "sql_context": "CREATE TABLE eco_accommodations_australia (id INT, country VARCHAR(50), rating DECIMAL(2,1)); INSERT INTO eco_accommodations_australia (id, country, rating) VALUES (1, \u0027Australia\u0027, 4.2), (2, \u0027Australia\u0027, 4.5), (3, \u0027Australia\u0027, 4.8);", + "sql": "SELECT MIN(rating) FROM eco_accommodations_australia WHERE country \u003d \u0027Australia\u0027;", + "sql_explanation": "This query calculates the minimum rating of eco-friendly accommodations in Australia by using the MIN function on the rating column, filtering the data for Australia using the WHERE clause." 
+}, { + "id": "4555", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of tourists visiting national parks in Canada and the US?", + "sql_context": "CREATE TABLE national_parks (country VARCHAR(20), name VARCHAR(50), visitors INT); INSERT INTO national_parks (country, name, visitors) VALUES (\u0027Canada\u0027, \u0027Banff\u0027, 4000000), (\u0027Canada\u0027, \u0027Jasper\u0027, 2500000), (\u0027US\u0027, \u0027Yosemite\u0027, 3000000), (\u0027US\u0027, \u0027Yellowstone\u0027, 4500000);", + "sql": "SELECT SUM(visitors) FROM national_parks WHERE country IN (\u0027Canada\u0027, \u0027US\u0027);", + "sql_explanation": "Summarize the total number of tourists visiting national parks in Canada and the US by summing the \u0027visitors\u0027 column for rows where \u0027country\u0027 is either \u0027Canada\u0027 or \u0027US\u0027." 
+}, { + "id": "4646", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of international tourists in the first quarter of each year?", + "sql_context": "CREATE TABLE tourism_stats (country VARCHAR(50), visitors INT, year INT, quarter INT); INSERT INTO tourism_stats (country, visitors, year, quarter) VALUES (\u0027Spain\u0027, 15, 2020, 1), (\u0027Germany\u0027, 18, 2020, 1), (\u0027Spain\u0027, 16, 2021, 1), (\u0027Germany\u0027, 19, 2021, 1);", + "sql": "SELECT AVG(visitors) as avg_visitors FROM tourism_stats WHERE quarter \u003d 1;", + "sql_explanation": "The SQL query calculates the average number of visitors for the first quarter of each year from the tourism_stats table." 
+}, { + "id": "4697", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 3 destinations for sustainable tourism in 2022.", + "sql_context": "CREATE TABLE destinations (id INT, name VARCHAR(255), sustainability_score INT); INSERT INTO destinations (id, name, sustainability_score) VALUES (1, \u0027Costa Rica\u0027, 90), (2, \u0027Norway\u0027, 85), (3, \u0027New Zealand\u0027, 80), (4, \u0027Iceland\u0027, 75), (5, \u0027Finland\u0027, 70);", + "sql": "SELECT name FROM destinations ORDER BY sustainability_score DESC LIMIT 3;", + "sql_explanation": "This query selects the top 3 destinations for sustainable tourism in 2022 based on their sustainability score. The ORDER BY clause sorts the results in descending order and the LIMIT clause restricts the number of results." 
+}, { + "id": "4731", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the travel_advisory table to set the status to \u0027Caution\u0027 for the record with the location \u0027Mexico\u0027", + "sql_context": "CREATE TABLE travel_advisory (location VARCHAR(255), status VARCHAR(255), last_updated DATE);", + "sql": "UPDATE travel_advisory SET status \u003d \u0027Caution\u0027 WHERE location \u003d \u0027Mexico\u0027;", + "sql_explanation": "* This query updates the travel_advisory table to set the status to \u0027Caution\u0027 for the record with the location \u0027Mexico\u0027. * It uses the UPDATE statement, specifies the travel_advisory table, and sets the status column value to \u0027Caution\u0027 for the record matching the location \u0027Mexico\u0027." 
+}, { + "id": "4734", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries had the highest visitor growth in 2021 compared to 2020?", + "sql_context": "CREATE TABLE visitor_growth (country VARCHAR(255), growth INT); INSERT INTO visitor_growth (country, growth) VALUES (\u0027Brazil\u0027, 250000), (\u0027India\u0027, 300000), (\u0027Indonesia\u0027, 400000), (\u0027Mexico\u0027, 200000), (\u0027South Korea\u0027, 350000), (\u0027United States\u0027, 150000);", + "sql": "SELECT country, growth FROM visitor_growth ORDER BY growth DESC LIMIT 5;", + "sql_explanation": "This query retrieves the five countries with the highest visitor growth in 2021 compared to 2020. It uses the ORDER BY clause to sort the results by growth in descending order and the LIMIT clause to limit the results to the top five." 
+}, { + "id": "5143", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of accommodations in Brazil?", + "sql_context": "CREATE TABLE accommodations (id INT, name TEXT, country TEXT, rating FLOAT); INSERT INTO accommodations (id, name, country, rating) VALUES (1, \u0027Hotel Fasano Rio de Janeiro\u0027, \u0027Brazil\u0027, 4.7), (2, \u0027Belmond Copacabana Palace\u0027, \u0027Brazil\u0027, 4.6), (3, \u0027Hotel Emiliano Rio\u0027, \u0027Brazil\u0027, 4.5);", + "sql": "SELECT MIN(rating) FROM accommodations WHERE country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This SQL query calculates the minimum rating of accommodations in Brazil by filtering the accommodations table with country set to Brazil, and then applying the MIN function to the rating column." 
+}, { + "id": "5168", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum arrival age of visitors from the United States?", + "sql_context": "CREATE TABLE tourism_data (visitor_id INT, country VARCHAR(50), arrival_age INT); INSERT INTO tourism_data (visitor_id, country, arrival_age) VALUES (1, \u0027USA\u0027, 35), (2, \u0027USA\u0027, 42), (3, \u0027Japan\u0027, 28), (4, \u0027Australia\u0027, 31), (5, \u0027UK\u0027, 29), (6, \u0027UK\u0027, 34), (7, \u0027Canada\u0027, 22), (8, \u0027Canada\u0027, 25); CREATE VIEW us_visitors AS SELECT * FROM tourism_data WHERE country \u003d \u0027USA\u0027;", + "sql": "SELECT MIN(arrival_age) FROM us_visitors WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the minimum arrival age of visitors from the United States by using the MIN function on the arrival_age column of the us_visitors view, which filters the tourism_data table for US visitors." 
+}, { + "id": "5180", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all attractions in Sydney with a rating below 3.5.", + "sql_context": "CREATE TABLE attractions (id INT, name VARCHAR(50), city VARCHAR(20), rating FLOAT); INSERT INTO attractions (id, name, city, rating) VALUES (1, \u0027Opera House\u0027, \u0027Sydney\u0027, 4.6), (2, \u0027Bridge\u0027, \u0027Sydney\u0027, 3.8), (3, \u0027Tower\u0027, \u0027New York\u0027, 4.8);", + "sql": "DELETE FROM attractions WHERE city \u003d \u0027Sydney\u0027 AND rating \u003c 3.5;", + "sql_explanation": "This query deletes all attractions in Sydney with a rating below 3.5 by using the DELETE statement, filtering for rows where the city is \u0027Sydney\u0027 and the rating is less than 3.5." +}, { + "id": "5456", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by tourism in Bali in 2020?", + "sql_context": "CREATE TABLE bali_tourism (id INT, year INT, revenue INT); INSERT INTO bali_tourism (id, year, revenue) VALUES (1, 2019, 10000000), (2, 2020, 5000000);", + "sql": "SELECT SUM(revenue) FROM bali_tourism WHERE year \u003d 2020;", + "sql_explanation": "This query calculates the total revenue generated by tourism in Bali in 2020. 
It filters the data to only include records where the year is 2020, and then uses the SUM function to calculate the total revenue from the matching records. This returns a single value representing the total revenue." +}, { + "id": "5561", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the travel_advisory table where the status is \u0027Caution\u0027", + "sql_context": "CREATE TABLE travel_advisory (location VARCHAR(255), status VARCHAR(255), last_updated DATE);", + "sql": "DELETE FROM travel_advisory WHERE status \u003d \u0027Caution\u0027;", + "sql_explanation": "* This query deletes all records in the travel_advisory table where the status is \u0027Caution\u0027. * It uses the DELETE statement and specifies the travel_advisory table and the condition to match rows for deletion." 
+}, { + "id": "5601", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sustainability score for all destinations?", + "sql_context": "CREATE TABLE destinations (id INT, name VARCHAR(255), sustainability_score INT); INSERT INTO destinations (id, name, sustainability_score) VALUES (1, \u0027Costa Rica\u0027, 90), (2, \u0027Norway\u0027, 85), (3, \u0027New Zealand\u0027, 80), (4, \u0027Iceland\u0027, 75), (5, \u0027Finland\u0027, 70);", + "sql": "SELECT AVG(sustainability_score) FROM destinations;", + "sql_explanation": "This query calculates the average sustainability score for all destinations. The AVG function calculates the average sustainability_score." 
+}, { + "id": "5613", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of eco-tourism activities per country?", + "sql_context": "CREATE TABLE countries (country_name VARCHAR(50), eco_tourism_activities INT); INSERT INTO countries (country_name, eco_tourism_activities) VALUES (\u0027Costa Rica\u0027, 100), (\u0027Nepal\u0027, 80), (\u0027Bhutan\u0027, 60), (\u0027New Zealand\u0027, 120), (\u0027Galapagos\u0027, 150);", + "sql": "SELECT AVG(eco_tourism_activities) FROM countries;", + "sql_explanation": "Calculate the average number of eco-tourism activities per country by summing all eco-tourism activities and dividing by the count of countries." +}, { + "id": "5741", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027sustainable_tourism_practices\u0027 table.", + "sql_context": "CREATE TABLE sustainable_tourism_practices (id INT, title TEXT, description TEXT, country TEXT);", + "sql": "DELETE FROM sustainable_tourism_practices;", + "sql_explanation": "Deletes all records from the sustainable_tourism_practices table by using the DELETE statement without a WHERE clause." 
+}, { + "id": "5798", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum rating of any attraction?", + "sql_context": "CREATE TABLE attractions (id INT, name VARCHAR(50), city VARCHAR(20), rating FLOAT); INSERT INTO attractions (id, name, city, rating) VALUES (1, \u0027Opera House\u0027, \u0027Sydney\u0027, 4.6), (2, \u0027Bridge\u0027, \u0027Sydney\u0027, 3.8), (3, \u0027Tower\u0027, \u0027New York\u0027, 4.8);", + "sql": "SELECT MAX(rating) FROM attractions;", + "sql_explanation": "This query finds the maximum rating of any attraction by using the MAX function on the rating column, and returning the top value." 
+}, { + "id": "2918", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete any forest plots that are larger than 50 hectares and have a population of less than 500 for the associated species.", + "sql_context": "CREATE TABLE forest_plot (id INT PRIMARY KEY, size FLOAT, species_id INT, FOREIGN KEY (species_id) REFERENCES species(id)); CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(50), population INT);", + "sql": "DELETE FROM forest_plot fp USING species s WHERE fp.size \u003e 50 AND fp.species_id \u003d s.id AND s.population \u003c 500;", + "sql_explanation": "This query uses a delete statement with a USING clause to remove any forest plots that are larger than 50 hectares and have a population of less than 500 for the associated species. The USING clause is used to join the forest_plot table with the species table on the species_id column. The query then uses the WHERE clause to specify the conditions for deleting records from the forest_plot table." 
+}, { + "id": "3134", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum carbon sequestration in \u0027European Forests\u0027 in 2020?", + "sql_context": "CREATE TABLE EuropeanForests (region VARCHAR(20), year INT, carbon_sequestration FLOAT); INSERT INTO EuropeanForests (region, year, carbon_sequestration) VALUES (\u0027European Forests\u0027, 2015, 11.22), (\u0027European Forests\u0027, 2016, 22.33), (\u0027European Forests\u0027, 2017, 33.44), (\u0027European Forests\u0027, 2018, 44.55), (\u0027European Forests\u0027, 2019, 55.66), (\u0027European Forests\u0027, 2020, 66.77);", + "sql": "SELECT MIN(carbon_sequestration) FROM EuropeanForests WHERE region \u003d \u0027European Forests\u0027 AND year \u003d 2020;", + "sql_explanation": "This query retrieves the minimum carbon sequestration in \u0027European Forests\u0027 in 2020 by finding the minimum \u0027carbon_sequestration\u0027 value in the \u0027EuropeanForests\u0027 table that meets the specified conditions." 
+}, { + "id": "3510", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 tree species with the lowest carbon sequestration rate in the state_forests schema?", + "sql_context": "CREATE TABLE state_forests.carbon_sequestration (species VARCHAR(255), sequestration_rate DECIMAL(5,2));", + "sql": "SELECT species FROM state_forests.carbon_sequestration ORDER BY sequestration_rate ASC LIMIT 5;", + "sql_explanation": "This query lists the top 5 tree species with the lowest carbon sequestration rate in the state_forests schema by using the ORDER BY clause to sort the results by the sequestration_rate column in ascending order, and then using the LIMIT clause to only return the top 5 rows." 
+}, { + "id": "3640", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record to the timber_production table for \u0027California\u0027 with an annual production of 2000", + "sql_context": "CREATE TABLE timber_production (id INT PRIMARY KEY, state TEXT, annual_production INT);", + "sql": "INSERT INTO timber_production (id, state, annual_production) VALUES (1, \u0027California\u0027, 2000);", + "sql_explanation": "* The SQL query creates a new record in the timber_production table for \u0027California\u0027 with an annual production of 2000.* The INSERT INTO statement is used to add the new data to the table.* The id field is automatically generated by the database." 
+}, { + "id": "4095", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all protected forests located in \u0027South America\u0027 with an area smaller than 600.", + "sql_context": "CREATE TABLE protected_forests_2 (id INT, name VARCHAR(50), area FLOAT, region VARCHAR(50)); INSERT INTO protected_forests_2 (id, name, area, region) VALUES (1, \u0027Sierra Forest\u0027, 550.0, \u0027South America\u0027), (2, \u0027Rainforest Reserve\u0027, 700.0, \u0027South America\u0027);", + "sql": "SELECT name FROM protected_forests_2 WHERE area \u003c 600 AND region \u003d \u0027South America\u0027;", + "sql_explanation": "The SQL query lists all protected forests located in \u0027South America\u0027 with an area smaller than 600 by selecting the \u0027name\u0027 column in the \u0027protected_forests_2\u0027 table where the \u0027area\u0027 is smaller than 600 and the \u0027region\u0027 is \u0027South America\u0027." 
+}, { + "id": "4661", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Provide the number of wildlife species in the \u0027Asian\u0027 region.", + "sql_context": "CREATE TABLE wildlife_species (region VARCHAR(255), species INT); INSERT INTO wildlife_species (region, species) VALUES (\u0027Asian\u0027, 500), (\u0027African\u0027, 400), (\u0027European\u0027, 300), (\u0027Australian\u0027, 600);", + "sql": "SELECT region, SUM(species) FROM wildlife_species WHERE region \u003d \u0027Asian\u0027;", + "sql_explanation": "This query provides the number of wildlife species in the \u0027Asian\u0027 region by summing the \u0027species\u0027 column where the \u0027region\u0027 is equal to \u0027Asian\u0027." +}, { + "id": "4810", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the wildlife species that inhabit coniferous forests.", + "sql_context": "CREATE TABLE wildlife_habitat (species VARCHAR(255), forest_type VARCHAR(255));", + "sql": "SELECT species FROM wildlife_habitat WHERE forest_type \u003d \u0027coniferous\u0027;", + "sql_explanation": "The SQL query lists all the wildlife species that inhabit coniferous forests by using the WHERE clause to filter the results." 
+}, { + "id": "4841", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon sequestration of all forests in the \u0027Amazon\u0027 region?", + "sql_context": "CREATE TABLE forests (id INT, name TEXT, area FLOAT, region TEXT, carbon_sequestration FLOAT); INSERT INTO forests (id, name, area, region, carbon_sequestration) VALUES (1, \u0027Amazon Rainforest\u0027, 6785845.2, \u0027Amazon\u0027, 123456.7), (2, \u0027Atlantic Forest\u0027, 123456.7, \u0027Amazon\u0027, 23456.7);", + "sql": "SELECT SUM(carbon_sequestration) FROM forests WHERE region \u003d \u0027Amazon\u0027;", + "sql_explanation": "This query calculates the total carbon sequestration of all forests in the \u0027Amazon\u0027 region by summing the \u0027carbon_sequestration\u0027 column in the \u0027forests\u0027 table where \u0027region\u0027 is \u0027Amazon\u0027." 
+}, { + "id": "5018", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of Habitat1 to \u0027New Habitat Name\u0027 in the wildlife habitat table.", + "sql_context": "CREATE TABLE wildlife_habitat (id INT, name VARCHAR(50), area FLOAT); INSERT INTO wildlife_habitat (id, name, area) VALUES (1, \u0027Habitat1\u0027, 150.3), (2, \u0027Habitat2\u0027, 250.8), (3, \u0027Habitat3\u0027, 175.5);", + "sql": "UPDATE wildlife_habitat SET name \u003d \u0027New Habitat Name\u0027 WHERE id \u003d 1;", + "sql_explanation": "This query modifies the \u0027wildlife_habitat\u0027 table by changing the name of the record with id 1 to \u0027New Habitat Name\u0027." 
+}, { + "id": "5334", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "list all wildlife habitats with their respective areas in descending order", + "sql_context": "CREATE SCHEMA forestry; CREATE TABLE habitats (id INT, name VARCHAR(50), area FLOAT); INSERT INTO habitats (id, name, area) VALUES (1, \u0027deer\u0027, 25.3), (2, \u0027wolf\u0027, 18.2), (3, \u0027bear\u0027, 31.5);", + "sql": "SELECT name, area FROM forestry.habitats ORDER BY area DESC;", + "sql_explanation": "This query retrieves the name and area columns from the habitats table within the forestry schema and sorts the results by the area column in descending order. The result is a list of wildlife habitats with their respective areas, sorted from largest to smallest." 
+}, { + "id": "5486", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of trees in the \u0027old_growth_forest\u0027 table older than 200 years.", + "sql_context": "CREATE TABLE old_growth_forest (id INT, species VARCHAR(50), age INT);", + "sql": "SELECT COUNT(*) FROM old_growth_forest WHERE age \u003e 200;", + "sql_explanation": "This query calculates the number of trees in the \u0027old_growth_forest\u0027 table that are older than 200 years by using the WHERE clause and the COUNT function." +}, { + "id": "5623", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many species of wildlife inhabit \u0027Asian Forests\u0027?", + "sql_context": "CREATE TABLE Asian_Forests (species VARCHAR(50)); INSERT INTO Asian_Forests (species) VALUES (\u0027Tiger\u0027), (\u0027Leopard\u0027), (\u0027Elephant\u0027), (\u0027Rhinoceros\u0027), (\u0027Pangolin\u0027), (\u0027Monkey\u0027);", + "sql": "SELECT COUNT(DISTINCT species) FROM Asian_Forests;", + "sql_explanation": "Count the number of distinct species of wildlife in \u0027Asian Forests\u0027." 
+}, { + "id": "5627", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the current population of the \u0027Oak\u0027 species?", + "sql_context": "CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(255), population INT); INSERT INTO species (id, name, population) VALUES (1, \u0027Spruce\u0027, 5000000); INSERT INTO species (id, name, population) VALUES (2, \u0027Pine\u0027, 6000000); INSERT INTO species (id, name, population) VALUES (3, \u0027Oak\u0027, 4000000);", + "sql": "SELECT population FROM species WHERE name \u003d \u0027Oak\u0027;", + "sql_explanation": "Select the population column from the species table where the name is \u0027Oak\u0027." 
+}, { + "id": "5677", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all wildlife habitats that have an area greater than 60", + "sql_context": "CREATE TABLE wildlife_habitat (id INT, name VARCHAR(50), area FLOAT); INSERT INTO wildlife_habitat (id, name, area) VALUES (1, \u0027Habitat1\u0027, 50.23); INSERT INTO wildlife_habitat (id, name, area) VALUES (2, \u0027Habitat2\u0027, 75.64); INSERT INTO wildlife_habitat (id, name, area) VALUES (3, \u0027Habitat3\u0027, 85.34);", + "sql": "SELECT * FROM wildlife_habitat WHERE area \u003e 60;", + "sql_explanation": "This SQL query retrieves all rows from the \u0027wildlife_habitat\u0027 table where the \u0027area\u0027 column is greater than 60." 
+}, { + "id": "1129", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records in the cargo_tracking table for vessel \"Sky Dancer\" with the following data: (2022-03-15, \u0027Los Angeles\u0027, \u0027New York\u0027, 1200)", + "sql_context": "CREATE TABLE cargo_tracking ( voyage_date DATE, departure_port VARCHAR(255), destination_port VARCHAR(255), cargo_weight INT, vessel_name VARCHAR(255));", + "sql": "INSERT INTO cargo_tracking (voyage_date, departure_port, destination_port, cargo_weight, vessel_name) VALUES (\u00272022-03-15\u0027, \u0027Los Angeles\u0027, \u0027New York\u0027, 1200, \u0027Sky Dancer\u0027);", + "sql_explanation": "This query inserts records in the cargo_tracking table for vessel \"Sky Dancer\" with the following data: March 15th, 2022, Los Angeles, New York, and 1200 cargo weight." 
+}, { + "id": "1810", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for vessel \u0027VesselI\u0027 with a speed of 16.5 knots, located near the coast of Greece on October 1, 2021.", + "sql_context": "CREATE TABLE vessel_performance (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50), speed FLOAT, location VARCHAR(50), timestamp DATETIME);", + "sql": "INSERT INTO vessel_performance (name, type, speed, location, timestamp) VALUES (\u0027VesselI\u0027, \u0027Cargo\u0027, 16.5, \u0027Greece Coast\u0027, \u00272021-10-01 10:00:00\u0027);", + "sql_explanation": "This query inserts a new record for vessel \u0027VesselI\u0027 with a speed of 16.5 knots, located near the coast of Greece on October 1, 2021. It does this by specifying the values for each column in the vessel_performance table." 
+}, { + "id": "2063", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of vessels that docked in the port of San Francisco in the last week with a speed greater than 25 knots?", + "sql_context": "CREATE TABLE Vessels (ID INT, Name TEXT, Speed FLOAT, DockedAt DATETIME); INSERT INTO Vessels (ID, Name, Speed, DockedAt) VALUES (1, \u0027Vessel1\u0027, 20.5, \u00272022-01-01 10:00:00\u0027), (2, \u0027Vessel2\u0027, 25.3, \u00272022-01-05 14:30:00\u0027), (3, \u0027Vessel3\u0027, 30.0, \u00272022-02-01 11:00:00\u0027); CREATE TABLE Ports (ID INT, Name TEXT); INSERT INTO Ports (ID, Name) VALUES (1, \u0027Oakland\u0027), (2, \u0027San_Francisco\u0027);", + "sql": "SELECT COUNT(*) FROM Vessels WHERE Speed \u003e 25 AND DockedAt \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) AND Ports.Name \u003d \u0027San_Francisco\u0027;", + "sql_explanation": "Count the number of vessels that docked in the San Francisco port in the past week with a speed greater than 25 knots by joining the Vessels and Ports tables based on the DockedAt column and the Name column respectively, and applying the COUNT function on the Vessels table." 
+}, { + "id": "2118", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many incidents were reported for VesselB in the last year?", + "sql_context": "CREATE TABLE incidents(id INT, vessel_id INT, incident_date DATE); INSERT INTO incidents VALUES (1, 2, \u00272021-09-15\u0027), (2, 2, \u00272022-02-03\u0027);", + "sql": "SELECT COUNT(*) FROM incidents WHERE vessel_id \u003d 2 AND incident_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND CURRENT_DATE;", + "sql_explanation": "Count the number of incidents reported for VesselB (id\u003d2) in the last year by filtering records between the current date minus 1 year and the current date." +}, { + "id": "2385", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight for vessels in the Arctic region?", + "sql_context": "CREATE TABLE cargo_data (id INT, vessel_name TEXT, cargo_weight INT, region TEXT, loading_date DATE);", + "sql": "SELECT AVG(cargo_weight) FROM cargo_data WHERE region \u003d \u0027Arctic\u0027 AND loading_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "This SQL query calculates the average cargo weight for vessels in the Arctic region in the past month. It does this by filtering the rows with the WHERE clause based on the region and loading_date. 
The AVG function is then applied to the cargo_weight column to calculate the average." +}, { + "id": "2865", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of cargos loaded in the US region in 2021.", + "sql_context": "CREATE TABLE CargoTracking (CargoID INT, LoadDate DATE, LoadLocation VARCHAR(50)); INSERT INTO CargoTracking (CargoID, LoadDate, LoadLocation) VALUES (1, \u00272021-01-01\u0027, \u0027New York\u0027), (2, \u00272021-02-15\u0027, \u0027Miami\u0027), (3, \u00272020-12-31\u0027, \u0027Los Angeles\u0027);", + "sql": "SELECT COUNT(CargoID) FROM CargoTracking WHERE EXTRACT(YEAR FROM LoadDate) \u003d 2021 AND LoadLocation LIKE \u0027USA%\u0027;", + "sql_explanation": "Count the number of records in the CargoTracking table where the LoadDate\u0027s year is 2021 and LoadLocation starts with \u0027USA\u0027." 
+}, { + "id": "3679", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight of vessels that had accidents and were registered in Panama in 2018?", + "sql_context": "CREATE TABLE Vessels (ID INT, Name TEXT, Cargo_Weight INT, Accidents INT, Registered_Country TEXT, Year INT);CREATE VIEW Panama_Registered_Vessels AS SELECT * FROM Vessels WHERE Registered_Country \u003d \u0027Panama\u0027;", + "sql": "SELECT AVG(Cargo_Weight) FROM Panama_Registered_Vessels WHERE Accidents \u003e 0 AND Year \u003d 2018;", + "sql_explanation": "Average the Cargo_Weight column for records in Panama_Registered_Vessels view with Accidents greater than 0 and Year equal to 2018." 
+}, { + "id": "4300", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vessels were there in the maritime domain as of 2022-01-01?", + "sql_context": "CREATE TABLE Port (id INT, name TEXT, registration_date DATE); INSERT INTO Port (id, name, registration_date) VALUES (1, \u0027A\u0027, \u00272021-01-01\u0027), (2, \u0027B\u0027, \u00272022-01-01\u0027), (3, \u0027C\u0027, \u00272020-01-01\u0027); CREATE TABLE Vessel (id INT, name TEXT, registration_date DATE, port_id INT); INSERT INTO Vessel (id, name, registration_date, port_id) VALUES (1, \u0027V1\u0027, \u00272021-01-01\u0027, 1), (2, \u0027V2\u0027, \u00272022-01-01\u0027, 1), (3, \u0027V3\u0027, \u00272022-01-01\u0027, 2), (4, \u0027V4\u0027, \u00272020-01-01\u0027, 3);", + "sql": "SELECT COUNT(DISTINCT name) FROM Vessel WHERE registration_date \u003c\u003d \u00272022-01-01\u0027;", + "sql_explanation": "Counts the number of vessels that were registered on or before 2022-01-01 by filtering the \u0027registration_date\u0027 column and counting distinct vessel names." 
+}, { + "id": "4427", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo weight loaded in the Brazilian region?", + "sql_context": "CREATE TABLE CargoTracking (CargoID INT, LoadDate DATE, LoadLocation VARCHAR(50), CargoWeight INT); INSERT INTO CargoTracking (CargoID, LoadDate, LoadLocation, CargoWeight) VALUES (1, \u00272021-01-01\u0027, \u0027Sao Paulo\u0027, 850), (2, \u00272021-02-15\u0027, \u0027Rio de Janeiro\u0027, 900), (3, \u00272021-12-31\u0027, \u0027Brasilia\u0027, 750);", + "sql": "SELECT SUM(CargoWeight) FROM CargoTracking WHERE LoadLocation LIKE \u0027Brazil%\u0027;", + "sql_explanation": "Calculate the sum of the CargoWeight column, but only for rows where LoadLocation starts with \u0027Brazil\u0027." +}, { + "id": "4446", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the average speed of vessel SS-V002 to 22 knots", + "sql_context": "vessel_performance(vessel_id, max_speed, average_speed)", + "sql": "UPDATE vessel_performance SET average_speed \u003d 22 WHERE vessel_id \u003d \u0027SS-V002\u0027;", + "sql_explanation": "This SQL query updates the average speed of vessel SS-V002 to 22 knots in the vessel_performance table." 
+}, { + "id": "4482", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all vessels in the \u0027vessel_summary\u0027 view that have a NULL safety_inspection_date", + "sql_context": "CREATE TABLE IF NOT EXISTS cargo (id INT PRIMARY KEY, vessel_name VARCHAR(255), average_speed DECIMAL(5,2)); CREATE TABLE IF NOT EXISTS vessel_safety (id INT PRIMARY KEY, vessel_name VARCHAR(255), safety_inspection_date DATE); INSERT INTO cargo (id, vessel_name, average_speed) VALUES (1, \u0027Poseidon\u0027, 26.3), (2, \u0027Oceanus\u0027, 28.1), (3, \u0027Neptune\u0027, 22.9), (4, \u0027Atlantis\u0027, 30.5), (5, \u0027Aquarius\u0027, 24.7); INSERT INTO vessel_safety (id, vessel_name, safety_inspection_date) VALUES (1, \u0027SS Great Britain\u0027, \u00272021-03-15\u0027), (2, \u0027Queen Mary 2\u0027, \u00272021-06-23\u0027), (3, \u0027Titanic\u0027, \u00272021-09-11\u0027), (4, \u0027Canberra\u0027, \u00272020-12-10\u0027), (5, \u0027France\u0027, \u00272020-08-18\u0027), (6, \u0027HMCS Haida\u0027, NULL); CREATE VIEW vessel_summary AS SELECT cargo.vessel_name, cargo.average_speed, vessel_safety.safety_inspection_date FROM cargo INNER JOIN vessel_safety ON cargo.vessel_name \u003d vessel_safety.vessel_name;", + "sql": "SELECT vessel_name FROM vessel_summary WHERE safety_inspection_date IS NULL;", + "sql_explanation": "This query lists all vessels in the \u0027vessel_summary\u0027 view that have a NULL safety_inspection_date." 
+}, { + "id": "4828", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the Vessel table that have not been serviced in the last 18 months", + "sql_context": "CREATE TABLE Vessel (Id INT PRIMARY KEY, Name VARCHAR(50), Type VARCHAR(50), LastService DATETIME);", + "sql": "DELETE FROM Vessel WHERE LastService \u003c DATEADD(MONTH, -18, GETDATE());", + "sql_explanation": "1. Filter records where LastService is older than 18 months by subtracting 18 months from the current date. 2. Delete all records that satisfy the condition." +}, { + "id": "4867", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of vessels with a construction country of \u0027China\u0027?", + "sql_context": "CREATE TABLE Vessel_Age (ID INT, Vessel_Name VARCHAR(50), Construction_Country VARCHAR(50), Age INT); INSERT INTO Vessel_Age (ID, Vessel_Name, Construction_Country, Age) VALUES (1, \u0027Vessel1\u0027, \u0027China\u0027, 10); INSERT INTO Vessel_Age (ID, Vessel_Name, Construction_Country, Age) VALUES (2, \u0027Vessel2\u0027, \u0027Japan\u0027, 8); INSERT INTO Vessel_Age (ID, Vessel_Name, Construction_Country, Age) VALUES (3, \u0027Vessel3\u0027, \u0027China\u0027, 12);", + "sql": "SELECT AVG(Age) FROM Vessel_Age WHERE Construction_Country \u003d \u0027China\u0027;", + "sql_explanation": "This 
query calculates the average age of vessels with a construction country of \u0027China\u0027 by using the AVG function. It considers only the rows where the Construction_Country is \u0027China\u0027." +}, { + "id": "5141", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo weight for Vessel A?", + "sql_context": "CREATE TABLE Vessels (VesselID varchar(10), CargoWeight int); INSERT INTO Vessels (VesselID, CargoWeight) VALUES (\u0027VesselA\u0027, 5000), (\u0027VesselB\u0027, 7000);", + "sql": "SELECT SUM(CargoWeight) FROM Vessels WHERE VesselID \u003d \u0027VesselA\u0027;", + "sql_explanation": "This query calculates the total cargo weight for Vessel A by summing the CargoWeight values in the Vessels table where the VesselID is \u0027VesselA\u0027." +}, { + "id": "5160", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the average year of construction for the \"Cargo\" vessel type from the \"vessels_summary\" view.", + "sql_context": "CREATE TABLE vessels (vessel_id INT, name VARCHAR(50), type VARCHAR(50), year_built INT); CREATE VIEW vessels_summary AS SELECT type, AVG(year_built) AS avg_year_built FROM vessels GROUP BY type;", + "sql": "SELECT avg_year_built FROM vessels_summary WHERE type \u003d \u0027Cargo\u0027;", + "sql_explanation": "1. 
This statement queries the \"vessels_summary\" view. 2. It filters the records by \"type\" equal to \"Cargo\". 3. It displays the average year of construction (\"avg_year_built\") for the \"Cargo\" vessel type." +}, { + "id": "5198", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of all passenger vessels?", + "sql_context": "CREATE TABLE Vessels (ID VARCHAR(20), Name VARCHAR(20), Type VARCHAR(20), AverageSpeed FLOAT); INSERT INTO Vessels VALUES (\u0027V009\u0027, \u0027Vessel I\u0027, \u0027Passenger\u0027, 22.5), (\u0027V010\u0027, \u0027Vessel J\u0027, \u0027Passenger\u0027, 25.0), (\u0027V011\u0027, \u0027Vessel K\u0027, \u0027Cargo\u0027, 15.5);", + "sql": "SELECT AVG(AverageSpeed) FROM Vessels WHERE Type \u003d \u0027Passenger\u0027;", + "sql_explanation": "* This SQL query selects the average of the AverageSpeed column * From the Vessels table * Where the Type is \u0027Passenger\u0027 * This will return the average speed of all passenger vessels." 
+}, { + "id": "5239", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum cargo weight for vessels that have a cargo weight greater than 5000 tons?", + "sql_context": "CREATE TABLE Vessels (ID INT PRIMARY KEY, Name TEXT, CargoWeight FLOAT); INSERT INTO Vessels (ID, Name, CargoWeight) VALUES (1, \u0027Cargo Ship 1\u0027, 5500), (2, \u0027Cargo Ship 2\u0027, 7000), (3, \u0027Cargo Ship 3\u0027, 4800);", + "sql": "SELECT MAX(CargoWeight) FROM Vessels WHERE CargoWeight \u003e 5000;", + "sql_explanation": "Calculate the maximum (MAX) of the CargoWeight column for records where CargoWeight is greater than 5000." +}, { + "id": "5664", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the \"accidents\" table where the date is after 2019-01-01", + "sql_context": "CREATE TABLE accidents (id INT, vessel_id INT, date DATE, description TEXT, severity INT);", + "sql": "DELETE FROM accidents WHERE date \u003e \u00272019-01-01\u0027;", + "sql_explanation": "This query deletes all records in the \"accidents\" table where the date is after 2019-01-01." 
+}, { + "id": "5735", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Display vessels with a maximum speed of over 30 knots and their corresponding details.", + "sql_context": "CREATE TABLE vessels (vessel_id INT, vessel_name VARCHAR(255), length INT, year_built INT, max_speed FLOAT);", + "sql": "SELECT * FROM vessels WHERE max_speed \u003e 30;", + "sql_explanation": "The SQL query filters out any vessels with a \"max_speed\" less than or equal to 30 knots, leaving only vessels with a maximum speed over 30 knots and their corresponding details." +}, { + "id": "1670", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many startups were founded by underrepresented racial groups in 2020?", + "sql_context": "CREATE TABLE startups(id INT, name TEXT, founding_year INT, founder_race TEXT); INSERT INTO startups VALUES (1, \u0027Acme Inc\u0027, 2018, \u0027Asian\u0027); INSERT INTO startups VALUES (2, \u0027Beta Corp\u0027, 2019, \u0027White\u0027); INSERT INTO startups VALUES (3, \u0027Gamma Start\u0027, 2020, \u0027Black\u0027); INSERT INTO startups VALUES (4, \u0027Delta Initiative\u0027, 2020, \u0027Latinx\u0027);", + "sql": "SELECT COUNT(*) FROM startups WHERE founding_year \u003d 2020 AND founder_race IN (\u0027Black\u0027, \u0027Latinx\u0027, \u0027Indigenous\u0027, \u0027Native Hawaiian\u0027, \u0027Pacific 
Islander\u0027);", + "sql_explanation": "Counts the number of startups founded by underrepresented racial groups in 2020 by selecting all records from the startups table where the founding year is 2020 and the founder\u0027s race is in the set of underrepresented racial groups. The COUNT function then returns the number of rows in the resulting table." +}, { + "id": "1771", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of employees for startups founded by immigrants from Asia in the e-commerce sector?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founding_year INT, founder_immigration_status TEXT, num_employees INT); INSERT INTO companies (id, name, industry, founding_year, founder_immigration_status, num_employees) VALUES (1, \u0027ShopEase\u0027, \u0027E-commerce\u0027, 2019, \u0027Immigrant\u0027, 50); INSERT INTO companies (id, name, industry, founding_year, founder_immigration_status, num_employees) VALUES (2, \u0027MarketFlex\u0027, \u0027E-commerce\u0027, 2018, \u0027Non-immigrant\u0027, 75);", + "sql": "SELECT AVG(companies.num_employees) FROM companies WHERE companies.industry \u003d \u0027E-commerce\u0027 AND companies.founder_immigration_status \u003d \u0027Immigrant\u0027;", + "sql_explanation": "The SQL query calculates the average number of employees for startups founded by immigrants from Asia in the e-commerce sector by filtering the records based on the industry and founder_immigration_status columns and then calculating the average num_employees column." 
+}, { + "id": "1927", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the description of the racial diversity metric in the \"diversity_metrics\" table", + "sql_context": "CREATE TABLE diversity_metrics (metric_id INT PRIMARY KEY, name TEXT, description TEXT); INSERT INTO diversity_metrics (metric_id, name, description) VALUES (1, \u0027Gender diversity\u0027, \u0027Percentage of employees who identify as female or male\u0027); INSERT INTO diversity_metrics (metric_id, name, description) VALUES (2, \u0027Racial diversity\u0027, \u0027Percentage of employees who identify as a race other than Caucasian\u0027);", + "sql": "UPDATE diversity_metrics SET description \u003d \u0027Percentage of employees who identify as a race other than White\u0027 WHERE name \u003d \u0027Racial diversity\u0027;", + "sql_explanation": "This query updates the description of the racial diversity metric in the \"diversity_metrics\" table. It does this by using the WHERE clause to select the record where the \"name\" is \"Racial diversity\" and then updating the \"description\" of this record using the SET clause and the UPDATE statement." 
+}, { + "id": "2432", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for startups founded by women in the \u0027Green\u0027 industry?", + "sql_context": "CREATE TABLE startups (id INT, name VARCHAR(255), founding_year INT, founder_gender VARCHAR(10), industry VARCHAR(255), average_funding INT); INSERT INTO startups (id, name, founding_year, founder_gender, industry, average_funding) VALUES (1, \u0027Foxtrot Corp\u0027, 2016, \u0027Female\u0027, \u0027Green\u0027, 600000), (2, \u0027Gamma Inc\u0027, 2018, \u0027Male\u0027, \u0027Green\u0027, 800000);", + "sql": "SELECT AVG(startups.average_funding) FROM startups WHERE startups.founder_gender \u003d \u0027Female\u0027 AND startups.industry \u003d \u0027Green\u0027;", + "sql_explanation": "The query filters the startups table to only include rows where the founder_gender column is equal to \u0027Female\u0027 and the industry column is equal to \u0027Green\u0027. It then applies the AVG function to the average_funding column in the filtered result to find the average funding amount for startups founded by women in the Green industry." 
+}, { + "id": "3028", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of all startups founded by individuals from Oceania in the transportation sector with funding over 5 million dollars.", + "sql_context": "CREATE TABLE startups (id INT, name TEXT, industry TEXT, founders TEXT, funding FLOAT); INSERT INTO startups (id, name, industry, founders, funding) VALUES (1, \u0027OceaniaTrans\u0027, \u0027Transportation\u0027, \u0027Oceania\u0027, 7000000);", + "sql": "SELECT name FROM startups WHERE industry \u003d \u0027Transportation\u0027 AND founders \u003d \u0027Oceania\u0027 AND funding \u003e 5000000;", + "sql_explanation": "This SQL query lists the names of all startups founded by individuals from Oceania in the transportation sector with funding over 5 million dollars by selecting the name column for rows where the industry is Transportation, founders is Oceania, and funding is greater than 5000000." 
+}, { + "id": "3508", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by startups founded by women in the healthcare sector?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founder_gender TEXT, funding FLOAT);", + "sql": "SELECT SUM(funding) FROM companies WHERE founder_gender \u003d \u0027female\u0027 AND industry \u003d \u0027healthcare\u0027;", + "sql_explanation": "The SQL query calculates the total funding received by startups founded by women in the healthcare sector by summing up the \u0027funding\u0027 column in the \u0027companies\u0027 table, filtering for rows where \u0027founder_gender\u0027 is \u0027female\u0027 and \u0027industry\u0027 is \u0027healthcare\u0027." 
+}, { + "id": "3554", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for startups founded by people with disabilities in the USA?", + "sql_context": "CREATE TABLE startups (id INT, name TEXT, location TEXT, founder_disability BOOLEAN, funding_amount INT); INSERT INTO startups (id, name, location, founder_disability, funding_amount) VALUES (1, \u0027Startup A\u0027, \u0027USA\u0027, true, 3000000); INSERT INTO startups (id, name, location, founder_disability, funding_amount) VALUES (2, \u0027Startup B\u0027, \u0027Canada\u0027, false, 5000000); INSERT INTO startups (id, name, location, founder_disability, funding_amount) VALUES (3, \u0027Startup C\u0027, \u0027USA\u0027, true, 4000000);", + "sql": "SELECT AVG(funding_amount) FROM startups WHERE location \u003d \u0027USA\u0027 AND founder_disability \u003d true;", + "sql_explanation": "This SQL query calculates the average funding amount for startups founded by people with disabilities in the USA, specifically those where the founder_disability column is set to true and the location is set to \u0027USA\u0027." 
+}, { + "id": "3714", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of startups founded by people who identify as LGBTQ+ in the education technology industry?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founder_lgbtq INT, industry TEXT); INSERT INTO company (id, name, founder_lgbtq, industry) VALUES (1, \u0027EduTech\u0027, 1, \u0027Education Technology\u0027); INSERT INTO company (id, name, founder_lgbtq, industry) VALUES (2, \u0027LearningPlatforms\u0027, 0, \u0027Education Technology\u0027);", + "sql": "SELECT COUNT(*) FROM company WHERE founder_lgbtq \u003d 1 AND industry \u003d \u0027Education Technology\u0027;", + "sql_explanation": "This query counts the number of startups founded by people who identify as LGBTQ+ in the education technology industry. It does so by selecting all rows from the company table where the founder_lgbtq column is 1 and the industry is \u0027Education Technology\u0027, then counting the number of rows returned." 
+}, { + "id": "3849", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of female founders in technology companies", + "sql_context": "CREATE TABLE founders (id INT, gender VARCHAR(10), company_domain VARCHAR(20)); INSERT INTO founders (id, gender, company_domain) VALUES (1, \u0027Male\u0027, \u0027Finance\u0027); INSERT INTO founders (id, gender, company_domain) VALUES (2, \u0027Female\u0027, \u0027Technology\u0027);", + "sql": "SELECT COUNT(*) FROM founders WHERE gender \u003d \u0027Female\u0027 AND company_domain \u003d \u0027Technology\u0027;", + "sql_explanation": "This query counts the number of female founders in technology companies by selecting all records where gender is \u0027Female\u0027 and company_domain is \u0027Technology\u0027, then counting the number of rows returned." 
+}, { + "id": "3948", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of companies founded by underrepresented minorities", + "sql_context": "CREATE TABLE company_founding (company_name VARCHAR(255), founder_minority VARCHAR(10)); INSERT INTO company_founding VALUES (\u0027Acme Inc\u0027, \u0027Yes\u0027); INSERT INTO company_founding VALUES (\u0027Beta Corp\u0027, \u0027No\u0027); INSERT INTO company_founding VALUES (\u0027Charlie LLC\u0027, \u0027Yes\u0027); INSERT INTO company_founding VALUES (\u0027Delta Co\u0027, \u0027No\u0027);", + "sql": "SELECT COUNT(*) FROM company_founding WHERE company_founding.founder_minority \u003d \u0027Yes\u0027;", + "sql_explanation": "This query counts the number of companies founded by underrepresented minorities. It does so by filtering for rows where the founder_minority column is equal to \u0027Yes\u0027 and returns the count of rows." 
+}, { + "id": "4198", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of unique industries with companies founded by veterans", + "sql_context": "CREATE TABLE companies (company_id INT, company_name VARCHAR(50), industry VARCHAR(50), founder_veteran VARCHAR(10)); INSERT INTO companies VALUES (1, \u0027Eta Startup\u0027, \u0027Technology\u0027, \u0027Veteran\u0027); INSERT INTO companies VALUES (2, \u0027Theta Corp\u0027, \u0027Finance\u0027, NULL);", + "sql": "SELECT COUNT(DISTINCT industry) FROM companies WHERE founder_veteran \u003d \u0027Veteran\u0027;", + "sql_explanation": "This SQL query selects the count of distinct industry values from the \u0027companies\u0027 table where founder_veteran is \u0027Veteran\u0027, indicating the number of unique industries with companies founded by veterans." 
+}, { + "id": "4720", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of all startups that have received funding over 10 million dollars but have not yet defined their industry.", + "sql_context": "CREATE TABLE startups (id INT, name TEXT, industry TEXT, funding FLOAT); INSERT INTO startups (id, name, industry, funding) VALUES (1, \u0027InnoTech\u0027, NULL, 15000000);", + "sql": "SELECT name FROM startups WHERE funding \u003e 10000000 AND industry IS NULL;", + "sql_explanation": "This SQL query lists the names of all startups that have received funding over 10 million dollars but have not yet defined their industry by selecting the name column for rows where the funding is greater than 10000000 and the industry column is null." 
+}, { + "id": "4782", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding for startups founded by individuals from underrepresented communities?", + "sql_context": "CREATE TABLE startups (id INT, name TEXT, founder TEXT, community TEXT, funding FLOAT); INSERT INTO startups (id, name, founder, community, funding) VALUES (1, \u0027Acme\u0027, \u0027John Doe\u0027, \u0027Majority\u0027, 500000.00); INSERT INTO startups (id, name, founder, community, funding) VALUES (2, \u0027Beta Corp\u0027, \u0027Jane Smith\u0027, \u0027Underrepresented\u0027, 750000.00); INSERT INTO startups (id, name, founder, community, funding) VALUES (3, \u0027Gamma Inc\u0027, \u0027Alice\u0027, \u0027Underrepresented\u0027, 300000.00);", + "sql": "SELECT AVG(funding) FROM startups WHERE community \u003d \u0027Underrepresented\u0027;", + "sql_explanation": "This query calculates the average funding for startups founded by individuals from underrepresented communities by averaging the \u0027funding\u0027 column values where the \u0027community\u0027 column is \u0027Underrepresented\u0027." 
+}, { + "id": "5081", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update innovation_trends table to reflect AI as a trend for company_id 101", + "sql_context": "CREATE TABLE companies (id INT PRIMARY KEY, name VARCHAR(255)); CREATE TABLE innovation_trends (id INT PRIMARY KEY, company_id INT, trend VARCHAR(255));", + "sql": "UPDATE innovation_trends SET trend \u003d \u0027AI\u0027 WHERE company_id \u003d 101;", + "sql_explanation": "This SQL query updates the innovation_trends table to reflect AI as a trend for company_id 101. It does this by setting the trend to \u0027AI\u0027 for the company_id 101." +}, { + "id": "5227", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum funding amount received by a company founded by a person from the BIPOC community?", + "sql_context": "CREATE TABLE Companies (id INT, name TEXT, industry TEXT, founders TEXT, funding FLOAT, bipoc_founder BOOLEAN); INSERT INTO Companies (id, name, industry, founders, funding, bipoc_founder) VALUES (1, \u0027GreenTech\u0027, \u0027Green Energy\u0027, \u0027BIPOC Founder\u0027, 2000000.00, TRUE); INSERT INTO Companies (id, name, industry, founders, funding, bipoc_founder) VALUES (2, \u0027BlueInnovations\u0027, \u0027Ocean Technology\u0027, \u0027White Founder\u0027, 6000000.00, FALSE);", + "sql": "SELECT MIN(funding) 
FROM Companies WHERE bipoc_founder \u003d TRUE;", + "sql_explanation": "This SQL query finds the minimum funding amount received by a company founded by a person from the BIPOC community. It does this by using the MIN function on the \u0027funding\u0027 column, while filtering for rows where \u0027bipoc_founder\u0027 is TRUE." +}, { + "id": "5391", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total funding for startups that have more than one founder", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, region TEXT, num_founders INT, funding FLOAT); INSERT INTO companies (id, name, region, num_founders, funding) VALUES (1, \u0027Startup A\u0027, \u0027west_coast\u0027, 2, 5000000), (2, \u0027Startup B\u0027, \u0027east_coast\u0027, 1, 3000000), (3, \u0027Startup C\u0027, \u0027west_coast\u0027, 3, 7000000), (4, \u0027Startup D\u0027, \u0027east_coast\u0027, 1, 8000000);", + "sql": "SELECT SUM(funding) FROM companies WHERE num_founders \u003e 1;", + "sql_explanation": "This query calculates the total funding for startups with more than one founder by summing the funding amounts for those startups." 
+}, { + "id": "5591", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the diversity metric \u0027Age\u0027 from the database.", + "sql_context": "CREATE TABLE diversity_metrics (id INT, metric TEXT); INSERT INTO diversity_metrics (id, metric) VALUES (1, \u0027Gender\u0027); INSERT INTO diversity_metrics (id, metric) VALUES (2, \u0027Race\u0027);", + "sql": "DELETE FROM diversity_metrics WHERE metric \u003d \u0027Age\u0027;", + "sql_explanation": "The query deletes the row from the \u0027diversity_metrics\u0027 table where the \u0027metric\u0027 column is \u0027Age\u0027." +}, { + "id": "5619", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the diversity metric with the ID 1 from the \"diversity_metrics\" table", + "sql_context": "CREATE TABLE diversity_metrics (metric_id INT PRIMARY KEY, name TEXT, description TEXT); INSERT INTO diversity_metrics (metric_id, name, description) VALUES (1, \u0027Gender diversity\u0027, \u0027Percentage of employees who identify as female or male\u0027); INSERT INTO diversity_metrics (metric_id, name, description) VALUES (2, \u0027Racial diversity\u0027, \u0027Percentage of employees who identify as a race other than Caucasian\u0027);", + "sql": "DELETE FROM diversity_metrics WHERE metric_id \u003d 1;", + "sql_explanation": "This query deletes the diversity metric with the ID 1 
from the \"diversity_metrics\" table. It does this by using the WHERE clause to select the record where the \"metric_id\" is 1 and then deleting this record using the DELETE statement." +}, { + "id": "1865", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum production cost for aircraft manufactured by \u0027AeroSpace Industries\u0027?", + "sql_context": "CREATE TABLE Aircraft_Production (aircraft_id INT, manufacturer VARCHAR(255), production_cost FLOAT); INSERT INTO Aircraft_Production (aircraft_id, manufacturer, production_cost) VALUES (1, \u0027AeroSpace Industries\u0027, 22000000), (2, \u0027AeroSpace Industries\u0027, 25000000), (3, \u0027AeroSpace Industries\u0027, 28000000), (4, \u0027AeroSpace Industries\u0027, 20000000);", + "sql": "SELECT MAX(production_cost) as max_cost, MIN(production_cost) as min_cost FROM Aircraft_Production WHERE manufacturer \u003d \u0027AeroSpace Industries\u0027;", + "sql_explanation": "This SQL query calculates the maximum and minimum production cost for aircraft manufactured by \u0027AeroSpace Industries\u0027 by using the SELECT statement, MAX and MIN functions, and filtering rows with the WHERE clause for manufacturer equal to \u0027AeroSpace Industries\u0027." 
+}, { + "id": "1888", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the space_exploration table with the following data: mission_name \u003d \u0027Mars 2020\u0027, launch_date \u003d \u00272020-07-30\u0027, launch_site \u003d \u0027Cape Canaveral Air Force Station\u0027", + "sql_context": "CREATE TABLE space_exploration (mission_name VARCHAR(100), launch_date DATE, launch_site VARCHAR(100));", + "sql": "INSERT INTO space_exploration (mission_name, launch_date, launch_site) VALUES (\u0027Mars 2020\u0027, \u00272020-07-30\u0027, \u0027Cape Canaveral Air Force Station\u0027);", + "sql_explanation": "This query inserts a new record into the space_exploration table with the following data: mission_name \u003d \u0027Mars 2020\u0027, launch_date \u003d \u00272020-07-30\u0027, launch_site \u003d \u0027Cape Canaveral Air Force Station\u0027." 
+}, { + "id": "2463", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the flight_safety table with the following data: flight_number \u003d \u0027FL123\u0027, safety_rating \u003d \u0027excellent\u0027, last_inspection_date \u003d \u00272020-05-01\u0027", + "sql_context": "CREATE TABLE flight_safety (flight_number VARCHAR(50) PRIMARY KEY, safety_rating VARCHAR(20), last_inspection_date DATE);", + "sql": "INSERT INTO flight_safety (flight_number, safety_rating, last_inspection_date) VALUES (\u0027FL123\u0027, \u0027excellent\u0027, \u00272020-05-01\u0027);", + "sql_explanation": "This query inserts a new record into the flight_safety table with the following data: flight_number \u003d \u0027FL123\u0027, safety_rating \u003d \u0027excellent\u0027, last_inspection_date \u003d \u00272020-05-01\u0027." 
+}, { + "id": "2526", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average lifespan of satellites in space?", + "sql_context": "CREATE TABLE satellites (satellite_id INT, launch_date DATE, decommission_date DATE); INSERT INTO satellites (satellite_id, launch_date, decommission_date) VALUES (1, \u00272010-01-01\u0027, \u00272020-01-01\u0027), (2, \u00272015-01-01\u0027, \u00272022-01-01\u0027), (3, \u00272020-01-01\u0027, NULL);", + "sql": "SELECT AVG(DATEDIFF(decommission_date, launch_date)) as avg_lifespan FROM satellites WHERE decommission_date IS NOT NULL;", + "sql_explanation": "This query calculates the average lifespan of satellites in space by finding the difference between the decommission date and the launch date, and then averaging those differences for satellites that have been decommissioned." +}, { + "id": "2549", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which satellites were deployed by SpaceTech Inc. 
between 2000 and 2010?", + "sql_context": "CREATE TABLE satellites (satellite_id INT, name VARCHAR(100), manufacturer VARCHAR(100), launch_date DATE); INSERT INTO satellites (satellite_id, name, manufacturer, launch_date) VALUES (1, \u0027Sat1\u0027, \u0027SpaceTech Inc.\u0027, \u00272005-03-14\u0027); INSERT INTO satellites (satellite_id, name, manufacturer, launch_date) VALUES (2, \u0027Sat2\u0027, \u0027Aerospace Corp.\u0027, \u00272008-09-27\u0027); INSERT INTO satellites (satellite_id, name, manufacturer, launch_date) VALUES (3, \u0027Sat3\u0027, \u0027SpaceTech Inc.\u0027, \u00272002-11-17\u0027);", + "sql": "SELECT name FROM satellites WHERE manufacturer \u003d \u0027SpaceTech Inc.\u0027 AND launch_date BETWEEN \u00272000-01-01\u0027 AND \u00272010-12-31\u0027;", + "sql_explanation": "This query filters the satellites table to select the name of satellites where the manufacturer is SpaceTech Inc. and the launch_date is between 2000 and 2010." +}, { + "id": "2579", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and launch dates of satellites launched by \u0027AgencyZ\u0027 in the year 2000 or 2001?", + "sql_context": "CREATE TABLE Satellites (id INT, name VARCHAR(50), launch_date DATE, agency VARCHAR(50)); INSERT INTO Satellites (id, name, launch_date, agency) VALUES (1, \u0027Sat1\u0027, \u00272000-01-01\u0027, \u0027AgencyZ\u0027), (2, \u0027Sat2\u0027, \u00271999-12-31\u0027, \u0027AgencyZ\u0027), (3, \u0027Sat3\u0027, \u00272001-01-01\u0027, \u0027AgencyY\u0027);", + "sql": "SELECT name, launch_date FROM Satellites WHERE launch_date BETWEEN 
\u00272000-01-01\u0027 AND \u00272001-12-31\u0027 AND agency \u003d \u0027AgencyZ\u0027;", + "sql_explanation": "This query selects the \u0027name\u0027 and \u0027launch_date\u0027 columns from the \u0027Satellites\u0027 table where the \u0027launch_date\u0027 is between \u00272000-01-01\u0027 and \u00272001-12-31\u0027 and the \u0027agency\u0027 is equal to \u0027AgencyZ\u0027." +}, { + "id": "2824", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of satellites launched by China and India between 2015 and 2022?", + "sql_context": "CREATE TABLE satellites_launch (satellite_id INT, launch_company VARCHAR(50), launch_year INT); INSERT INTO satellites_launch (satellite_id, launch_company, launch_year) VALUES (1, \u0027China\u0027, 2015), (2, \u0027China\u0027, 2017), (3, \u0027India\u0027, 2016), (4, \u0027India\u0027, 2020);", + "sql": "SELECT SUM(launch_company IN (\u0027China\u0027, \u0027India\u0027)) FROM satellites_launch WHERE launch_year BETWEEN 2015 AND 2022;", + "sql_explanation": "Calculate the total number of satellites launched by China and India between 2015 and 2022 by summing the boolean expression that evaluates to 1 if the launch company is either China or India, and 0 otherwise." 
+}, { + "id": "3464", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of manufacturing the Boeing 787 Dreamliner?", + "sql_context": "CREATE TABLE Manufacturing_Costs (manufacturer VARCHAR(50), model VARCHAR(50), cost FLOAT); INSERT INTO Manufacturing_Costs (manufacturer, model, cost) VALUES (\u0027Boeing\u0027, \u0027787 Dreamliner\u0027, 2500000000);", + "sql": "SELECT cost FROM Manufacturing_Costs WHERE manufacturer \u003d \u0027Boeing\u0027 AND model \u003d \u0027787 Dreamliner\u0027;", + "sql_explanation": "This query selects the cost of manufacturing the Boeing 787 Dreamliner from the Manufacturing_Costs table." 
+}, { + "id": "3496", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of satellites deployed by SpaceComm in the Middle East?", + "sql_context": "CREATE TABLE SatelliteDeployment (satellite_id INT, company VARCHAR(255), region VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM SatelliteDeployment WHERE company \u003d \u0027SpaceComm\u0027 AND region \u003d \u0027Middle East\u0027;", + "sql_explanation": "This query counts the number of records in the SatelliteDeployment table with a company value of \u0027SpaceComm\u0027 and a region value of \u0027Middle East\u0027." +}, { + "id": "3579", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the aircraft_manufacturing table to set the status of all records with manufacturing_date before \u00272010-01-01\u0027 to \u0027Inactive\u0027", + "sql_context": "CREATE TABLE aircraft_manufacturing (id INT, aircraft_name VARCHAR(255), manufacturer VARCHAR(255), manufacturing_date DATE, status VARCHAR(255));", + "sql": "UPDATE aircraft_manufacturing SET status \u003d \u0027Inactive\u0027 WHERE manufacturing_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "* This query will update the status of all records in the aircraft_manufacturing table with a manufacturing_date 
before \u00272010-01-01\u0027 to \u0027Inactive\u0027." +}, { + "id": "3690", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the aircraft_manufacturing table with the following data: model \u003d \u0027Boeing 797\u0027, manufacturing_year \u003d 2030", + "sql_context": "CREATE TABLE aircraft_manufacturing (id INT PRIMARY KEY, model VARCHAR(100), manufacturing_year INT);", + "sql": "INSERT INTO aircraft_manufacturing (model, manufacturing_year) VALUES (\u0027Boeing 797\u0027, 2030);", + "sql_explanation": "This query inserts a new record into the aircraft_manufacturing table with the following data: model \u003d \u0027Boeing 797\u0027, manufacturing_year \u003d 2030." 
+}, { + "id": "3818", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of successful satellite launches by company ABC", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(50), launch_status VARCHAR(50), manufacturer VARCHAR(50), launch_date DATE);", + "sql": "SELECT COUNT(*) FROM satellites WHERE launch_status \u003d \u0027Success\u0027 AND manufacturer \u003d \u0027ABC\u0027;", + "sql_explanation": "This query counts the number of records in the satellites table where the launch status is Success and the manufacturer is ABC." +}, { + "id": "3837", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and launch date of all satellites deployed by SpaceX and Rocket Lab?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(255), manufacturer VARCHAR(255), launch_date DATE); INSERT INTO satellites (id, name, manufacturer, launch_date) VALUES (1, \u0027FalconSat\u0027, \u0027SpaceX\u0027, \u00272020-01-01\u0027), (2, \u0027Cubesat\u0027, \u0027Blue Origin\u0027, \u00272019-01-01\u0027), (3, \u0027Electron\u0027, \u0027Rocket Lab\u0027, \u00272021-01-01\u0027);", + "sql": "SELECT name, launch_date FROM satellites WHERE manufacturer IN (\u0027SpaceX\u0027, \u0027Rocket 
Lab\u0027);", + "sql_explanation": "This query lists the name and launch date of all satellites deployed by SpaceX and Rocket Lab by filtering the satellites table to only include records where the manufacturer is either SpaceX or Rocket Lab." +}, { + "id": "3990", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Find incidents related to software issues in the Flight Operations department.", + "sql_context": "CREATE TABLE Incidents (IncidentID INT, Date DATE, Type VARCHAR(20), Description TEXT, Department VARCHAR(20)); INSERT INTO Incidents (IncidentID, Date, Type, Description, Department) VALUES (1, \u00272021-05-01\u0027, \u0027Safety\u0027, \u0027Landing gear malfunction\u0027, \u0027Flight Operations\u0027); INSERT INTO Incidents (IncidentID, Date, Type, Description, Department) VALUES (2, \u00272021-06-15\u0027, \u0027Technical\u0027, \u0027Software glitch\u0027, \u0027Flight Operations\u0027);", + "sql": "SELECT * FROM Incidents WHERE Department \u003d \u0027Flight Operations\u0027 AND Type \u003d \u0027Technical\u0027", + "sql_explanation": "This query retrieves incidents related to software issues in the Flight Operations department." 
+}, { + "id": "4194", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which space missions have been launched from the \u0027Vandenberg Air Force Base\u0027?", + "sql_context": "CREATE TABLE SpaceMissions (Mission VARCHAR(50), LaunchSite VARCHAR(50), Satellites INT); INSERT INTO SpaceMissions (Mission, LaunchSite, Satellites) VALUES (\u0027STS-1\u0027, \u0027Kennedy Space Center\u0027, 2), (\u0027STS-2\u0027, \u0027Kennedy Space Center\u0027, 0), (\u0027Spacelab-1\u0027, \u0027Kennedy Space Center\u0027, 7), (\u0027CZ-2C\u0027, \u0027Xichang Satellite Launch Center\u0027, 1), (\u0027CZ-2D\u0027, \u0027Jiuquan Satellite Launch Center\u0027, 3), (\u0027STS-3\u0027, \u0027Vandenberg Air Force Base\u0027, 0);", + "sql": "SELECT Mission FROM SpaceMissions WHERE LaunchSite \u003d \u0027Vandenberg Air Force Base\u0027;", + "sql_explanation": "This SQL query selects the missions that have been launched from the \u0027Vandenberg Air Force Base\u0027 by filtering the rows of the SpaceMissions table based on the LaunchSite column." 
+}, { + "id": "4314", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of an aircraft manufactured by Manufacturer1?", + "sql_context": "CREATE TABLE Aircraft (aircraft_model VARCHAR(50), manufacturer VARCHAR(50), cost FLOAT); INSERT INTO Aircraft (aircraft_model, manufacturer, cost) VALUES (\u0027ModelA\u0027, \u0027Manufacturer1\u0027, 1000), (\u0027ModelB\u0027, \u0027Manufacturer1\u0027, 2000), (\u0027ModelC\u0027, \u0027Manufacturer2\u0027, 1500);", + "sql": "SELECT AVG(cost) as avg_cost FROM Aircraft WHERE manufacturer \u003d \u0027Manufacturer1\u0027", + "sql_explanation": "This query calculates the average cost of an aircraft manufactured by Manufacturer1 by averaging the cost column in the Aircraft table where the manufacturer is Manufacturer1." 
+}, { + "id": "4323", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new aircraft model data with specific flight hours", + "sql_context": "CREATE TABLE Aircraft (ID INT, Model VARCHAR(50), FlightHours INT); INSERT INTO Aircraft (ID, Model, FlightHours) VALUES (1, \u0027B747\u0027, 120000), (2, \u0027A320\u0027, 90000), (3, \u0027A380\u0027, 150000), (4, \u0027B777\u0027, 200000);", + "sql": "INSERT INTO Aircraft (ID, Model, FlightHours) VALUES (5, \u0027Dreamliner\u0027, 180000);", + "sql_explanation": "This query inserts a new record into the Aircraft table for the Dreamliner model with 180,000 flight hours." +}, { + "id": "4533", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and manufacturer of the most recently deployed satellite?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(255), manufacturer VARCHAR(255), launch_date DATE); INSERT INTO satellites (id, name, manufacturer, launch_date) VALUES (1, \u0027FalconSat\u0027, \u0027SpaceX\u0027, \u00272020-01-01\u0027), (2, \u0027Cubesat\u0027, \u0027Blue Origin\u0027, \u00272019-01-01\u0027);", + "sql": "SELECT name, manufacturer FROM satellites ORDER BY launch_date DESC LIMIT 1;", + "sql_explanation": "This query lists the name and 
manufacturer of the most recently deployed satellite by ordering the satellites table by launch date in descending order and selecting the top record." +}, { + "id": "4564", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many space missions were carried out by China in 2022?", + "sql_context": "CREATE TABLE SpaceMissions(id INT, country VARCHAR(255), mission VARCHAR(255), year INT, success BOOLEAN); INSERT INTO SpaceMissions(id, country, mission, year, success) VALUES (1, \u0027China\u0027, \u0027Mission 1\u0027, 2021, true), (2, \u0027USA\u0027, \u0027Mission 2\u0027, 2022, false), (3, \u0027China\u0027, \u0027Mission 3\u0027, 2022, true), (4, \u0027Russia\u0027, \u0027Mission 4\u0027, 2021, true);", + "sql": "SELECT COUNT(*) FROM SpaceMissions WHERE country \u003d \u0027China\u0027 AND year \u003d 2022;", + "sql_explanation": "This SQL query calculates the number of space missions carried out by China in 2022 by using the COUNT function with no specified column (which counts all rows), where the \u0027country\u0027 is equal to \u0027China\u0027 and the \u0027year\u0027 is 2022." 
+}, { + "id": "4592", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average total flight hours for aircraft manufactured by Boeing?", + "sql_context": "CREATE TABLE Aircraft (id INT, model VARCHAR(255), manufacturer VARCHAR(255), year_manufactured INT, total_flight_hours INT); INSERT INTO Aircraft (id, model, manufacturer, year_manufactured, total_flight_hours) VALUES (1, \u0027B747\u0027, \u0027Boeing\u0027, 1990, 50000); INSERT INTO Aircraft (id, model, manufacturer, year_manufactured, total_flight_hours) VALUES (2, \u0027A320\u0027, \u0027Airbus\u0027, 2005, 30000);", + "sql": "SELECT AVG(total_flight_hours) FROM Aircraft WHERE manufacturer \u003d \u0027Boeing\u0027;", + "sql_explanation": "This query calculates the average total flight hours for aircraft manufactured by Boeing." 
+}, { + "id": "4812", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all satellites launched by the United States", + "sql_context": "CREATE TABLE satellite_deployment (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), launch_date DATE); INSERT INTO satellite_deployment (id, name, country, launch_date) VALUES (1, \u0027Sentinel-1A\u0027, \u0027European Union\u0027, \u00272014-04-03\u0027), (2, \u0027TechSat\u0027, \u0027United States\u0027, \u00272022-09-01\u0027);", + "sql": "SELECT name FROM satellite_deployment WHERE country \u003d \u0027United States\u0027;", + "sql_explanation": "1. This statement selects all records from the \u0027satellite_deployment\u0027 table. 2. It filters the records to only display those where \u0027country\u0027 equals \u0027United States\u0027. 3. It returns the \u0027name\u0027 for each record that meets the filter criteria." 
+}, { + "id": "4921", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of all satellites deployed by ULA?", + "sql_context": "CREATE TABLE Satellite_Weight (id INT, name VARCHAR(50), manufacturer VARCHAR(50), weight INT); INSERT INTO Satellite_Weight (id, name, manufacturer, weight) VALUES (1, \u0027Atlas V\u0027, \u0027ULA\u0027, 25000), (2, \u0027Delta IV\u0027, \u0027ULA\u0027, 30000), (3, \u0027Falcon 9\u0027, \u0027SpaceX\u0027, 28000);", + "sql": "SELECT SUM(weight) FROM Satellite_Weight WHERE manufacturer \u003d \u0027ULA\u0027;", + "sql_explanation": "This SQL query calculates the total weight of all satellites deployed by ULA by selecting SUM function on the weight column and filtering the data with the WHERE clause to only include satellites launched by ULA." 
+}, { + "id": "4946", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the flight_safety table where the last_inspection_date is before 2015-01-01", + "sql_context": "CREATE TABLE flight_safety (flight_number VARCHAR(50) PRIMARY KEY, safety_rating VARCHAR(20), last_inspection_date DATE);", + "sql": "DELETE FROM flight_safety WHERE last_inspection_date \u003c \u00272015-01-01\u0027;", + "sql_explanation": "This query deletes all records from the flight_safety table where the last_inspection_date is before 2015-01-01." +}, { + "id": "4960", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the safety record for flight \u0027AA123\u0027?", + "sql_context": "CREATE TABLE FlightSafety(flight_number VARCHAR(10), safety_record INT); INSERT INTO FlightSafety VALUES(\u0027AA123\u0027, 95),(\u0027AA456\u0027, 98);", + "sql": "SELECT safety_record FROM FlightSafety WHERE flight_number\u003d\u0027AA123\u0027;", + "sql_explanation": "The SQL query selects the safety record for flight \u0027AA123\u0027 from the FlightSafety table." 
+}, { + "id": "4983", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of passengers for Airbus aircraft?", + "sql_context": "CREATE TABLE Aircraft (AircraftID INT, Name VARCHAR(50), Manufacturer VARCHAR(50), Passengers INT); INSERT INTO Aircraft (AircraftID, Name, Manufacturer, Passengers) VALUES (1, \u0027A320\u0027, \u0027Airbus\u0027, 180), (2, \u0027A380\u0027, \u0027Airbus\u0027, 555);", + "sql": "SELECT MAX(Passengers) FROM Aircraft WHERE Manufacturer \u003d \u0027Airbus\u0027;", + "sql_explanation": "This query filters the Aircraft table based on the Manufacturer column and then calculates the maximum value of the Passengers column using the MAX function." 
+}, { + "id": "5050", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of flight hours for Boeing 747 aircraft?", + "sql_context": "CREATE TABLE Flight_Hours (aircraft VARCHAR(50), hours FLOAT); INSERT INTO Flight_Hours (aircraft, hours) VALUES (\u0027Boeing 747\u0027, 60000), (\u0027Boeing 737\u0027, 45000);", + "sql": "SELECT AVG(hours) FROM Flight_Hours WHERE aircraft \u003d \u0027Boeing 747\u0027;", + "sql_explanation": "This query calculates the average number of flight hours for Boeing 747 aircraft from the Flight_Hours table." +}, { + "id": "5076", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average year of foundation for manufacturers based in the United States?", + "sql_context": "CREATE TABLE Manufacturer (id INT, name VARCHAR(255), country VARCHAR(255), year_founded INT); INSERT INTO Manufacturer (id, name, country, year_founded) VALUES (1, \u0027Boeing\u0027, \u0027USA\u0027, 1916); INSERT INTO Manufacturer (id, name, country, year_founded) VALUES (2, \u0027Airbus\u0027, \u0027Europe\u0027, 1970); INSERT INTO Manufacturer (id, name, country, year_founded) VALUES (3, \u0027Lockheed Martin\u0027, \u0027USA\u0027, 1995);", + "sql": "SELECT AVG(year_founded) FROM 
Manufacturer WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This query calculates the average year of foundation for manufacturers based in the United States." +}, { + "id": "5129", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all countries that have not launched any satellites?", + "sql_context": "CREATE TABLE Countries (country VARCHAR(255), launched_satellites BOOLEAN); INSERT INTO Countries (country, launched_satellites) VALUES (\u0027Country1\u0027, true); INSERT INTO Countries (country, launched_satellites) VALUES (\u0027Country2\u0027, true); INSERT INTO Countries (country, launched_satellites) VALUES (\u0027Country3\u0027, false); INSERT INTO Countries (country, launched_satellites) VALUES (\u0027Country4\u0027, false);", + "sql": "SELECT country FROM Countries WHERE launched_satellites \u003d false;", + "sql_explanation": "The SQL query selects all rows from the Countries table where the launched_satellites column is false, effectively returning the names of all countries that have not launched any satellites." 
+}, { + "id": "5310", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which satellites were deployed in the year 2010?", + "sql_context": "CREATE TABLE SatelliteDeployment(name VARCHAR(20), launch_year INT); INSERT INTO SatelliteDeployment VALUES(\u0027Satellite A\u0027, 2008),(\u0027Satellite B\u0027, 2010);", + "sql": "SELECT name FROM SatelliteDeployment WHERE launch_year\u003d2010;", + "sql_explanation": "The SQL query selects the names of satellites deployed in the year 2010 from the SatelliteDeployment table." +}, { + "id": "5410", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records related to the \u0027China\u0027 country in the \u0027flight_safety_records\u0027 table.", + "sql_context": "CREATE TABLE flight_safety_records (airline VARCHAR(50), country VARCHAR(50), incidents INT); INSERT INTO flight_safety_records (airline, country, incidents) VALUES (\u0027Air China\u0027, \u0027China\u0027, 5), (\u0027China Southern\u0027, \u0027China\u0027, 3), (\u0027China Eastern\u0027, \u0027China\u0027, 4), (\u0027United Airlines\u0027, \u0027USA\u0027, 2), (\u0027Delta Airlines\u0027, \u0027USA\u0027, 1);", + "sql": "DELETE FROM flight_safety_records WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "The SQL 
query deletes all records related to the \u0027China\u0027 country in the \u0027flight_safety_records\u0027 table by using the WHERE clause to filter the specific country records." +}, { + "id": "5436", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of all satellites launched by JAXA?", + "sql_context": "CREATE TABLE satellites (id INT, name TEXT, country TEXT, launch_date DATE, mass FLOAT); INSERT INTO satellites (id, name, country, launch_date, mass) VALUES (1, \u0027Hayabusa\u0027, \u0027Japan\u0027, \u00272003-05-09\u0027, 510), (2, \u0027Akatsuki\u0027, \u0027Japan\u0027, \u00272010-05-20\u0027, 1300);", + "sql": "SELECT SUM(mass) FROM satellites WHERE country \u003d \u0027Japan\u0027;", + "sql_explanation": "This query finds the total mass of all satellites launched by JAXA by summing the mass for all records where the country is Japan." 
+}, { + "id": "5553", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the manufacturers that have deployed more than 50 satellites?", + "sql_context": "CREATE TABLE Manufacturers (name VARCHAR(50), satellites INT); INSERT INTO Manufacturers (name, satellites) VALUES (\u0027SpaceX\u0027, 200), (\u0027Boeing\u0027, 100), (\u0027Lockheed Martin\u0027, 75), (\u0027Northrop Grumman\u0027, 55);", + "sql": "SELECT name FROM Manufacturers WHERE satellites \u003e 50;", + "sql_explanation": "This query selects the names of manufacturers from the Manufacturers table that have deployed more than 50 satellites." +}, { + "id": "5673", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all spacecraft records with the engine \u0027Raptor\u0027", + "sql_context": "CREATE TABLE spacecraft (id INT PRIMARY KEY, name VARCHAR(50), engine VARCHAR(50), status VARCHAR(10));", + "sql": "DELETE FROM spacecraft WHERE engine \u003d \u0027Raptor\u0027;", + "sql_explanation": "This query deletes all spacecraft records with the engine \u0027Raptor\u0027." 
+}, { + "id": "5769", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of all space exploration missions in the \u0027space_exploration\u0027 table?", + "sql_context": "CREATE TABLE space_exploration (mission VARCHAR(50), cost FLOAT);", + "sql": "SELECT SUM(cost) FROM space_exploration;", + "sql_explanation": "The SQL query calculates the total cost of all space exploration missions by summing the cost column in the space_exploration table." +}, { + "id": "1013", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which chemical substances have a production volume greater than 10000 in the South America region for the month of June?", + "sql_context": "CREATE TABLE ChemicalSubstances (SubstanceID INT, SubstanceName VARCHAR(50), ProductionVolume INT, Region VARCHAR(50), ProductionDate DATE); INSERT INTO ChemicalSubstances (SubstanceID, SubstanceName, ProductionVolume, Region, ProductionDate) VALUES (1, \u0027Ethylene\u0027, 12000, \u0027South America\u0027, \u00272021-06-01\u0027), (2, \u0027Propylene\u0027, 8000, \u0027South America\u0027, \u00272021-06-01\u0027), (3, \u0027Benzenene\u0027, 11000, \u0027South America\u0027, \u00272021-06-01\u0027);", + "sql": "SELECT SubstanceName, ProductionVolume 
FROM ChemicalSubstances WHERE Region \u003d \u0027South America\u0027 AND ProductionDate BETWEEN \u00272021-06-01\u0027 AND \u00272021-06-30\u0027 AND ProductionVolume \u003e 10000;", + "sql_explanation": "This query identifies the chemical substances with a production volume greater than 10000 in the South America region for the month of June. It does this by selecting the SubstanceName and ProductionVolume columns from the ChemicalSubstances table, filtering for rows where the Region is \u0027South America\u0027, the ProductionDate is in June 2021, and the ProductionVolume is greater than 10000." +}, { + "id": "1335", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 chemicals with the highest production rate in the last month?", + "sql_context": "CREATE TABLE Chemical_Production (Chemical_Name VARCHAR(255), Production_Date DATE, Production_Rate INT); INSERT INTO Chemical_Production (Chemical_Name, Production_Date, Production_Rate) VALUES (\u0027Chemical A\u0027, \u00272022-03-01\u0027, 1000), (\u0027Chemical B\u0027, \u00272022-03-01\u0027, 1200), (\u0027Chemical C\u0027, \u00272022-03-01\u0027, 1500), (\u0027Chemical D\u0027, \u00272022-03-01\u0027, 800), (\u0027Chemical E\u0027, \u00272022-03-01\u0027, 1300);", + "sql": "SELECT Chemical_Name, Production_Rate FROM Chemical_Production WHERE Production_Date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) ORDER BY Production_Rate DESC LIMIT 5;", + "sql_explanation": "The SQL query selects the top 5 chemicals with the highest production rate in the last month from the Chemical_Production table, by filtering the records for 
production dates within the last month, sorting the production rates in descending order, and limiting the results to the top 5." +}, { + "id": "1398", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste produced by the Asia region in the last quarter?", + "sql_context": "CREATE TABLE WasteProductionByRegion (WasteID INT, Plant VARCHAR(255), WasteQuantity DECIMAL(5,2), Timestamp DATETIME, Region VARCHAR(255));", + "sql": "SELECT SUM(WasteQuantity) FROM WasteProductionByRegion WHERE Region \u003d \u0027Asia\u0027 AND Timestamp BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH) AND CURRENT_DATE();", + "sql_explanation": "The SQL query calculates the total waste produced by the Asia region by using the SUM function on the WasteQuantity column. It filters the records for the last quarter using the BETWEEN operator with DATE_SUB and CURRENT_DATE functions." 
+}, { + "id": "1738", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of water consumed daily by the chemical manufacturing plant in Jakarta in the past year?", + "sql_context": "CREATE TABLE water_consumption (id INT, plant_location VARCHAR(50), consumption_date DATE, amount_consumed FLOAT);", + "sql": "SELECT AVG(amount_consumed) FROM water_consumption WHERE plant_location \u003d \u0027Jakarta\u0027 AND consumption_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "This SQL query calculates the average amount of water consumed daily by the chemical manufacturing plant in Jakarta in the past year. It does this by using the AVG function on the amount_consumed column and filtering the rows using the WHERE clause to only include records from Jakarta and from the past year. It then groups the rows using the GROUP BY clause to group them by date, and calculates the average for each day." 
+}, { + "id": "1822", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Which safety protocols were updated in the last quarter for each manufacturing plant?", + "sql_context": "CREATE TABLE ManufacturingPlants (PlantID INT, PlantName TEXT, SafetyProtocols TEXT, LastUpdate DATE); INSERT INTO ManufacturingPlants (PlantID, PlantName, SafetyProtocols, LastUpdate) VALUES (1, \u0027Plant A\u0027, \u0027Protocol 1, Protocol 2\u0027, \u00272022-01-01\u0027), (2, \u0027Plant B\u0027, \u0027Protocol 2, Protocol 3\u0027, \u00272022-03-15\u0027), (3, \u0027Plant C\u0027, \u0027Protocol 1, Protocol 3\u0027, \u00272022-02-05\u0027);", + "sql": "SELECT PlantName, SafetyProtocols FROM ManufacturingPlants WHERE LastUpdate \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) ORDER BY LastUpdate DESC;", + "sql_explanation": "This query lists the safety protocols updated in the last quarter for each manufacturing plant. It filters the ManufacturingPlants table for rows where the LastUpdate is within the last 3 months and orders the rows by LastUpdate in descending order. Finally, it returns the PlantName and SafetyProtocols columns from the filtered rows." 
+}, { + "id": "2012", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which chemical products were discontinued in the past year and what were their environmental impact scores?", + "sql_context": "CREATE TABLE ChemicalProducts (ProductID INT, Chemical TEXT, ManufacturerID INT, ProductLaunchDate DATE, ProductDiscontinuationDate DATE, EnvironmentalImpactScore DECIMAL(3,2)); INSERT INTO ChemicalProducts (ProductID, Chemical, ManufacturerID, ProductLaunchDate, ProductDiscontinuationDate, EnvironmentalImpactScore) VALUES (1, \u0027Acetone\u0027, 1, \u00272019-01-01\u0027, \u00272021-01-01\u0027, 3.2), (2, \u0027Ethanol\u0027, 1, \u00272020-01-01\u0027, NULL, 4.5), (3, \u0027Methanol\u0027, 2, \u00272019-01-01\u0027, \u00272021-01-01\u0027, 5.0);", + "sql": "SELECT CP.Chemical, CP.EnvironmentalImpactScore FROM ChemicalProducts CP WHERE YEAR(CP.ProductDiscontinuationDate) \u003d YEAR(CURDATE()) - 1;", + "sql_explanation": "The SQL query retrieves the chemical name and environmental impact score for all chemical products discontinued in the past year. It does this by filtering the records for the past year, and returning the chemical name and environmental impact score for each discontinued product." 
+}, { + "id": "2178", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the safety protocol for \u0027Ammonia\u0027 in the \"safety_protocols\" table", + "sql_context": "CREATE TABLE safety_protocols (id INT PRIMARY KEY, chemical_name VARCHAR(255), safety_protocol VARCHAR(255), date_implemented DATE); INSERT INTO safety_protocols (id, chemical_name, safety_protocol, date_implemented) VALUES (1, \u0027Ammonia\u0027, \u0027Always wear safety goggles when handling\u0027, \u00272022-01-01\u0027);", + "sql": "UPDATE safety_protocols SET safety_protocol \u003d \u0027Always wear safety goggles and gloves when handling\u0027 WHERE chemical_name \u003d \u0027Ammonia\u0027;", + "sql_explanation": "This query updates the safety protocol for \u0027Ammonia\u0027 in the \"safety_protocols\" table. It finds the record with a chemical_name of \u0027Ammonia\u0027 and changes the safety_protocol to \u0027Always wear safety goggles and gloves when handling\u0027." 
+}, { + "id": "2310", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum pressure (in bar) recorded for any chemical storage unit located in Japan, for the month of August?", + "sql_context": "CREATE TABLE StorageUnits (id INT, location VARCHAR(50), pressure FLOAT); INSERT INTO StorageUnits (id, location, pressure) VALUES (1, \u0027Japan\u0027, 5.1), (2, \u0027Japan\u0027, 4.6), (3, \u0027Japan\u0027, 5.7);", + "sql": "SELECT MIN(pressure) FROM StorageUnits WHERE location \u003d \u0027Japan\u0027 AND EXTRACT(MONTH FROM DATE \u00272022-08-01\u0027 + INTERVAL id DAY) \u003d 8;", + "sql_explanation": "This query calculates the minimum pressure recorded for Japanese storage units in August. It uses the StorageUnits table, which includes location and pressure data. The DATE \u00272022-08-01\u0027 + INTERVAL id DAY expression generates a list of dates for each storage unit (id) to extract the month and check if it\u0027s August." 
+}, { + "id": "2452", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"employee_training\" table to reflect that employee E002 has completed the training for \"Chemical Safety\" on January 3, 2022.", + "sql_context": "CREATE TABLE employee_training (employee_id varchar(10),training_topic varchar(255),training_date date);", + "sql": "UPDATE employee_training SET training_date \u003d \u00272022-01-03\u0027 WHERE employee_id \u003d \u0027E002\u0027 AND training_topic \u003d \u0027Chemical Safety\u0027;", + "sql_explanation": "This SQL query updates the \"employee_training\" table to reflect that employee E002 has completed the training for \"Chemical Safety\" on January 3, 2022." 
+}, { + "id": "2890", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average environmental impact score of all products that were launched in the last year?", + "sql_context": "CREATE TABLE Products (product_id INT, product_name VARCHAR(20), launch_date DATE, environmental_impact_score DECIMAL(3,2));", + "sql": "SELECT AVG(environmental_impact_score) FROM Products WHERE launch_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR);", + "sql_explanation": "The query calculates the average environmental impact score of all products that were launched in the last year by averaging the \u0027environmental_impact_score\u0027 where the \u0027launch_date\u0027 is within the last year." 
+}, { + "id": "3024", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many new products were introduced in the last quarter?", + "sql_context": "CREATE TABLE ProductInnovation (id INT, product VARCHAR(255), introduction_date DATE); INSERT INTO ProductInnovation (id, product, introduction_date) VALUES (1, \u0027product A\u0027, \u00272023-01-01\u0027), (2, \u0027product B\u0027, \u00272023-04-15\u0027);", + "sql": "SELECT COUNT(*) FROM ProductInnovation WHERE introduction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH);", + "sql_explanation": "This query counts the number of new products introduced in the last quarter by using the COUNT function on the entire row (represented by the *) and filtering for dates within the last 3 months." 
+}, { + "id": "3635", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the environmental impact assessments for each chemical in Plant A?", + "sql_context": "CREATE TABLE Chemical_Environmental_Impact (Plant_Name VARCHAR(255), Chemical_Name VARCHAR(255), Environmental_Impact VARCHAR(255)); INSERT INTO Chemical_Environmental_Impact (Plant_Name, Chemical_Name, Environmental_Impact) VALUES (\u0027Plant A\u0027, \u0027Chemical X\u0027, \u0027High water consumption\u0027), (\u0027Plant A\u0027, \u0027Chemical Y\u0027, \u0027Air pollution\u0027);", + "sql": "SELECT Environmental_Impact FROM Chemical_Environmental_Impact WHERE Plant_Name \u003d \u0027Plant A\u0027;", + "sql_explanation": "The SQL query selects the environmental impact assessments for each chemical in Plant A from the Chemical_Environmental_Impact table, by filtering the records for Plant A." 
+}, { + "id": "3767", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature in the chemical storage facilities in New York and New Jersey combined?", + "sql_context": "CREATE TABLE storage_facilities (id INT, name TEXT, state TEXT, temperature FLOAT); INSERT INTO storage_facilities (id, name, state, temperature) VALUES (1, \u0027Facility1\u0027, \u0027New York\u0027, 20.5), (2, \u0027Facility2\u0027, \u0027New York\u0027, 21.3), (3, \u0027Facility3\u0027, \u0027New Jersey\u0027, 18.8), (4, \u0027Facility4\u0027, \u0027New Jersey\u0027, 19.9);", + "sql": "SELECT AVG(temperature) FROM storage_facilities WHERE state IN (\u0027New York\u0027, \u0027New Jersey\u0027);", + "sql_explanation": "This query calculates the average temperature in storage facilities located in New York and New Jersey by using the AVG() function on the temperature column and the IN operator to filter the results to only include facilities located in New York or New Jersey." 
+}, { + "id": "3979", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum production rate of a single chemical in the African region?", + "sql_context": "CREATE TABLE regional_chemical_production (id INT, chemical_name VARCHAR(255), region VARCHAR(255), production_rate FLOAT); INSERT INTO regional_chemical_production (id, chemical_name, region, production_rate) VALUES (1, \u0027Ethanol\u0027, \u0027Africa\u0027, 650.0);", + "sql": "SELECT MIN(production_rate) FROM regional_chemical_production WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "This SQL query finds the minimum production rate of a single chemical in the African region by using the MIN() function on the production_rate column and filtering the rows based on the region column with the WHERE clause." 
+}, { + "id": "4600", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the chemical products with no production rate?", + "sql_context": "CREATE TABLE product_production (product_id INT, product_name VARCHAR(30), production_rate INT); INSERT INTO product_production (product_id, product_name, production_rate) VALUES (1, \u0027Chemical A\u0027, 1000), (2, \u0027Chemical B\u0027, 2500), (3, \u0027Chemical C\u0027, NULL);", + "sql": "SELECT product_name FROM product_production WHERE production_rate IS NULL;", + "sql_explanation": "The SQL query identifies the chemical products with no production rate by selecting the product_name values from the product_production table where the production_rate column is null." 
+}, { + "id": "4933", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \"product_innovation\" table where the \"innovation_date\" is after December 31, 2022.", + "sql_context": "CREATE TABLE product_innovation (innovation_id int,innovation_date date,innovation_description varchar(255),impact_level varchar(50));", + "sql": "DELETE FROM product_innovation WHERE innovation_date \u003e \u00272022-12-31\u0027;", + "sql_explanation": "This SQL query deletes records from the \"product_innovation\" table where the \"innovation_date\" is after December 31, 2022." +}, { + "id": "5351", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and quantities of all chemical substances produced by the \u0027Manufacturing Plant A\u0027?", + "sql_context": "CREATE TABLE Manufacturing_Plant_A (Substance_Name VARCHAR(255), Quantity INT); INSERT INTO Manufacturing_Plant_A (Substance_Name, Quantity) VALUES (\u0027Ethanol\u0027, 500), (\u0027Methanol\u0027, 300), (\u0027Propanol\u0027, 200);", + "sql": "SELECT Substance_Name, Quantity FROM Manufacturing_Plant_A;", + "sql_explanation": "The SQL query selects the names and quantities of all chemical substances produced by \u0027Manufacturing Plant A\u0027 from the 
Manufacturing_Plant_A table." +}, { + "id": "5630", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest production rate for a single compound in any factory?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(255)); CREATE TABLE production_rates (factory_id INT, compound_name VARCHAR(255), production_rate INT); INSERT INTO factories (id, name) VALUES (1, \u0027Factory A\u0027), (2, \u0027Factory B\u0027), (3, \u0027Factory C\u0027); INSERT INTO production_rates (factory_id, compound_name, production_rate) VALUES (1, \u0027Compound X\u0027, 200), (1, \u0027Compound Y\u0027, 180), (2, \u0027Compound X\u0027, 250), (2, \u0027Compound Y\u0027, 220), (3, \u0027Compound X\u0027, 300), (3, \u0027Compound Y\u0027, 280);", + "sql": "SELECT MAX(production_rate) FROM production_rates;", + "sql_explanation": "This query calculates the highest production rate for a single compound in any factory by filtering rows where the production_rate is the maximum value using the MAX() function." 
+}, { + "id": "5649", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show total safety stock for all chemicals in \u0027chemical_inventory\u0027 table", + "sql_context": "CREATE TABLE chemical_inventory (id INT, chemical_name VARCHAR(50), safety_stock INT);", + "sql": "SELECT SUM(safety_stock) FROM chemical_inventory;", + "sql_explanation": "This query calculates and displays the total safety_stock for all chemicals in the chemical_inventory table." +}, { + "id": "1908", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of ingredients used in products that are not certified as cruelty-free and are sourced from \u0027Large Scale Producers\u0027?", + "sql_context": "CREATE TABLE product_ingredients_lsp (product_name VARCHAR(50), ingredient VARCHAR(50), ingredient_source VARCHAR(50), is_cruelty_free BOOLEAN); INSERT INTO product_ingredients_lsp (product_name, ingredient, ingredient_source, is_cruelty_free) VALUES (\u0027Clean Slate\u0027, \u0027Water\u0027, \u0027Organic Farms\u0027, true), (\u0027Clean Slate\u0027, \u0027Mineral Powder\u0027, \u0027Organic Farms\u0027, true), (\u0027Clean Slate\u0027, \u0027Water\u0027, \u0027Large Scale Producers\u0027, false), (\u0027Eye Have You\u0027, 
\u0027Water\u0027, \u0027Large Scale Producers\u0027, false), (\u0027Eye Have You\u0027, \u0027Mineral Powder\u0027, \u0027Large Scale Producers\u0027, false);", + "sql": "SELECT COUNT(DISTINCT ingredient) FROM product_ingredients_lsp WHERE is_cruelty_free \u003d false AND ingredient_source \u003d \u0027Large Scale Producers\u0027;", + "sql_explanation": "This SQL query counts the number of distinct ingredients used in products that are not certified as cruelty-free and are sourced from \u0027Large Scale Producers\u0027. It filters the results to only include rows where the is_cruelty_free is false and ingredient_source is \u0027Large Scale Producers\u0027. This results in the total number of ingredients used in products that are not certified as cruelty-free and are sourced from \u0027Large Scale Producers\u0027." +}, { + "id": "2064", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average cruelty-free certification date for products certified by Leaping Bunny.", + "sql_context": "CREATE TABLE CrueltyFreeCertification (id INT, product_id INT, certification_date DATE, certification_authority VARCHAR(255)); INSERT INTO CrueltyFreeCertification (id, product_id, certification_date, certification_authority) VALUES (1, 1, \u00272022-03-01\u0027, \u0027Leaping Bunny\u0027), (2, 2, \u00272021-09-15\u0027, \u0027Leaping Bunny\u0027), (3, 3, \u00272022-06-05\u0027, \u0027Choose Cruelty Free\u0027);", + "sql": "SELECT AVG(certification_date) as avg_certification_date FROM CrueltyFreeCertification WHERE certification_authority \u003d \u0027Leaping Bunny\u0027;", + 
"sql_explanation": "This query calculates the average cruelty-free certification date for products certified by Leaping Bunny. It uses the AVG function to find the average certification date, and filters the results to only include rows with a certification authority of Leaping Bunny." +}, { + "id": "2248", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 cruelty-free cosmetic products with the highest consumer preference ratings?", + "sql_context": "CREATE TABLE cosmetics (product_id INT, product_name TEXT, cruelty_free BOOLEAN, consumer_rating FLOAT); INSERT INTO cosmetics VALUES (1, \u0027Lipstick A\u0027, true, 4.6), (2, \u0027Foundation B\u0027, false, 4.3), (3, \u0027Mascara C\u0027, true, 4.7), (4, \u0027Eyeshadow D\u0027, true, 4.5), (5, \u0027Blush E\u0027, false, 4.4);", + "sql": "SELECT product_name, cruelty_free, consumer_rating FROM cosmetics WHERE cruelty_free \u003d true ORDER BY consumer_rating DESC LIMIT 5;", + "sql_explanation": "The SQL query selects the product_name, cruelty_free, and consumer_rating columns from the cosmetics table, filters the rows where the cruelty_free column is true, orders the result by consumer_rating in descending order, and limits the output to the top 5 records." 
+}, { + "id": "2514", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average report age for products that have a safety rating of \u0027Excellent\u0027?", + "sql_context": "CREATE TABLE Product (id INT, productName VARCHAR(50), price DECIMAL(5,2)); INSERT INTO Product (id, productName, price) VALUES (4, \u0027Blush\u0027, 14.99), (5, \u0027Foundation\u0027, 29.99), (6, \u0027Lip Liner\u0027, 16.99); CREATE TABLE SafetyRecord (id INT, productId INT, safetyRating VARCHAR(10), reportDate DATE); INSERT INTO SafetyRecord (id, productId, safetyRating, reportDate) VALUES (5, 4, \u0027Excellent\u0027, \u00272021-04-01\u0027), (6, 5, \u0027Good\u0027, \u00272021-05-01\u0027), (7, 6, \u0027Excellent\u0027, \u00272021-06-01\u0027);", + "sql": "SELECT AVG(DATEDIFF(day, S.reportDate, GETDATE())) as avgReportAge FROM SafetyRecord S WHERE S.safetyRating \u003d \u0027Excellent\u0027;", + "sql_explanation": "This query calculates the average number of days since the last safety report for products that have a safety rating of \u0027Excellent\u0027 in the SafetyRecord table." 
+}, { + "id": "2681", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cruelty-free cosmetic products were launched in the first half of 2020?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), is_cruelty_free BOOLEAN, region VARCHAR(50), sales INT, launch_year INT, launch_quarter INT); INSERT INTO products (product_id, product_name, is_cruelty_free, region, sales, launch_year, launch_quarter) VALUES (1, \u0027Lipstick\u0027, true, \u0027Canada\u0027, 500, 2020, 1), (2, \u0027Mascara\u0027, false, \u0027Canada\u0027, 700, 2019, 4), (3, \u0027Foundation\u0027, true, \u0027USA\u0027, 800, 2020, 2), (4, \u0027Eyeshadow\u0027, true, \u0027USA\u0027, 600, 2019, 3), (5, \u0027Blush\u0027, false, \u0027Canada\u0027, 400, 2020, 1);", + "sql": "SELECT COUNT(*) FROM products WHERE is_cruelty_free \u003d true AND launch_year \u003d 2020 AND launch_quarter BETWEEN 1 AND 2;", + "sql_explanation": "This query calculates the number of cruelty-free cosmetic products launched in the first half of 2020 by counting the number of records that meet the specified conditions." 
+}, { + "id": "2691", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all ingredients sourced from France for cosmetic products launched in 2020 with a \u0027natural\u0027 label.", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, ingredient_name VARCHAR(100), source_country VARCHAR(50), launch_year INT, label VARCHAR(50)); INSERT INTO ingredients (ingredient_id, product_id, ingredient_name, source_country, launch_year, label) VALUES (1, 1, \u0027Beeswax\u0027, \u0027France\u0027, 2020, \u0027natural\u0027), (2, 2, \u0027Water\u0027, \u0027Canada\u0027, 2019, \u0027organic\u0027), (3, 3, \u0027Coconut Oil\u0027, \u0027Sri Lanka\u0027, 2020, \u0027natural\u0027), (4, 4, \u0027Shea Butter\u0027, \u0027Ghana\u0027, 2018, \u0027natural\u0027), (5, 5, \u0027Jojoba Oil\u0027, \u0027Argentina\u0027, 2020, \u0027natural\u0027);", + "sql": "SELECT ingredient_name FROM ingredients WHERE source_country \u003d \u0027France\u0027 AND launch_year \u003d 2020 AND label \u003d \u0027natural\u0027;", + "sql_explanation": "This query selects the ingredient_name from the ingredients table, filters for ingredients sourced from France, launched in 2020, and labeled as \u0027natural\u0027." 
+}, { + "id": "3008", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total price of cruelty-free skin care products?", + "sql_context": "CREATE TABLE Products (id INT, name VARCHAR(50), category VARCHAR(50), price DECIMAL(5,2), cruelty_free BOOLEAN); INSERT INTO Products (id, name, category, price, cruelty_free) VALUES (1, \u0027Hydrating Cleanser\u0027, \u0027Skin Care\u0027, 12.99, true), (2, \u0027Vitamin C Serum\u0027, \u0027Skin Care\u0027, 39.99, false), (3, \u0027Tinted Moisturizer\u0027, \u0027Makeup\u0027, 24.99, true);", + "sql": "SELECT SUM(p.price) as total_price FROM Products p WHERE p.category \u003d \u0027Skin Care\u0027 AND p.cruelty_free \u003d true;", + "sql_explanation": "This query calculates the total price of cruelty-free skin care products by filtering the data based on the category and cruelty-free status using the WHERE clause." 
+}, { + "id": "3228", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update \u0027safety_rating\u0027 to \u00273\u0027 for all records in \u0027safety_records\u0027 table where \u0027record_date\u0027 is in \u00272019\u0027", + "sql_context": "CREATE TABLE safety_records (record_id INT PRIMARY KEY, record_date DATE, safety_rating INT);", + "sql": "UPDATE safety_records SET safety_rating \u003d \u00273\u0027 WHERE record_date BETWEEN \u00272019-01-01\u0027 AND \u00272019-12-31\u0027;", + "sql_explanation": "* This query updates the \u0027safety_rating\u0027 to \u00273\u0027 for all records in \u0027safety_records\u0027 table where the \u0027record_date\u0027 is in \u00272019\u0027 by using the UPDATE statement, specifying the table name and the new value to set, and the WHERE clause to filter the records to update." 
+}, { + "id": "3341", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products are sourced from fair trade suppliers in the African market?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), region VARCHAR(255), fair_trade BOOLEAN); INSERT INTO products (product_id, product_name, region, fair_trade) VALUES (1, \u0027Nourishing Cream\u0027, \u0027Asia Pacific\u0027, false), (2, \u0027Revitalizing Serum\u0027, \u0027Europe\u0027, false), (3, \u0027Gentle Cleanser\u0027, \u0027Asia Pacific\u0027, true), (4, \u0027Hydrating Lotion\u0027, \u0027North America\u0027, true), (5, \u0027Soothing Toner\u0027, \u0027Asia Pacific\u0027, true), (6, \u0027Brightening Essence\u0027, \u0027Europe\u0027, false), (7, \u0027Rejuvenating Mask\u0027, \u0027Africa\u0027, true), (8, \u0027Nourishing Lotion\u0027, \u0027Africa\u0027, true), (9, \u0027Revitalizing Shampoo\u0027, \u0027Africa\u0027, true);", + "sql": "SELECT COUNT(*) AS fair_trade_products FROM products WHERE region \u003d \u0027Africa\u0027 AND fair_trade \u003d true;", + "sql_explanation": "The query counts the number of products sourced from fair trade suppliers in the African market by counting the number of products in the African market that are sourced from fair trade suppliers." 
+}, { + "id": "4005", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of organic cosmetics sourced from Italy?", + "sql_context": "CREATE TABLE products (product_id INT, name TEXT, is_organic BOOLEAN, price DECIMAL, source_country TEXT); INSERT INTO products (product_id, name, is_organic, price, source_country) VALUES (1, \u0027Lipstick\u0027, TRUE, 20.99, \u0027Italy\u0027); INSERT INTO products (product_id, name, is_organic, price, source_country) VALUES (2, \u0027Eye Shadow\u0027, FALSE, 18.49, \u0027Spain\u0027);", + "sql": "SELECT MAX(price) FROM products WHERE is_organic \u003d TRUE AND source_country \u003d \u0027Italy\u0027;", + "sql_explanation": "The SQL query finds the maximum price of organic cosmetics sourced from Italy by using the MAX aggregation function. It filters the rows where is_organic is TRUE and source_country is \u0027Italy\u0027." 
+}, { + "id": "4019", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many consumers have made a purchase in the last month?", + "sql_context": "CREATE TABLE consumer (consumer_id INT, name TEXT); CREATE TABLE purchase (purchase_id INT, consumer_id INT, purchase_date DATE); INSERT INTO consumer VALUES (1, \u0027Alice\u0027), (2, \u0027Bob\u0027), (3, \u0027Charlie\u0027); INSERT INTO purchase VALUES (1, 1, \u00272022-01-10\u0027), (2, 2, \u00272022-01-15\u0027), (3, 3, \u00272022-02-05\u0027);", + "sql": "SELECT COUNT(DISTINCT consumer_id) FROM purchase WHERE purchase_date \u003e\u003d \u00272022-02-01\u0027;", + "sql_explanation": "This query calculates the number of consumers who have made a purchase in the last month. It does so by selecting the count of distinct consumer_ids from the purchase table, and filtering for rows where the purchase_date is within the last month." 
+}, { + "id": "4070", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all skincare products that are not certified organic?", + "sql_context": "CREATE TABLE products (id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), certified_organic BOOLEAN); INSERT INTO products (id, name, category, price, certified_organic) VALUES (1, \u0027Cleanser\u0027, \u0027skincare\u0027, 19.99, true), (2, \u0027Toner\u0027, \u0027skincare\u0027, 24.99, false), (3, \u0027Moisturizer\u0027, \u0027skincare\u0027, 34.99, true);", + "sql": "SELECT name FROM products WHERE category \u003d \u0027skincare\u0027 AND certified_organic \u003d false;", + "sql_explanation": "Find the names of all skincare products that are not certified organic by filtering the products table where category is \u0027skincare\u0027 and certified_organic is false, then returning the name field." 
+}, { + "id": "4342", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of organic skincare products in France?", + "sql_context": "CREATE TABLE skincare (id INT, name TEXT, price DECIMAL, is_organic BOOLEAN, country TEXT); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (1, \u0027Cleanser\u0027, 19.99, true, \u0027France\u0027); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (2, \u0027Toner\u0027, 15.99, true, \u0027France\u0027); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (3, \u0027Moisturizer\u0027, 29.99, true, \u0027France\u0027);", + "sql": "SELECT MAX(price) FROM skincare WHERE is_organic \u003d true AND country \u003d \u0027France\u0027;", + "sql_explanation": "This SQL query calculates the maximum price of organic skincare products sold in France by filtering the skincare table to only include organic products sold in France and then calculating the maximum price of those products." 
+}, { + "id": "4440", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of vegan cosmetics sold in Canada?", + "sql_context": "CREATE TABLE products (product_id INT, name VARCHAR(100), is_vegan BOOLEAN, price DECIMAL(5,2), country VARCHAR(50)); INSERT INTO products (product_id, name, is_vegan, price, country) VALUES (1, \u0027Lipstick\u0027, true, 25.99, \u0027Canada\u0027); INSERT INTO products (product_id, name, is_vegan, price, country) VALUES (2, \u0027Foundation\u0027, false, 34.99, \u0027Canada\u0027);", + "sql": "SELECT AVG(price) FROM products WHERE is_vegan \u003d true AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query calculates the average price of vegan cosmetics by filtering the products table for vegan items sold in Canada and then calculating the average price using the AVG function." 
+}, { + "id": "5281", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of products with a cruelty-free certification?", + "sql_context": "CREATE TABLE products (product_id INT, price DECIMAL(5,2), is_cruelty_free BOOLEAN); INSERT INTO products (product_id, price, is_cruelty_free) VALUES (1, 25.99, true), (2, 30.99, false), (3, 15.99, true);", + "sql": "SELECT AVG(price) FROM products WHERE is_cruelty_free \u003d true;", + "sql_explanation": "This SQL query calculates the average price of products that have a cruelty-free certification (is_cruelty_free \u003d true) by using the AVG function on the price column." 
+}, { + "id": "5343", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027safety_records\u0027 table where \u0027record_date\u0027 is before \u00272019-01-01\u0027", + "sql_context": "CREATE TABLE safety_records (record_id INT PRIMARY KEY, record_date DATE, safety_rating INT);", + "sql": "DELETE FROM safety_records WHERE record_date \u003c \u00272019-01-01\u0027;", + "sql_explanation": "* This query deletes all records from the \u0027safety_records\u0027 table where the \u0027record_date\u0027 is before \u00272019-01-01\u0027 by using the DELETE statement and specifying the conditions in the WHERE clause." 
+}, { + "id": "5388", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total price of products in the Organic segment?", + "sql_context": "CREATE TABLE products (product_id INT, segment VARCHAR(20), price DECIMAL(5,2)); INSERT INTO products (product_id, segment, price) VALUES (1, \u0027Natural\u0027, 15.99), (2, \u0027Organic\u0027, 20.99), (3, \u0027Natural\u0027, 12.49);", + "sql": "SELECT SUM(price) FROM products WHERE segment \u003d \u0027Organic\u0027;", + "sql_explanation": "The SQL query calculates the total price of products in the \u0027Organic\u0027 segment by using the SUM function on the \u0027price\u0027 column, filtering for rows with \u0027Organic\u0027 in the \u0027segment\u0027 column." 
+}, { + "id": "5459", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cruelty-free certified products?", + "sql_context": "CREATE TABLE products (product_id INT PRIMARY KEY, cruelty_free BOOLEAN); INSERT INTO products (product_id, cruelty_free) VALUES (1, true), (2, true), (3, false), (4, true);", + "sql": "SELECT COUNT(*) FROM products WHERE cruelty_free \u003d true;", + "sql_explanation": "This query counts the total number of records in the products table where the cruelty_free column is true, returning the total number of cruelty-free certified products." +}, { + "id": "5522", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of products certified by PETA?", + "sql_context": "CREATE TABLE Certifications (ProductID INT, PETA BOOLEAN); INSERT INTO Certifications (ProductID, PETA) VALUES (1, TRUE), (2, FALSE), (3, TRUE), (4, FALSE), (5, TRUE);", + "sql": "SELECT COUNT(*) FROM Certifications WHERE PETA \u003d TRUE;", + "sql_explanation": "The SQL query counts the number of records in the Certifications table where PETA is set to TRUE." 
+}, { + "id": "5603", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027brands\u0027 table where \u0027brand_country\u0027 is \u0027Germany\u0027", + "sql_context": "CREATE TABLE brands (brand_id INT PRIMARY KEY, brand_name VARCHAR(255), brand_country VARCHAR(100));", + "sql": "DELETE FROM brands WHERE brand_country \u003d \u0027Germany\u0027;", + "sql_explanation": "* This query deletes all records from the \u0027brands\u0027 table where the \u0027brand_country\u0027 is \u0027Germany\u0027 by using the DELETE statement and specifying the conditions in the WHERE clause." +}, { + "id": "5832", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales of cosmetic products in the database?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), sales INT); INSERT INTO products VALUES (1, \u0027Mascara\u0027, 5000), (2, \u0027Lipstick\u0027, 7000), (3, \u0027Foundation\u0027, 6000);", + "sql": "SELECT SUM(sales) FROM products;", + "sql_explanation": "The query calculates the sum of the \u0027sales\u0027 column in the products table." 
+}, { + "id": "1490", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which open pedagogy initiatives have the lowest student engagement in the Humanities?", + "sql_context": "CREATE TABLE open_pedagogy (initiative_id INT, subject_area VARCHAR(50), student_engagement INT); INSERT INTO open_pedagogy (initiative_id, subject_area, student_engagement) VALUES (1, \u0027Mathematics\u0027, 25), (2, \u0027Science\u0027, 30), (3, \u0027History\u0027, 35), (4, \u0027Arts\u0027, 50), (5, \u0027English\u0027, 40), (6, \u0027Philosophy\u0027, 20), (7, \u0027Literature\u0027, 22);", + "sql": "SELECT subject_area, student_engagement FROM open_pedagogy WHERE subject_area \u003d \u0027Philosophy\u0027 OR subject_area \u003d \u0027Literature\u0027 ORDER BY student_engagement ASC;", + "sql_explanation": "The query selects the subject area and student engagement for Philosophy and Literature and orders by student engagement in ascending order, returning all rows." 
+}, { + "id": "1922", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who have completed lifelong learning programs?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(255), num_lifelong_learning_programs INT); CREATE TABLE lifelong_learning_programs (id INT, name VARCHAR(255), num_students INT); INSERT INTO students (id, name, num_lifelong_learning_programs) VALUES (1, \u0027Student A\u0027, 2), (2, \u0027Student B\u0027, 1), (3, \u0027Student C\u0027, 0); INSERT INTO lifelong_learning_programs (id, name, num_students) VALUES (1, \u0027Program 1\u0027, 3), (2, \u0027Program 2\u0027, 2);", + "sql": "SELECT 100.0 * SUM(CASE WHEN s.num_lifelong_learning_programs \u003e 0 THEN 1 ELSE 0 END) / COUNT(s.id) AS pct_completed_programs FROM students s;", + "sql_explanation": "Count the number of students who have completed at least one lifelong learning program, and divide by the total number of students to find the percentage." 
+}, { + "id": "2097", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of students in the mental health program who have had exactly 2 absences in the past month?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(50), program VARCHAR(50), absences INT, last_visit DATE);", + "sql": "SELECT COUNT(*) FROM students WHERE program \u003d \u0027mental health\u0027 AND absences \u003d 2 AND last_visit \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH);", + "sql_explanation": "This query counts the number of students in the mental health program who have had exactly 2 absences in the past month. It does this by filtering the students table for rows where the program is mental health, absences is 2, and last_visit is within the past month, and then counting the number of rows that meet those criteria." 
+}, { + "id": "2581", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of professional development hours for teachers who identify as Asian or Pacific Islander?", + "sql_context": "CREATE TABLE teachers (id INT PRIMARY KEY, name VARCHAR(50), professional_development_hours INT, gender VARCHAR(50), race VARCHAR(50));", + "sql": "SELECT AVG(t.professional_development_hours) as avg_hours FROM teachers t WHERE t.race IN (\u0027Asian\u0027, \u0027Pacific Islander\u0027);", + "sql_explanation": "Calculating the average number of professional development hours for teachers who identify as Asian or Pacific Islander by filtering the teachers table by race and calculating the average professional development hours." 
+}, { + "id": "2613", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new student record into the \u0027Students\u0027 table", + "sql_context": "CREATE TABLE Students (StudentID int, FirstName varchar(20), LastName varchar(20), Age int, Gender varchar(10), Grade int);", + "sql": "INSERT INTO Students (StudentID, FirstName, LastName, Age, Gender, Grade) VALUES (1234, \u0027John\u0027, \u0027Doe\u0027, 16, \u0027Male\u0027, 11);", + "sql_explanation": "This query inserts a new record into the \u0027Students\u0027 table for a student named John Doe who is 16 years old, Male, and in the 11th grade." +}, { + "id": "3243", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who passed the mental health screening?", + "sql_context": "CREATE TABLE MentalHealthScreening (StudentID INT, Age INT, Gender VARCHAR(10), PassedScreening BOOLEAN); INSERT INTO MentalHealthScreening (StudentID, Age, Gender, PassedScreening) VALUES (1, 22, \u0027Male\u0027, true); INSERT INTO MentalHealthScreening (StudentID, Age, Gender, PassedScreening) VALUES (2, 20, \u0027Female\u0027, false); INSERT INTO MentalHealthScreening (StudentID, Age, Gender, PassedScreening) VALUES (3, 25, \u0027Male\u0027, true);", + "sql": "SELECT (COUNT(*) 
FILTER (WHERE PassedScreening \u003d true)) * 100.0 / COUNT(*) FROM MentalHealthScreening;", + "sql_explanation": "The SQL query calculates the percentage of students who passed the mental health screening by filtering the MentalHealthScreening table based on the PassedScreening column and then calculating the percentage using the COUNT function with the FILTER clause." +}, { + "id": "3438", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of hours spent on professional development by a single teacher in the last year?", + "sql_context": "CREATE TABLE TeacherProfessionalDevelopment (TeacherID INT, Date DATE, Hours INT); INSERT INTO TeacherProfessionalDevelopment (TeacherID, Date, Hours) VALUES (1, \u00272021-12-15\u0027, 10);", + "sql": "SELECT MAX(Hours) FROM TeacherProfessionalDevelopment WHERE Date \u003e\u003d DATEADD(year, -1, GETDATE());", + "sql_explanation": "The SQL query finds the maximum number of hours spent on professional development by a single teacher in the last year. It uses the MAX function to find the maximum value of the Hours column for rows where Date is within the last year, as determined by the DATEADD function." 
+}, { + "id": "3651", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the teachers that have not taken any professional development courses and have more than 5 years of experience?", + "sql_context": "CREATE TABLE Teachers (TeacherID INT, Name VARCHAR(50), ProfessionalDevelopmentCourse INT, YearsOfExperience INT); INSERT INTO Teachers (TeacherID, Name, ProfessionalDevelopmentCourse, YearsOfExperience) VALUES (5, \u0027Ava Red\u0027, 0, 6); INSERT INTO Teachers (TeacherID, Name, ProfessionalDevelopmentCourse, YearsOfExperience) VALUES (6, \u0027Benjamin Orange\u0027, 0, 3);", + "sql": "SELECT Name FROM Teachers WHERE ProfessionalDevelopmentCourse \u003d 0 AND YearsOfExperience \u003e 5;", + "sql_explanation": "This query returns the names of teachers who have not taken any professional development courses and have more than 5 years of experience. It uses the AND operator to combine the conditions in the WHERE clause." 
+}, { + "id": "3697", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of teachers who have completed a professional development course in the past year?", + "sql_context": "CREATE TABLE teachers (id INT, name VARCHAR(50), age INT, last_pd_course DATE);", + "sql": "SELECT AVG(age) FROM teachers WHERE last_pd_course \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR);", + "sql_explanation": "This query calculates the average age of teachers who have completed a professional development course in the past year. It does this by filtering the teachers table for rows where the last_pd_course is within the last year, and then calculating the average age of those rows." 
+}, { + "id": "3965", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many professional development courses were completed by teachers in the \"Eastwood\" school last year?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, school VARCHAR(20), courses_completed INT, year INT); INSERT INTO teachers (teacher_id, school, courses_completed, year) VALUES (1, \u0027Eastwood\u0027, 12, 2021), (2, \u0027Eastwood\u0027, 15, 2021), (3, \u0027Westwood\u0027, 10, 2021);", + "sql": "SELECT SUM(courses_completed) FROM teachers WHERE school \u003d \u0027Eastwood\u0027 AND year \u003d 2021;", + "sql_explanation": "The SQL query calculates the total number of professional development courses completed by teachers in the \"Eastwood\" school last year by using the SUM function on the courses_completed column, and filtering the data by the school and year columns with the values \u0027Eastwood\u0027 and 2021." 
+}, { + "id": "4218", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of students in a classroom for teachers in the \u0027ClassroomSize\u0027 table who teach in urban schools?", + "sql_context": "CREATE TABLE ClassroomSize (id INT, name TEXT, school_type TEXT, students_in_classroom INT); INSERT INTO ClassroomSize (id, name, school_type, students_in_classroom) VALUES (1, \u0027Pam\u0027, \u0027Suburban\u0027, 28), (2, \u0027Sam\u0027, \u0027Urban\u0027, 35), (3, \u0027Terry\u0027, \u0027Rural\u0027, 25);", + "sql": "SELECT MAX(students_in_classroom) FROM ClassroomSize WHERE school_type \u003d \u0027Urban\u0027;", + "sql_explanation": "Filter the ClassroomSize table to only show teachers who teach in urban schools, then find the maximum number of students in a classroom for this filtered set." 
+}, { + "id": "4257", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students have experienced mental health issues in \u0027OpenMindedSchool\u0027 district in the last year?", + "sql_context": "CREATE TABLE Student (StudentID INT, District VARCHAR(20)); CREATE TABLE MentalHealth (StudentID INT, Issue DATE); INSERT INTO Student (StudentID, District) VALUES (1, \u0027OpenMindedSchool\u0027); INSERT INTO Student (StudentID, District) VALUES (2, \u0027ClosedMindedSchool\u0027); INSERT INTO MentalHealth (StudentID, Issue) VALUES (1, \u00272022-01-01\u0027); CREATE VIEW StudentMentalHealthView AS SELECT * FROM Student s JOIN MentalHealth m ON s.StudentID \u003d m.StudentID WHERE m.Issue \u003e\u003d DATE(CURRENT_DATE) - 365;", + "sql": "SELECT COUNT(*) FROM StudentMentalHealthView WHERE District \u003d \u0027OpenMindedSchool\u0027;", + "sql_explanation": "The SQL query calculates the total number of students who have experienced mental health issues in \u0027OpenMindedSchool\u0027 district in the last year by selecting the count of all records in the \u0027StudentMentalHealthView\u0027 view that have \u0027OpenMindedSchool\u0027 as the district value in the \u0027Student\u0027 table and have a corresponding record in the \u0027MentalHealth\u0027 table with an issue date within the last year." 
+}, { + "id": "4424", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table for student mental health data", + "sql_context": "CREATE TABLE student_mental_health (student_id INT, mental_health_score INT);", + "sql": "CREATE TABLE student_mental_health (student_id INT, mental_health_score INT);", + "sql_explanation": "This SQL statement creates a table called \u0027student_mental_health\u0027 with two columns: \u0027student_id\u0027 and \u0027mental_health_score\u0027, both of integer type." +}, { + "id": "4544", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and dates of all lifelong learning events taking place in 2023.", + "sql_context": "CREATE TABLE lifelong_learning_events (id INT, name VARCHAR(255), date DATE); INSERT INTO lifelong_learning_events (id, name, date) VALUES (1, \u0027Python Programming Workshop\u0027, \u00272023-06-01\u0027);", + "sql": "SELECT name, date FROM lifelong_learning_events WHERE date \u003e\u003d \u00272023-01-01\u0027;", + "sql_explanation": "This query selects the name and date columns from the lifelong_learning_events table and filters the results to only include events taking place in 2023." 
+}, { + "id": "4667", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students participated in open pedagogy projects in \u0027Fall 2020\u0027?", + "sql_context": "CREATE TABLE open_pedagogy (student_id INT, project_title VARCHAR(100), semester VARCHAR(10)); INSERT INTO open_pedagogy (student_id, project_title, semester) VALUES (1, \u0027Open Source Software Development\u0027, \u0027Fall 2020\u0027), (2, \u0027Digital Storytelling\u0027, \u0027Fall 2020\u0027), (3, \u0027Data Visualization for Social Change\u0027, \u0027Fall 2020\u0027), (4, \u0027Citizen Science and Community Engagement\u0027, \u0027Fall 2020\u0027);", + "sql": "SELECT COUNT(student_id) FROM open_pedagogy WHERE semester \u003d \u0027Fall 2020\u0027;", + "sql_explanation": "This SQL query counts the number of students who participated in open pedagogy projects during the \u0027Fall 2020\u0027 semester by selecting all student_id values from the \u0027open_pedagogy\u0027 table where the semester is \u0027Fall 2020\u0027, and then counting the number of selected values." 
+}, { + "id": "4668", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List open pedagogy courses with enrollment lower than 200", + "sql_context": "CREATE TABLE courses (id INT, name VARCHAR(50), open_pedagogy BOOLEAN, enrollment INT); INSERT INTO courses (id, name, open_pedagogy, enrollment) VALUES (1, \u0027Data Science\u0027, true, 150);", + "sql": "SELECT name FROM courses WHERE open_pedagogy \u003d true AND enrollment \u003c 200;", + "sql_explanation": "This query selects the names of all courses that use open pedagogy and have an enrollment lower than 200." +}, { + "id": "4726", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique professional development courses attended by teachers in \u0027New York\u0027?", + "sql_context": "CREATE TABLE teacher_pd (teacher_id INT, course_name VARCHAR(50), location VARCHAR(20)); INSERT INTO teacher_pd (teacher_id, course_name, location) VALUES (101, \u0027Python for Educators\u0027, \u0027New York\u0027), (102, \u0027Data Science for Teachers\u0027, \u0027Chicago\u0027), (103, \u0027Open Pedagogy\u0027, \u0027New York\u0027);", + "sql": "SELECT DISTINCT course_name FROM teacher_pd WHERE location \u003d \u0027New York\u0027;", + "sql_explanation": "The query selects and 
returns distinct course names attended by teachers in \u0027New York\u0027." +}, { + "id": "4997", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent on open pedagogy projects by students in the \u0027Science\u0027 department?", + "sql_context": "CREATE TABLE projects (id INT, project_name TEXT, department TEXT, hours_spent INT); INSERT INTO projects (id, project_name, department, hours_spent) VALUES (1, \u0027Climate Change Experiment\u0027, \u0027Science\u0027, 20), (2, \u0027Renewable Energy Research\u0027, \u0027Science\u0027, 30), (3, \u0027Biology Lab Report\u0027, \u0027Science\u0027, 15);", + "sql": "SELECT SUM(hours_spent) FROM projects WHERE department \u003d \u0027Science\u0027;", + "sql_explanation": "This query calculates the total number of hours spent on open pedagogy projects by students in the \u0027Science\u0027 department. It uses the SUM() function to calculate the total number of hours spent on projects and the WHERE clause to filter the results to only include projects completed by students in the \u0027Science\u0027 department." 
+}, { + "id": "5115", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all students with \u0027mental_health_concerns\u0027 in the \u0027students\u0027 table", + "sql_context": "CREATE TABLE students (student_id INT, name VARCHAR(50), mental_health_concerns VARCHAR(20));", + "sql": "SELECT * FROM students WHERE mental_health_concerns IS NOT NULL;", + "sql_explanation": "This SQL query shows all students with \u0027mental_health_concerns\u0027 in the \u0027students\u0027 table by using the WHERE clause." +}, { + "id": "5430", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent in professional development workshops in 2022?", + "sql_context": "CREATE TABLE workshops (workshop_id INT, year INT, hours_spent INT); INSERT INTO workshops (workshop_id, year, hours_spent) VALUES (1, 2022, 3), (2, 2022, 4), (3, 2022, 5);", + "sql": "SELECT SUM(hours_spent) FROM workshops WHERE year \u003d 2022;", + "sql_explanation": "The SQL query calculates the sum of \u0027hours_spent\u0027 in the \u0027workshops\u0027 table for workshops in 2022." 
+}, { + "id": "5464", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of students and teachers who have ever participated in professional development programs, regardless of completion status, in the education schema?", + "sql_context": "CREATE SCHEMA education;CREATE TABLE professional_development (id INT, role VARCHAR(10), name VARCHAR(50), completed BOOLEAN);INSERT INTO professional_development (id, role, name, completed) VALUES (1, \u0027student\u0027, \u0027John Doe\u0027, FALSE), (2, \u0027teacher\u0027, \u0027Jane Smith\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM education.professional_development;", + "sql_explanation": "This query counts all rows in the professional_development table, providing the total number of students and teachers who have ever participated in professional development programs." 
+}, { + "id": "5770", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record of the student with ID 3 from \u0027Student\u0027 table, if such a record exists.", + "sql_context": "CREATE TABLE Student (StudentID INT, District VARCHAR(20)); INSERT INTO Student (StudentID, District) VALUES (1, \u0027OpenSchool\u0027); INSERT INTO Student (StudentID, District) VALUES (2, \u0027ClosedSchool\u0027); CREATE TABLE MentalHealth (StudentID INT, Issue DATE); INSERT INTO MentalHealth (StudentID, Issue) VALUES (1, \u00272020-01-01\u0027); INSERT INTO MentalHealth (StudentID, Issue) VALUES (2, \u00272019-01-01\u0027);", + "sql": "DELETE FROM Student WHERE StudentID \u003d 3;", + "sql_explanation": "The SQL query deletes the record of the student with ID 3 from the \u0027Student\u0027 table, if such a record exists, by specifying the DELETE statement with the WHERE clause that filters the record based on the StudentID." 
+}, { + "id": "1461", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many textile artworks were created by South Asian artists between 1950 and 1999?", + "sql_context": "CREATE TABLE Artworks (id INT, name TEXT, artist TEXT, year INT, price FLOAT, country TEXT, category TEXT, is_textile BOOLEAN); INSERT INTO Artworks (id, name, artist, year, price, country, category, is_textile) VALUES (1, \u0027Textile1\u0027, \u0027SouthAsianArtist1\u0027, 1960, 1500.00, \u0027India\u0027, \u0027textile\u0027, true), (2, \u0027Painting2\u0027, \u0027AmericanArtist1\u0027, 2005, 8000.00, \u0027USA\u0027, \u0027painting\u0027, false), (3, \u0027Sculpture3\u0027, \u0027GermanArtist1\u0027, 2010, 12000.00, \u0027Germany\u0027, \u0027sculpture\u0027, false);", + "sql": "SELECT COUNT(*) FROM Artworks WHERE country IN (\u0027India\u0027, \u0027Pakistan\u0027, \u0027Bangladesh\u0027, \u0027Sri Lanka\u0027, \u0027Nepal\u0027) AND is_textile \u003d true AND year BETWEEN 1950 AND 1999;", + "sql_explanation": "First, we filter the records with the WHERE clause to only include textile artworks created by South Asian artists between 1950 and 1999. Then, we count the number of records that satisfy the condition using the COUNT function." 
+}, { + "id": "1607", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of ceramic artworks created by Black artists from Africa or the African Diaspora?", + "sql_context": "CREATE TABLE Artworks (id INT, name TEXT, artist TEXT, year INT, price FLOAT, country TEXT, category TEXT, is_black BOOLEAN); INSERT INTO Artworks (id, name, artist, year, price, country, category, is_black) VALUES (1, \u0027Ceramic1\u0027, \u0027BlackArtist1\u0027, 2000, 1000.00, \u0027SouthAfrica\u0027, \u0027ceramic\u0027, true), (2, \u0027Painting2\u0027, \u0027AmericanArtist1\u0027, 2005, 8000.00, \u0027USA\u0027, \u0027painting\u0027, false), (3, \u0027Sculpture3\u0027, \u0027GermanArtist1\u0027, 2010, 12000.00, \u0027Germany\u0027, \u0027sculpture\u0027, false);", + "sql": "SELECT AVG(price) FROM Artworks WHERE (country IN (\u0027Africa\u0027, \u0027SouthAfrica\u0027, \u0027USA\u0027, \u0027Caribbean\u0027, \u0027Brazil\u0027) AND is_black \u003d true AND category \u003d \u0027ceramic\u0027);", + "sql_explanation": "First, we filter the records with the WHERE clause to only include ceramic artworks created by Black artists from Africa or the African Diaspora. Then, we calculate the average price of the filtered records using the AVG function." 
+}, { + "id": "2027", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of modern art exhibitions held in New York since 2000 with more than 500 visitors?", + "sql_context": "CREATE TABLE Exhibitions (id INT, exhibition_name VARCHAR(50), location VARCHAR(30), visitors INT, art_period VARCHAR(20), start_date DATE); INSERT INTO Exhibitions (id, exhibition_name, location, visitors, art_period, start_date) VALUES (1, \u0027Exhibition1\u0027, \u0027New York\u0027, 600, \u0027Modern\u0027, \u00272005-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM Exhibitions WHERE location \u003d \u0027New York\u0027 AND art_period \u003d \u0027Modern\u0027 AND visitors \u003e 500 AND start_date \u003e\u003d \u00272000-01-01\u0027;", + "sql_explanation": "This query counts the number of modern art exhibitions held in New York with more than 500 visitors since the year 2000 by using the COUNT function and filtering rows with location as New York, art_period as Modern, visitors more than 500 and start_date after 2000." 
+}, { + "id": "2150", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add records of new galleries into the \u0027Galleries\u0027 table.", + "sql_context": "CREATE TABLE Galleries (gallery_id INT, gallery_name VARCHAR(255)); INSERT INTO Galleries (gallery_id, gallery_name) VALUES (1, \u0027Guggenheim Museum\u0027), (2, \u0027Louvre Museum\u0027);", + "sql": "INSERT INTO Galleries (gallery_id, gallery_name) VALUES (3, \u0027Museum of Modern Art, Paris\u0027), (4, \u0027Museum of Contemporary Art, Tokyo\u0027);", + "sql_explanation": "The query inserts two new records into the \u0027Galleries\u0027 table using the INSERT INTO statement with VALUES clause. The new records represent galleries from Paris and Tokyo." 
+}, { + "id": "3005", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of pottery from Indigenous artists in Canada?", + "sql_context": "CREATE TABLE ArtPieces (id INT, title VARCHAR(255), type VARCHAR(255), price DECIMAL(10,2), sale_year INT, artist_nationality VARCHAR(255)); INSERT INTO ArtPieces (id, title, type, price, sale_year, artist_nationality) VALUES (1, \u0027Pottery1\u0027, \u0027Pottery\u0027, 300, 2021, \u0027Canada - Indigenous\u0027);", + "sql": "SELECT AVG(price) FROM ArtPieces WHERE type \u003d \u0027Pottery\u0027 AND artist_nationality LIKE \u0027%Canada - Indigenous%\u0027;", + "sql_explanation": "The SQL query calculates the average price of pottery from Indigenous artists in Canada by using the AVG function on the price column and filtering the rows with the WHERE clause for the type and artist_nationality." 
+}, { + "id": "3222", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest and latest date of exhibitions held in \u0027Paris\u0027?", + "sql_context": "CREATE TABLE Exhibitions (ExhibitionID INT PRIMARY KEY, Title TEXT, Date DATE, City TEXT);", + "sql": "SELECT MIN(Exhibitions.Date), MAX(Exhibitions.Date) FROM Exhibitions WHERE Exhibitions.City \u003d \u0027Paris\u0027;", + "sql_explanation": "This query filters the records to only those exhibitions held in \u0027Paris\u0027. The final result is the earliest and latest date of such exhibitions." +}, { + "id": "3881", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for all artworks sold by the \u0027Impressionist\u0027 movement in the year 2010?", + "sql_context": "CREATE TABLE Artworks (artwork_id INT, movement VARCHAR(255), sale_year INT, revenue DECIMAL(10, 2));", + "sql": "SELECT SUM(revenue) FROM Artworks WHERE movement \u003d \u0027Impressionist\u0027 AND sale_year \u003d 2010;", + "sql_explanation": "This query calculates the total revenue for all artworks belonging to the \u0027Impressionist\u0027 movement that were sold in the year 2010 by summing up the revenue values in the Artworks table where movement is \u0027Impressionist\u0027 and sale_year is 2010." 
+}, { + "id": "4325", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average ticket price for art exhibitions in Paris in 2020?", + "sql_context": "CREATE TABLE Exhibitions (id INT, city VARCHAR(50), year INT, ticket_price DECIMAL(5,2));INSERT INTO Exhibitions (id, city, year, ticket_price) VALUES (1, \u0027Paris\u0027, 2020, 25.50);", + "sql": "SELECT AVG(ticket_price) FROM Exhibitions WHERE city \u003d \u0027Paris\u0027 AND year \u003d 2020;", + "sql_explanation": "We calculate the average ticket price for art exhibitions in Paris in 2020 by using the AVG function on the ticket_price column, filtering the data with the WHERE clause for the specified city and year." +}, { + "id": "4329", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the highest grossing exhibition in the \u0027Art Museum\u0027?", + "sql_context": "CREATE TABLE Art_Exhibition (exhibition_id INT, museum_name VARCHAR(255), gross_revenue FLOAT);", + "sql": "SELECT MAX(gross_revenue) FROM Art_Exhibition WHERE museum_name \u003d \u0027Art Museum\u0027;", + "sql_explanation": "This query calculates the maximum gross revenue from the \u0027Art_Exhibition\u0027 table for the museum named \u0027Art Museum\u0027." 
+}, { + "id": "4373", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all sculptures with their sale prices in descending order.", + "sql_context": "CREATE TABLE Artwork (ArtworkID INT, Title VARCHAR(100), Type VARCHAR(50), Price FLOAT); INSERT INTO Artwork VALUES (1, \u0027Guernica\u0027, \u0027Painting\u0027, 2000000); INSERT INTO Artwork VALUES (2, \u0027Venus de Milo\u0027, \u0027Sculpture\u0027, 1200000);", + "sql": "SELECT Title, Price FROM Artwork WHERE Type \u003d \u0027Sculpture\u0027 ORDER BY Price DESC;", + "sql_explanation": "The SQL query filters the records to only include sculptures and orders the results by sale price in descending order." 
+}, { + "id": "4389", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all exhibitions from the \u002719th Century\u0027 in the \u0027European\u0027 genre.", + "sql_context": "CREATE TABLE Exhibitions (id INT, title VARCHAR(255), genre VARCHAR(255), century VARCHAR(255)); INSERT INTO Exhibitions (id, title, genre, century) VALUES (1, \u0027Impressionism Exhibition\u0027, \u0027European\u0027, \u002719th Century\u0027);", + "sql": "DELETE FROM Exhibitions WHERE genre \u003d \u0027European\u0027 AND century \u003d \u002719th Century\u0027;", + "sql_explanation": "This query deletes all records from the Exhibitions table where both genre is \u0027European\u0027 and century is \u002719th Century\u0027." 
+}, { + "id": "4850", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest artwork creation date for \u0027Gallery_B\u0027?", + "sql_context": "CREATE TABLE Artworks_10 (ArtworkID INT, Title VARCHAR(50), Museum VARCHAR(50), Creation_Date DATE); INSERT INTO Artworks_10 (ArtworkID, Title, Museum, Creation_Date) VALUES (1, \u0027The Starry Night\u0027, \u0027Museum_X\u0027, \u00271889-06-18\u0027), (2, \u0027The Scream\u0027, \u0027Museum_Y\u0027, \u00271893-05-22\u0027), (3, \u0027The Persistence of Memory\u0027, \u0027Museum_Y\u0027, \u00271937-08-26\u0027), (4, \u0027Guernica\u0027, \u0027Museum_Z\u0027, \u00271937-04-18\u0027), (5, \u0027The Son of Man\u0027, \u0027Gallery_B\u0027, \u00271964-04-01\u0027);", + "sql": "SELECT MAX(Creation_Date) FROM Artworks_10 WHERE Museum \u003d \u0027Gallery_B\u0027;", + "sql_explanation": "This query selects the maximum Creation_Date from \u0027Artworks_10\u0027 where Museum is \u0027Gallery_B\u0027." 
+}, { + "id": "4885", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the title of the painting with PaintingID 1 to \u0027The Starry Night\u0027.", + "sql_context": "CREATE TABLE Paintings (PaintingID INT, Title VARCHAR(50), ArtistID INT, YearCreated INT); INSERT INTO Paintings (PaintingID, Title, ArtistID, YearCreated) VALUES (1, \u0027Starry Night Sketch\u0027, 1, 1889);", + "sql": "UPDATE Paintings SET Title \u003d \u0027The Starry Night\u0027 WHERE PaintingID \u003d 1;", + "sql_explanation": "The SQL query updates the title of the painting with PaintingID 1 to \u0027The Starry Night\u0027." +}, { + "id": "5029", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "When was \u0027The Persistence of Memory\u0027 created?", + "sql_context": "CREATE TABLE Dali (artwork VARCHAR(50), year INT); INSERT INTO Dali (artwork, year) VALUES (\u0027The Persistence of Memory\u0027, 1931), (\u0027Swans Reflecting Elephants\u0027, 1937);", + "sql": "SELECT year FROM Dali WHERE artwork \u003d \u0027The Persistence of Memory\u0027;", + "sql_explanation": "This query selects the \u0027year\u0027 column from the \u0027Dali\u0027 table, filtering for rows where the artwork is \u0027The Persistence of Memory\u0027, returning the year when \u0027The Persistence of Memory\u0027 was created." 
+}, { + "id": "5246", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of all \u0027Expressionist\u0027 artworks?", + "sql_context": "CREATE TABLE Artworks (artwork_id INT, style VARCHAR(20), price DECIMAL(10,2)); INSERT INTO Artworks (artwork_id, style, price) VALUES (1, \u0027Impressionist\u0027, 1200.00), (2, \u0027Expressionist\u0027, 2000.00), (3, \u0027Impressionist\u0027, 1800.00), (4, \u0027Expressionist\u0027, 2500.00), (5, \u0027Impressionist\u0027, 1500.00);", + "sql": "SELECT SUM(price) FROM Artworks WHERE style \u003d \u0027Expressionist\u0027;", + "sql_explanation": "This SQL query calculates the total value of all \u0027Expressionist\u0027 artworks. It does this by using the SUM() function on the \u0027price\u0027 column, while filtering the records to only include those with the \u0027style\u0027 of \u0027Expressionist\u0027." 
+}, { + "id": "5275", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the value of art pieces by artist \u0027Pablo\u0027 by 5%", + "sql_context": "CREATE TABLE ArtPieces (id INT, title VARCHAR(50), galleryId INT, year INT, value INT, artistId INT, artist VARCHAR(50)); INSERT INTO ArtPieces (id, title, galleryId, year, value, artistId, artist) VALUES (1, \u0027Piece 1\u0027, 1, 2000, 10000, 1, \u0027Pablo\u0027), (2, \u0027Piece 2\u0027, 1, 2010, 15000, 2, \u0027Dali\u0027), (3, \u0027Piece 3\u0027, 2, 2020, 20000, 3, \u0027Picasso\u0027), (4, \u0027Piece 4\u0027, 3, 1990, 5000, 1, \u0027Pablo\u0027), (5, \u0027Piece 5\u0027, NULL, 1984, 25000, 4, \u0027Monet\u0027), (6, \u0027Piece 6\u0027, NULL, 2014, 30000, 5, \u0027Warhol\u0027), (7, \u0027Piece 7\u0027, NULL, 1964, 15000, 1, \u0027Pablo\u0027);", + "sql": "UPDATE ArtPieces SET value \u003d value * 1.05 WHERE artistId \u003d 1;", + "sql_explanation": "This query filters art pieces by the \u0027artistId\u0027 column and updates the \u0027value\u0027 column by multiplying it by 1.05 (5%) where \u0027artistId\u0027 is \u0027Pablo\u0027. It increases the value of art pieces by \u0027Pablo\u0027 by 5%." 
+}, { + "id": "5454", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Exclude exhibitions that have already ended.", + "sql_context": "CREATE TABLE Exhibitions (ExhibitionID INT, Title VARCHAR(50), StartDate DATE, EndDate DATE); INSERT INTO Exhibitions (ExhibitionID, Title, StartDate, EndDate) VALUES (1, \u0027Impressionist Exhibition\u0027, \u00271874-04-15\u0027, \u00271874-05-15\u0027), (2, \u0027Expressionist Exhibition\u0027, \u00271919-07-01\u0027, \u00271919-09-30\u0027);", + "sql": "SELECT Title FROM Exhibitions WHERE EndDate \u003e CURDATE();", + "sql_explanation": "This query excludes exhibitions that have already ended by checking the EndDate column in the Exhibitions table against the current date." 
+}, { + "id": "2345", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum claim amount for home insurance in California in the current month?", + "sql_context": "CREATE TABLE claims (id INT, state VARCHAR(2), policy_type VARCHAR(20), claim_amount DECIMAL(10,2), claim_date DATE); INSERT INTO claims (id, state, policy_type, claim_amount, claim_date) VALUES (1, \u0027CA\u0027, \u0027Home\u0027, 5000, \u00272022-08-02\u0027), (2, \u0027CA\u0027, \u0027Auto\u0027, 3500, \u00272022-06-23\u0027), (3, \u0027TX\u0027, \u0027Home\u0027, 1200, \u00272022-01-14\u0027);", + "sql": "SELECT MIN(claim_amount) FROM claims WHERE state \u003d \u0027CA\u0027 AND policy_type \u003d \u0027Home\u0027 AND MONTH(claim_date) \u003d MONTH(CURRENT_DATE());", + "sql_explanation": "This query calculates the minimum claim amount for home insurance in California in the current month by selecting the claim_amount column from the claims table, filtering for California and Home policy_type, and applying the MIN function, checking if the claim_date is in the current month using the MONTH function." 
+}, { + "id": "2796", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the policy numbers, policyholder names, and car makes for policyholders who have a policy effective date on or after \u00272022-01-01\u0027", + "sql_context": "CREATE TABLE policyholders (policy_number INT, policyholder_name VARCHAR(50), car_make VARCHAR(20), policy_effective_date DATE);", + "sql": "SELECT policy_number, policyholder_name, car_make FROM policyholders WHERE policy_effective_date \u003e\u003d \u00272022-01-01\u0027;", + "sql_explanation": "The query starts by selecting the policy_number, policyholder_name, and car_make columns from the policyholders table. It then filters the records where the policy_effective_date is on or after \u00272022-01-01\u0027. The \u003e\u003d operator checks if the policy_effective_date is greater than or equal to the specified date." 
+}, { + "id": "3492", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the policy type and corresponding risk score for each policy, ordered by risk score in ascending order, for policies issued in \u0027Illinois\u0027?", + "sql_context": "CREATE TABLE Policies (PolicyID INT, PolicyType VARCHAR(20), IssueState VARCHAR(20), RiskScore DECIMAL(5,2)); INSERT INTO Policies (PolicyID, PolicyType, IssueState, RiskScore) VALUES (1, \u0027Auto\u0027, \u0027Illinois\u0027, 0.10), (2, \u0027Home\u0027, \u0027Illinois\u0027, 0.05), (3, \u0027Life\u0027, \u0027Illinois\u0027, 0.15);", + "sql": "SELECT PolicyType, RiskScore FROM Policies WHERE IssueState \u003d \u0027Illinois\u0027 ORDER BY RiskScore ASC;", + "sql_explanation": "The SQL query selects the PolicyType and RiskScore columns from the Policies table where the IssueState is \u0027Illinois\u0027. It then orders the results by the RiskScore column in ascending order." 
+}, { + "id": "3676", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of policyholders with life insurance in the state of New York?", + "sql_context": "CREATE TABLE Policyholder_Info (ID INT, Age INT, State VARCHAR(20), Insurance_Type VARCHAR(20)); INSERT INTO Policyholder_Info (ID, Age, State, Insurance_Type) VALUES (1, 45, \u0027New York\u0027, \u0027Life\u0027), (2, 35, \u0027California\u0027, \u0027Health\u0027), (3, 60, \u0027New York\u0027, \u0027Life\u0027), (4, 25, \u0027Texas\u0027, \u0027Auto\u0027), (5, 50, \u0027New York\u0027, \u0027Life\u0027), (6, 40, \u0027California\u0027, \u0027Life\u0027);", + "sql": "SELECT AVG(Age) FROM Policyholder_Info WHERE State \u003d \u0027New York\u0027 AND Insurance_Type \u003d \u0027Life\u0027;", + "sql_explanation": "This SQL query calculates the average age of policyholders with life insurance in the state of New York by using the AVG function to find the average age for policyholders in the Policyholder_Info table who have life insurance and reside in New York." 
+}, { + "id": "3740", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total claim amount processed in January 2021?", + "sql_context": "CREATE TABLE claims (id INT, processed_date DATE, amount DECIMAL(10, 2)); INSERT INTO claims (id, processed_date, amount) VALUES (1, \u00272021-01-01\u0027, 500.00), (2, \u00272021-02-01\u0027, 750.00), (3, \u00272021-01-15\u0027, 250.00);", + "sql": "SELECT SUM(amount) FROM claims WHERE processed_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-01-31\u0027;", + "sql_explanation": "This SQL query sums up the total claim amount (SUM(amount)) processed in January 2021 (processed_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-01-31\u0027) in the claims table." 
+}, { + "id": "3752", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new risk assessment model for policy type \u0027Health\u0027.", + "sql_context": "CREATE TABLE Policy (PolicyID INT, PolicyType VARCHAR(50)); INSERT INTO Policy VALUES (1, \u0027Auto\u0027), (2, \u0027Home\u0027), (3, \u0027Life\u0027), (4, \u0027Travel\u0027), (5, \u0027Health\u0027); CREATE TABLE RiskAssessment (AssessmentID INT, PolicyID INT, Model VARCHAR(50));", + "sql": "INSERT INTO RiskAssessment (AssessmentID, PolicyID, Model) VALUES (1, 5, \u0027GeneralHealth\u0027);", + "sql_explanation": "This query inserts a new record into the RiskAssessment table with PolicyID 5 and Model \u0027GeneralHealth\u0027 for risk assessment model of policy type \u0027Health\u0027." 
+}, { + "id": "4003", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update \u0027John Smith\u0027s\u0027 risk assessment score to 700 in the risk_assessment_table", + "sql_context": "CREATE TABLE risk_assessment_table (assessment_id INT, policy_holder TEXT, risk_score INT); INSERT INTO risk_assessment_table (assessment_id, policy_holder, risk_score) VALUES (1, \u0027John Smith\u0027, 650), (2, \u0027Jane Doe\u0027, 500), (3, \u0027Mike Johnson\u0027, 800);", + "sql": "UPDATE risk_assessment_table SET risk_score \u003d 700 WHERE policy_holder \u003d \u0027John Smith\u0027;", + "sql_explanation": "The SQL query first selects records from the risk_assessment_table table where policy_holder is \u0027John Smith\u0027. It then updates the risk_score for those records to 700." 
+}, { + "id": "4113", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of policies issued to policyholders in \u0027Washington\u0027 and \u0027Oregon\u0027?", + "sql_context": "CREATE TABLE policies (policy_id INT, policyholder_state VARCHAR(20)); INSERT INTO policies (policy_id, policyholder_state) VALUES (1, \u0027Washington\u0027), (2, \u0027Oregon\u0027), (3, \u0027Washington\u0027);", + "sql": "SELECT COUNT(*) FROM policies WHERE policyholder_state IN (\u0027Washington\u0027, \u0027Oregon\u0027);", + "sql_explanation": "This query returns the count of policies issued to policyholders in Washington and Oregon by counting all records where policyholder_state is either \u0027Washington\u0027 or \u0027Oregon\u0027." 
+}, { + "id": "4253", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum age of policyholders who have a policy with a premium between $1500 and $5000?", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Age INT, Premium DECIMAL(10, 2)); INSERT INTO Policyholders (PolicyholderID, Age, Premium) VALUES (1, 35, 5000), (2, 45, 1500), (3, 50, 3000), (4, 25, 2000);", + "sql": "SELECT MIN(Age), MAX(Age) FROM Policyholders WHERE Premium BETWEEN 1500 AND 5000;", + "sql_explanation": "This query calculates the minimum and maximum age of policyholders who have a policy with a premium between $1500 and $5000. It does so by filtering the Policyholders table for rows where Premium is between 1500 and 5000, then calculating the minimum of the Age column and the maximum of the Age column." 
+}, { + "id": "4383", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view named \u0027young_policyholders\u0027 that shows policyholders with age less than 30", + "sql_context": "CREATE TABLE if not exists policyholders (policyholder_id INT PRIMARY KEY, name VARCHAR(255), age INT, gender VARCHAR(10), policy_type VARCHAR(50), premium DECIMAL(10,2));", + "sql": "CREATE VIEW young_policyholders AS SELECT * FROM policyholders WHERE age \u003c 30;", + "sql_explanation": "A view named \u0027young_policyholders\u0027 is created which will show the policyholders having age less than 30 from the \u0027policyholders\u0027 table." 
+}, { + "id": "4567", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average claim amount paid to policyholders in \u0027New York\u0027?", + "sql_context": "CREATE TABLE claims (policyholder_id INT, claim_amount DECIMAL(10,2), policyholder_state VARCHAR(20)); INSERT INTO claims (policyholder_id, claim_amount, policyholder_state) VALUES (1, 500.00, \u0027New York\u0027), (2, 600.00, \u0027New York\u0027), (3, 700.00, \u0027New York\u0027);", + "sql": "SELECT AVG(claim_amount) FROM claims WHERE policyholder_state \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the average claim amount paid to policyholders in New York by averaging the claim_amount for all records where policyholder_state is \u0027New York\u0027." 
+}, { + "id": "4737", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the policy table to change the effective date for policy number 6 to \u00272020-01-01\u0027", + "sql_context": "CREATE TABLE policy (policy_number INT, policy_type VARCHAR(255), effective_date DATE); INSERT INTO policy (policy_number, policy_type, effective_date) VALUES (6, \u0027Home\u0027, \u00272019-01-01\u0027);", + "sql": "UPDATE policy SET effective_date \u003d \u00272020-01-01\u0027 WHERE policy_number \u003d 6;", + "sql_explanation": "This query updates the policy table to change the effective date for policy number 6 to \u00272020-01-01\u0027." +}, { + "id": "5098", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all policies for policyholders who have a car make of \u0027Honda\u0027 and are older than 40.", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Age INT, Premium DECIMAL(10, 2), CarMake VARCHAR(20)); INSERT INTO Policyholders (PolicyholderID, Age, Premium, CarMake) VALUES (1, 35, 5000, \u0027Toyota\u0027), (2, 45, 1500, \u0027Honda\u0027), (3, 50, 3000, \u0027Tesla\u0027);", + "sql": "SELECT * FROM Policyholders WHERE Age \u003e 40 AND CarMake \u003d \u0027Honda\u0027;", + "sql_explanation": "This query lists all policies for policyholders who have a car make of Honda and are 
older than 40. It does so by filtering the Policyholders table for rows where Age is greater than 40 and CarMake is \u0027Honda\u0027." +}, { + "id": "5237", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of policyholders living in California?", + "sql_context": "CREATE TABLE Policyholders (ID INT, Age INT, State VARCHAR(50)); INSERT INTO Policyholders (ID, Age, State) VALUES (1, 35, \u0027California\u0027), (2, 45, \u0027Texas\u0027), (3, 30, \u0027California\u0027), (4, 50, \u0027New York\u0027);", + "sql": "SELECT AVG(Age) FROM Policyholders WHERE State \u003d \u0027California\u0027;", + "sql_explanation": "This query calculates the average age of policyholders living in California using the AVG aggregation function and WHERE clause." 
+}, { + "id": "5290", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of policyholders with a car insurance policy?", + "sql_context": "CREATE TABLE policyholders (id INT, age INT, state VARCHAR(2), policy_type VARCHAR(10)); INSERT INTO policyholders (id, age, state, policy_type) VALUES (1, 35, \u0027NY\u0027, \u0027car\u0027), (2, 45, \u0027CA\u0027, \u0027home\u0027), (3, 28, \u0027NY\u0027, \u0027car\u0027), (4, 30, \u0027TX\u0027, \u0027car\u0027);", + "sql": "SELECT MIN(age) FROM policyholders WHERE policy_type \u003d \u0027car\u0027;", + "sql_explanation": "This query calculates the minimum age of policyholders with a car insurance policy. It uses the MIN function to find the minimum value of the \u0027age\u0027 column, and filters the data using the WHERE clause to only consider policyholders with a car insurance policy." 
+}, { + "id": "5453", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the gender of policyholder with id 1 to \u0027Female\u0027.", + "sql_context": "CREATE TABLE policyholders (id INT, name TEXT, age INT, gender TEXT, state TEXT); INSERT INTO policyholders (id, name, age, gender, state) VALUES (1, \u0027John Doe\u0027, 36, \u0027Male\u0027, \u0027New York\u0027); INSERT INTO policyholders (id, name, age, gender, state) VALUES (2, \u0027Jane Smith\u0027, 42, \u0027Female\u0027, \u0027New York\u0027);", + "sql": "UPDATE policyholders SET gender \u003d \u0027Female\u0027 WHERE id \u003d 1;", + "sql_explanation": "This query updates the gender of the policyholder with id 1 to \u0027Female\u0027 in the policyholders table." 
+}, { + "id": "5469", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of policyholders who have a policy with a premium greater than $3000?", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Age INT, Premium DECIMAL(10, 2)); INSERT INTO Policyholders (PolicyholderID, Age, Premium) VALUES (1, 35, 5000), (2, 45, 1500), (3, 50, 3000);", + "sql": "SELECT MIN(Age) FROM Policyholders WHERE Premium \u003e 3000;", + "sql_explanation": "This query calculates the minimum age of policyholders who have a policy with a premium greater than $3000. It does so by filtering the Policyholders table for rows where Premium is greater than 3000 and then calculating the minimum of the Age column." 
+}, { + "id": "5484", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the policy table with an effective date before \u00272018-01-01\u0027", + "sql_context": "CREATE TABLE policy (policy_number INT, policy_type VARCHAR(255), effective_date DATE); INSERT INTO policy (policy_number, policy_type, effective_date) VALUES (1, \u0027Commercial Auto\u0027, \u00272018-01-01\u0027); INSERT INTO policy (policy_number, policy_type, effective_date) VALUES (2, \u0027Home\u0027, \u00272020-01-01\u0027);", + "sql": "DELETE FROM policy WHERE effective_date \u003c \u00272018-01-01\u0027;", + "sql_explanation": "This query deletes all records from the policy table with an effective date before \u00272018-01-01\u0027." 
+}, { + "id": "5544", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which policyholders have more than 3 cars insured with us?", + "sql_context": "CREATE TABLE Policyholders (ID INT, Name VARCHAR(50), CarsInsured INT); INSERT INTO Policyholders (ID, Name, CarsInsured) VALUES (1, \u0027John Doe\u0027, 4), (2, \u0027Jane Smith\u0027, 2), (3, \u0027Mike Johnson\u0027, 3);", + "sql": "SELECT Name FROM Policyholders WHERE CarsInsured \u003e 3;", + "sql_explanation": "This query filters the Policyholders table to find people with more than 3 cars insured by using the WHERE clause and selecting the Name column." +}, { + "id": "5606", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique policy types for policyholders in \u0027UnderwritingTable1\u0027.", + "sql_context": "CREATE TABLE UnderwritingTable1 (PolicyID INT, PolicyType VARCHAR(20)); INSERT INTO UnderwritingTable1 (PolicyID, PolicyType) VALUES (1, \u0027Auto\u0027), (2, \u0027Home\u0027), (3, \u0027Auto\u0027);", + "sql": "SELECT DISTINCT PolicyType FROM UnderwritingTable1;", + "sql_explanation": "The SQL query selects distinct policy types from the UnderwritingTable1 table, providing a list of unique policy types." 
+}, { + "id": "5643", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique insurance types offered by the company.", + "sql_context": "CREATE TABLE InsuranceType (TypeID INT, InsuranceType TEXT); INSERT INTO InsuranceType (TypeID, InsuranceType) VALUES (1, \u0027Automotive\u0027); INSERT INTO InsuranceType (TypeID, InsuranceType) VALUES (2, \u0027Homeowners\u0027); INSERT INTO InsuranceType (TypeID, InsuranceType) VALUES (3, \u0027Life\u0027);", + "sql": "SELECT DISTINCT InsuranceType FROM InsuranceType;", + "sql_explanation": "The SQL query selects distinct InsuranceType from InsuranceType table." +}, { + "id": "5698", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all claims with a claim amount greater than 5000.", + "sql_context": "CREATE TABLE policyholders (id INT, name TEXT, age INT, gender TEXT, state TEXT); INSERT INTO policyholders (id, name, age, gender, state) VALUES (1, \u0027John Doe\u0027, 36, \u0027Male\u0027, \u0027California\u0027); INSERT INTO policyholders (id, name, age, gender, state) VALUES (2, \u0027Jane Smith\u0027, 42, \u0027Female\u0027, \u0027California\u0027); CREATE TABLE claims (id INT, policyholder_id INT, claim_amount INT); INSERT INTO claims (id, policyholder_id, claim_amount) VALUES (1, 1, 2500); INSERT 
INTO claims (id, policyholder_id, claim_amount) VALUES (2, 1, 3000); INSERT INTO claims (id, policyholder_id, claim_amount) VALUES (3, 2, 1500); INSERT INTO claims (id, policyholder_id, claim_amount) VALUES (4, 2, 6000);", + "sql": "DELETE FROM claims WHERE claim_amount \u003e 5000;", + "sql_explanation": "This query deletes all claims with a claim amount greater than 5000 from the claims table." +}, { + "id": "1260", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many pallets were shipped from the EMEA region to the Americas via the ocean freight route in Q2 2022?", + "sql_context": "CREATE TABLE Shipments (id INT, customer VARCHAR(255), region_origin VARCHAR(255), region_destination VARCHAR(255), route_type VARCHAR(255), quantity INT, quarter INT, year INT);", + "sql": "SELECT SUM(quantity) FROM Shipments WHERE (region_origin \u003d \u0027EMEA\u0027 AND region_destination \u003d \u0027Americas\u0027) AND route_type \u003d \u0027ocean freight\u0027 AND quarter \u003d 2 AND year \u003d 2022;", + "sql_explanation": "This query calculates the number of pallets shipped from the EMEA region to the Americas via the ocean freight route in Q2 2022 by summing the quantity values in the Shipments table that meet the specified conditions." 
+}, { + "id": "1919", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of shipments from Mexico to the United States or Canada?", + "sql_context": "CREATE TABLE shipments (id INT, source VARCHAR(20), destination VARCHAR(20), weight FLOAT); INSERT INTO shipments (id, source, destination, weight) VALUES (1, \u0027China\u0027, \u0027United States\u0027, 50.5), (2, \u0027China\u0027, \u0027Canada\u0027, 30.3), (3, \u0027Mexico\u0027, \u0027United States\u0027, 45.6), (4, \u0027Canada\u0027, \u0027United States\u0027, 25.8), (5, \u0027Canada\u0027, \u0027Mexico\u0027, 38.2), (6, \u0027Mexico\u0027, \u0027Canada\u0027, 40.1);", + "sql": "SELECT COUNT(*) FROM shipments WHERE (destination \u003d \u0027United States\u0027 AND source \u003d \u0027Mexico\u0027) OR (destination \u003d \u0027Canada\u0027 AND source \u003d \u0027Mexico\u0027);", + "sql_explanation": "This query counts the number of shipments from Mexico to the United States or Canada by counting all rows in the shipments table where the source is Mexico and the destination is either the United States or Canada." 
+}, { + "id": "2031", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total delivery time for shipments with a weight greater than 100 kg that were sent to \u0027Oceania\u0027 in the last week?", + "sql_context": "CREATE TABLE shipments (id INT, shipped_date DATE, destination VARCHAR(20), weight INT, delivery_time INT); INSERT INTO shipments (id, shipped_date, destination, weight, delivery_time) VALUES (1, \u00272022-03-15\u0027, \u0027Oceania\u0027, 120, 4), (2, \u00272022-03-20\u0027, \u0027Oceania\u0027, 180, 6), (3, \u00272022-03-03\u0027, \u0027Oceania\u0027, 200, 5);", + "sql": "SELECT SUM(delivery_time) FROM shipments WHERE shipped_date \u003e\u003d DATEADD(day, -7, GETDATE()) AND destination \u003d \u0027Oceania\u0027 AND weight \u003e 100;", + "sql_explanation": "This query calculates the total delivery time for shipments with a weight greater than 100 kg that were sent to Oceania in the last week by using the SUM function on the delivery_time column and filtering the data where the shipped_date is within the last 7 days, the destination is Oceania and the weight is greater than 100 kg." 
+}, { + "id": "2069", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum weight of shipments sent to \u0027South America\u0027 in March?", + "sql_context": "CREATE TABLE shipments (id INT, shipped_date DATE, destination VARCHAR(20), weight INT); INSERT INTO shipments (id, shipped_date, destination, weight) VALUES (1, \u00272022-03-05\u0027, \u0027South America\u0027, 150), (2, \u00272022-03-07\u0027, \u0027North America\u0027, 200), (3, \u00272022-03-16\u0027, \u0027South America\u0027, 250);", + "sql": "SELECT MAX(weight) FROM shipments WHERE shipped_date \u003e\u003d \u00272022-03-01\u0027 AND shipped_date \u003c \u00272022-04-01\u0027 AND destination \u003d \u0027South America\u0027;", + "sql_explanation": "This query calculates the maximum weight of shipments to South America in March by using the MAX function on the weight column and filtering the data where the shipped_date is in March and the destination is South America." 
+}, { + "id": "2325", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue from shipments to South America in Q3 2022?", + "sql_context": "CREATE TABLE shipments (shipment_id INT, shipment_date DATE, revenue DECIMAL(10,2), shipment_country VARCHAR(20)); INSERT INTO shipments (shipment_id, shipment_date, revenue, shipment_country) VALUES (1, \u00272022-07-01\u0027, 1000, \u0027Brazil\u0027), (2, \u00272022-08-15\u0027, 2000, \u0027USA\u0027), (3, \u00272022-09-03\u0027, 1500, \u0027Argentina\u0027);", + "sql": "SELECT SUM(revenue) FROM shipments WHERE shipment_country LIKE \u0027South%\u0027 AND shipment_date BETWEEN \u00272022-07-01\u0027 AND \u00272022-09-30\u0027;", + "sql_explanation": "This SQL query calculates the total revenue from shipments to South America in Q3 2022 by summing the revenue column in the shipments table where the shipment_country starts with \u0027South\u0027 and the shipment_date is in Q3 2022." 
+}, { + "id": "2854", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delivery time for reverse logistics shipments in the European Union in Q3 2022?", + "sql_context": "CREATE TABLE ReverseLogistics (id INT, customer VARCHAR(255), region VARCHAR(255), delivery_time FLOAT, quarter INT, year INT);", + "sql": "SELECT AVG(delivery_time) FROM ReverseLogistics WHERE region \u003d \u0027European Union\u0027 AND quarter \u003d 3 AND year \u003d 2022;", + "sql_explanation": "This query calculates the average delivery time for reverse logistics shipments in the European Union in Q3 2022 by averaging the delivery_time values in the ReverseLogistics table that meet the specified conditions." 
+}, { + "id": "3288", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from domestic shipments in the Midwest region for the year 2021?", + "sql_context": "CREATE SCHEMA IF NOT EXISTS logistics;CREATE TABLE IF NOT EXISTS shipments (shipment_id INT,region VARCHAR(20),revenue DECIMAL(10,2));INSERT INTO shipments (shipment_id, region, revenue) VALUES (1, \u0027Midwest\u0027, 5000.00), (2, \u0027Northeast\u0027, 7000.00);", + "sql": "SELECT SUM(revenue) FROM logistics.shipments WHERE region \u003d \u0027Midwest\u0027 AND YEAR(shipment_date) \u003d 2021;", + "sql_explanation": "This query calculates the total revenue generated from domestic shipments in the Midwest region for the year 2021. It does this by filtering the shipments table to only include rows where the region is \u0027Midwest\u0027 and the shipment date is in the year 2021. It then calculates the sum of the revenue column for these rows." 
+}, { + "id": "3356", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of packages shipped to California from warehouse 1?", + "sql_context": "CREATE TABLE warehouses (warehouse_id INT, warehouse_state VARCHAR(50)); INSERT INTO warehouses (warehouse_id, warehouse_state) VALUES (1, \u0027New York\u0027); CREATE TABLE packages (package_id INT, package_weight INT, warehouse_id INT, recipient_state VARCHAR(50)); INSERT INTO packages (package_id, package_weight, warehouse_id, recipient_state) VALUES (1, 5, 1, \u0027California\u0027);", + "sql": "SELECT SUM(package_weight) FROM packages WHERE warehouse_id \u003d 1 AND recipient_state \u003d \u0027California\u0027;", + "sql_explanation": "This SQL query calculates the total weight of packages by summing the package_weight column for records where the warehouse_id is 1 and the recipient_state is California." 
+}, { + "id": "3957", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of item \u0027A101\u0027 in warehouse \u0027CHI\u0027?", + "sql_context": "CREATE TABLE warehouses (id VARCHAR(5), name VARCHAR(5), location VARCHAR(10)); INSERT INTO warehouses (id, name, location) VALUES (\u0027CHI\u0027, \u0027Chicago\u0027, \u0027USA\u0027); CREATE TABLE inventory (item_code VARCHAR(5), warehouse_id VARCHAR(5), quantity INT); INSERT INTO inventory (item_code, warehouse_id, quantity) VALUES (\u0027A101\u0027, \u0027CHI\u0027, 300), (\u0027A102\u0027, \u0027CHI\u0027, 200);", + "sql": "SELECT SUM(quantity) FROM inventory WHERE item_code \u003d \u0027A101\u0027 AND warehouse_id \u003d \u0027CHI\u0027;", + "sql_explanation": "Summarize the quantity of item \u0027A101\u0027 in warehouse \u0027CHI\u0027 by adding up all records in the inventory table that meet the conditions." 
+}, { + "id": "4240", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of shipments that were delayed due to issues with the carrier?", + "sql_context": "CREATE TABLE Shipments (id INT, weight INT, delay_reason VARCHAR(50), delivery_date DATE, shipped_date DATE); INSERT INTO Shipments (id, weight, delay_reason, delivery_date, shipped_date) VALUES (1, 100, \u0027Carrier\u0027, \u00272022-01-05\u0027, \u00272022-01-03\u0027), (2, 150, \u0027Mechanical\u0027, \u00272022-01-07\u0027, \u00272022-01-06\u0027), (3, 200, \u0027Customs\u0027, \u00272022-02-12\u0027, \u00272022-02-10\u0027);", + "sql": "SELECT SUM(weight) AS total_weight FROM Shipments WHERE delay_reason \u003d \u0027Carrier\u0027;", + "sql_explanation": "This query calculates the total weight of shipments that were delayed due to issues with the carrier. It uses the SUM function to add up the weight column and filters the results to only include rows where the delay reason is \u0027Carrier\u0027." 
+}, { + "id": "4289", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find freight forwarding costs for Route 3 and Route 5", + "sql_context": "CREATE TABLE FreightForwarding (id INT, route VARCHAR(50), cost INT); INSERT INTO FreightForwarding (id, route, cost) VALUES (1, \u0027Route 3\u0027, 400), (2, \u0027Route 5\u0027, 600);", + "sql": "SELECT route, cost FROM FreightForwarding WHERE route IN (\u0027Route 3\u0027, \u0027Route 5\u0027);", + "sql_explanation": "This SQL query finds freight forwarding costs for Route 3 and Route 5 by selecting the route and cost columns and filtering for rows where the route is either \u0027Route 3\u0027 or \u0027Route 5\u0027." 
+}, { + "id": "4312", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of pallets handled in the Los Angeles warehouse?", + "sql_context": "CREATE TABLE warehouse_stats (id INT, warehouse VARCHAR(20), total_pallets INT); INSERT INTO warehouse_stats (id, warehouse, total_pallets) VALUES (1, \u0027Atlanta\u0027, 2500), (2, \u0027Los Angeles\u0027, 3000), (3, \u0027Houston\u0027, 2000);", + "sql": "SELECT SUM(total_pallets) FROM warehouse_stats WHERE warehouse \u003d \u0027Los Angeles\u0027;", + "sql_explanation": "Calculates the total number of pallets handled in the Los Angeles warehouse by adding up all total_pallets values in the warehouse_stats table where warehouse is \u0027Los Angeles\u0027." 
+}, { + "id": "4336", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of packages shipped per route in South America?", + "sql_context": "CREATE TABLE RoutesSouthAmerica (id INT, packages INT, continent VARCHAR(20)); INSERT INTO RoutesSouthAmerica (id, packages, continent) VALUES (1, 40, \u0027South America\u0027), (2, 50, \u0027North America\u0027);", + "sql": "SELECT MIN(packages) FROM RoutesSouthAmerica WHERE continent \u003d \u0027South America\u0027;", + "sql_explanation": "This SQL query finds the minimum number of packages shipped per route in South America by selecting all records with the continent \u0027South America\u0027 and calculating the minimum packages using the MIN() function." 
+}, { + "id": "4392", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delivery time for shipments to the Northeast region?", + "sql_context": "CREATE TABLE shipment_deliveries (id INT, shipment_id INT, region VARCHAR(10), delivery_time INT); INSERT INTO shipment_deliveries (id, shipment_id, region, delivery_time) VALUES (1, 1001, \u0027Northeast\u0027, 3), (2, 1002, \u0027Northeast\u0027, 5), (3, 1003, \u0027Southeast\u0027, 4);", + "sql": "SELECT AVG(delivery_time) FROM shipment_deliveries WHERE region \u003d \u0027Northeast\u0027;", + "sql_explanation": "Calculates the average delivery time for shipments to the Northeast region by finding the mean delivery time value in the shipment_deliveries table where region is \u0027Northeast\u0027." 
+}, { + "id": "4577", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum value of returns from Italy?", + "sql_context": "CREATE TABLE Italy_Returns (id INT, return_country VARCHAR(50), return_value FLOAT); INSERT INTO Italy_Returns (id, return_country, return_value) VALUES (1, \u0027Italy\u0027, 800), (2, \u0027Italy\u0027, 600), (3, \u0027Spain\u0027, 700);", + "sql": "SELECT MIN(return_value) FROM Italy_Returns WHERE return_country \u003d \u0027Italy\u0027;", + "sql_explanation": "The SQL query calculates the minimum value of returns from Italy by using the MIN function on the \u0027return_value\u0027 column. It filters the records to only those where the return_country is \u0027Italy\u0027." 
+}, { + "id": "4839", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the distinct item_type values in the warehouse_inventory table, excluding records with a quantity of 0?", + "sql_context": "CREATE TABLE warehouse_inventory (item_id INT, item_type VARCHAR(255), quantity INT); INSERT INTO warehouse_inventory (item_id, item_type, quantity) VALUES (1, \u0027furniture\u0027, 300), (2, \u0027electronics\u0027, 500), (3, \u0027clothing\u0027, 700), (4, \u0027toys\u0027, 0), (5, \u0027books\u0027, 200), (6, \u0027games\u0027, 100);", + "sql": "SELECT DISTINCT item_type FROM warehouse_inventory WHERE quantity \u003e 0;", + "sql_explanation": "This SQL query selects the distinct item_type values in the warehouse_inventory table, excluding records with a quantity of 0. It uses the DISTINCT keyword to select only unique values from the item_type column, and the WHERE clause to filter out records with a quantity of 0." 
+}, { + "id": "4937", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of parcels shipped from South Korea to Mexico in April?", + "sql_context": "CREATE TABLE sk_mx_parcels (id INT, weight FLOAT, shipped_date DATE); INSERT INTO sk_mx_parcels (id, weight, shipped_date) VALUES (1, 2.8, \u00272022-04-02\u0027), (2, 3.5, \u00272022-04-15\u0027);", + "sql": "SELECT SUM(weight) FROM sk_mx_parcels WHERE MONTH(shipped_date) \u003d 4;", + "sql_explanation": "Calculate the total weight of parcels shipped from South Korea to Mexico in April by filtering the sk_mx_parcels table based on shipped_date\u0027s month and then applying the SUM function on the weight column." 
+}, { + "id": "5000", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest shipped date from warehouse \u0027FRA\u0027?", + "sql_context": "CREATE TABLE shipments (shipment_id int, warehouse_id varchar(5), shipped_date date); INSERT INTO shipments (shipment_id, warehouse_id, shipped_date) VALUES (1, \u0027FRA\u0027, \u00272022-02-01\u0027), (2, \u0027FRA\u0027, \u00272022-02-03\u0027), (3, \u0027FRA\u0027, \u00272022-02-05\u0027);", + "sql": "SELECT MIN(shipped_date) FROM shipments WHERE warehouse_id \u003d \u0027FRA\u0027;", + "sql_explanation": "This query finds the earliest shipped date from warehouse \u0027FRA\u0027 by selecting the minimum \u0027shipped_date\u0027 in the \u0027shipments\u0027 table where the \u0027warehouse_id\u0027 is \u0027FRA\u0027." 
+}, { + "id": "5250", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum weight of shipments to Brazil?", + "sql_context": "CREATE TABLE Shipments (id INT, weight FLOAT, destination VARCHAR(20)); INSERT INTO Shipments (id, weight, destination) VALUES (1, 10, \u0027Brazil\u0027), (2, 20, \u0027USA\u0027), (3, 15, \u0027Brazil\u0027);", + "sql": "SELECT MIN(weight) FROM Shipments WHERE destination \u003d \u0027Brazil\u0027", + "sql_explanation": "Find the minimum weight of shipments to Brazil by selecting all records with the destination \u0027Brazil\u0027 and then finding the minimum value of the weight column." 
+}, { + "id": "5370", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of items in warehouse 2, 3, and 4?", + "sql_context": "CREATE TABLE warehouses (id INT, location VARCHAR(10), item VARCHAR(10), quantity INT); INSERT INTO warehouses (id, location, item, quantity) VALUES (1, \u0027NY\u0027, \u0027A101\u0027, 200), (2, \u0027NJ\u0027, \u0027A101\u0027, 300), (3, \u0027CA\u0027, \u0027B203\u0027, 150), (4, \u0027NY\u0027, \u0027C304\u0027, 50);", + "sql": "SELECT SUM(quantity) FROM warehouses WHERE id IN (2, 3, 4);", + "sql_explanation": "This SQL query calculates the total quantity of items in warehouse 2, 3, and 4 by summing up the \u0027quantity\u0027 values in the \u0027warehouses\u0027 table where the \u0027id\u0027 is either 2, 3, or 4." 
+}, { + "id": "5413", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the capacity of the warehouse in Miami to 1200", + "sql_context": "CREATE TABLE warehouse (id INT, city VARCHAR(20), capacity INT); INSERT INTO warehouse (id, city, capacity) VALUES (1, \u0027Chicago\u0027, 1000), (2, \u0027Houston\u0027, 1500), (3, \u0027Miami\u0027, 800);", + "sql": "UPDATE warehouse SET capacity \u003d 1200 WHERE city \u003d \u0027Miami\u0027;", + "sql_explanation": "The SQL query updates the capacity of the warehouse in Miami to 1200. This is achieved by specifying the UPDATE keyword, followed by the table name and the SET keyword to set the new capacity value. A WHERE clause is added to filter records based on the city." 
+}, { + "id": "5483", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum distance traveled for a single shipment in the freight forwarding data?", + "sql_context": "CREATE TABLE RouteExtreme (route_id INT, shipment_id INT, distance FLOAT, delivery_date DATE); INSERT INTO RouteExtreme (route_id, shipment_id, distance, delivery_date) VALUES (1, 1, 100, \u00272022-01-01\u0027), (2, 2, 200, \u00272022-02-01\u0027), (3, 3, 150, \u00272022-03-01\u0027);", + "sql": "SELECT MAX(distance) as max_distance FROM RouteExtreme;", + "sql_explanation": "This query calculates the maximum distance traveled for a single shipment in the freight forwarding data by finding the maximum value in the distance column of the RouteExtreme table." 
+}, { + "id": "5538", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique customers are there in the \u0027customer_data\u0027 table?", + "sql_context": "CREATE TABLE customer_data (customer_id INT, customer_name VARCHAR(50), city VARCHAR(50)); INSERT INTO customer_data (customer_id, customer_name, city) VALUES (1, \u0027John Smith\u0027, \u0027New York\u0027), (2, \u0027Jane Doe\u0027, \u0027Los Angeles\u0027);", + "sql": "SELECT COUNT(DISTINCT customer_id) FROM customer_data;", + "sql_explanation": "Calculates the number of unique customers in the customer_data table by counting the distinct customer_id column values." 
+}, { + "id": "5609", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique warehouse locations and their corresponding capacities?", + "sql_context": "CREATE TABLE warehouses (warehouse_id INT, location TEXT, capacity INT); INSERT INTO warehouses (warehouse_id, location, capacity) VALUES (1, \u0027NYC\u0027, 5000), (2, \u0027LAX\u0027, 6000), (3, \u0027ORD\u0027, 7000), (4, \u0027DFW\u0027, 4000), (5, \u0027SFO\u0027, 8000);", + "sql": "SELECT DISTINCT location, capacity FROM warehouses;", + "sql_explanation": "The SQL query selects distinct locations and their corresponding capacities from the warehouses table." +}, { + "id": "5639", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records with a city of \u0027Los Angeles\u0027 from the warehouse table", + "sql_context": "CREATE TABLE warehouse (warehouse_id INT, warehouse_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50));", + "sql": "DELETE FROM warehouse WHERE city \u003d \u0027Los Angeles\u0027;", + "sql_explanation": "This query deletes records from the warehouse table where the city is \u0027Los Angeles\u0027." 
+}, { + "id": "5671", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the \"warehouses\" table where the country is \"China\"", + "sql_context": "CREATE TABLE warehouses (id INT PRIMARY KEY, name VARCHAR(50), city VARCHAR(50), country VARCHAR(50));", + "sql": "DELETE FROM warehouses WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "This SQL query deletes records from the \"warehouses\" table where the \"country\" column is \"China\". It performs a simple delete operation without any joins." +}, { + "id": "5761", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of packages shipped in a single day?", + "sql_context": "CREATE TABLE Shipments (id INT, warehouse_id INT, shipped_date DATE, packages INT); INSERT INTO Shipments (id, warehouse_id, shipped_date, packages) VALUES (1, 1, \u00272022-01-01\u0027, 50), (2, 1, \u00272022-01-02\u0027, 75), (3, 2, \u00272022-01-03\u0027, 100);", + "sql": "SELECT MAX(s.packages) FROM Shipments s;", + "sql_explanation": "This query calculates the maximum number of packages shipped in a single day." 
+}, { + "id": "5774", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the correlation between the number of items sold and the revenue generated by each salesperson in the sales database?", + "sql_context": "CREATE TABLE sales (salesperson VARCHAR(20), items INT, revenue INT); INSERT INTO sales (salesperson, items, revenue) VALUES (\u0027John\u0027, 50, 5000), (\u0027Jane\u0027, 70, 7000), (\u0027Doe\u0027, 60, 6000);", + "sql": "SELECT CORR(items, revenue) FROM sales;", + "sql_explanation": "This query calculates the correlation between the number of items sold and the revenue generated by each salesperson by using the CORR function, which calculates the correlation between two sets of values." 
+}, { + "id": "1252", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of volunteers per education-focused NPO in South Asia?", + "sql_context": "CREATE TABLE organizations (org_id INT, org_name VARCHAR(50), sector VARCHAR(50), country VARCHAR(50), num_volunteers INT); INSERT INTO organizations (org_id, org_name, sector, country, num_volunteers) VALUES (1, \u0027UNESCO\u0027, \u0027Education\u0027, \u0027India\u0027, 500), (2, \u0027Red Cross\u0027, \u0027Healthcare\u0027, \u0027India\u0027, 700), (3, \u0027Greenpeace\u0027, \u0027Environment\u0027, \u0027India\u0027, 300), (4, \u0027Save the Children\u0027, \u0027Education\u0027, \u0027Pakistan\u0027, 400), (5, \u0027Plan International\u0027, \u0027Education\u0027, \u0027Bangladesh\u0027, 600);", + "sql": "SELECT AVG(num_volunteers) FROM organizations WHERE sector \u003d \u0027Education\u0027 AND country IN (\u0027India\u0027, \u0027Pakistan\u0027, \u0027Bangladesh\u0027, \u0027Sri Lanka\u0027, \u0027Nepal\u0027, \u0027Bhutan\u0027, \u0027Maldives\u0027);", + "sql_explanation": "This query calculates the average number of volunteers per education-focused NPO in South Asia by selecting the average of the number of volunteers from the organizations table where the sector is education and the country is in South Asia." 
+}, { + "id": "1838", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of unique volunteers who have participated in disaster relief programs in the last 5 years?", + "sql_context": "CREATE TABLE volunteers (id INT, name TEXT, program TEXT, participation_date DATE); INSERT INTO volunteers (id, name, program, participation_date) VALUES (1, \u0027Alice Johnson\u0027, \u0027Disaster Relief\u0027, \u00272017-09-01\u0027); INSERT INTO volunteers (id, name, program, participation_date) VALUES (2, \u0027Bob Williams\u0027, \u0027Disaster Relief\u0027, \u00272018-11-15\u0027);", + "sql": "SELECT COUNT(DISTINCT name) FROM volunteers WHERE program \u003d \u0027Disaster Relief\u0027 AND participation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR);", + "sql_explanation": "The SQL query calculates the number of unique volunteers who have participated in disaster relief programs in the last 5 years by using COUNT function with DISTINCT keyword on name column and WHERE clause to filter records based on program and participation_date." 
+}, { + "id": "2223", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new nonprofit organization into the NonprofitOrganizations table.", + "sql_context": "CREATE TABLE NonprofitOrganizations (OrgID INT, OrgName VARCHAR(50), OrgType VARCHAR(50), OrgLocation VARCHAR(50));", + "sql": "INSERT INTO NonprofitOrganizations (OrgID, OrgName, OrgType, OrgLocation) VALUES (2, \u0027Sunshine Shelter\u0027, \u0027Shelter\u0027, \u0027Los Angeles\u0027);", + "sql_explanation": "This SQL query inserts a new record into the NonprofitOrganizations table with the OrgID of 2, OrgName of Sunshine Shelter, OrgType of Shelter, and OrgLocation of Los Angeles." 
+}, { + "id": "2560", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of volunteer hours in the \u0027Education\u0027 category in 2021?", + "sql_context": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, category VARCHAR(20), hours INT, hour_date DATE); INSERT INTO volunteer_hours (id, volunteer_id, category, hours, hour_date) VALUES (1, 1, \u0027Education\u0027, 5, \u00272021-01-05\u0027), (2, 2, \u0027Health\u0027, 7, \u00272021-01-10\u0027), (3, 3, \u0027Education\u0027, 6, \u00272021-02-15\u0027), (4, 4, \u0027Arts \u0026 Culture\u0027, 3, \u00272021-03-01\u0027), (5, 5, \u0027Health\u0027, 8, \u00272021-01-20\u0027), (6, 6, \u0027Education\u0027, 9, \u00272021-02-25\u0027), (7, 7, \u0027Arts \u0026 Culture\u0027, 4, \u00272021-03-10\u0027);", + "sql": "SELECT SUM(hours) as total_volunteer_hours FROM volunteer_hours WHERE category \u003d \u0027Education\u0027 AND YEAR(hour_date) \u003d 2021;", + "sql_explanation": "This SQL query calculates the total number of volunteer hours in the \u0027Education\u0027 category in 2021 by summing the \u0027hours\u0027 column for each record in the \u0027volunteer_hours\u0027 table that falls within the year 2021 and has a \u0027category\u0027 value of \u0027Education\u0027." 
+}, { + "id": "2841", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique donors have contributed to Canadian non-profit organizations since January 1, 2020?", + "sql_context": "CREATE TABLE donors_canada (id INT, donor_name TEXT, country TEXT, donation_amount DECIMAL, donation_date DATE); INSERT INTO donors_canada (id, donor_name, country, donation_amount, donation_date) VALUES (1, \u0027Alexander Smith\u0027, \u0027Canada\u0027, 100.00, \u00272020-08-03\u0027); INSERT INTO donors_canada (id, donor_name, country, donation_amount, donation_date) VALUES (2, \u0027Sophia Johnson\u0027, \u0027Canada\u0027, 75.00, \u00272020-11-12\u0027);", + "sql": "SELECT COUNT(DISTINCT donor_name) FROM donors_canada WHERE country \u003d \u0027Canada\u0027 AND donation_date \u003e\u003d \u00272020-01-01\u0027;", + "sql_explanation": "This query calculates the number of unique donors that have contributed to Canadian non-profit organizations since January 1, 2020 by using the COUNT(DISTINCT) function on the donor_name column, while filtering the records by country and date using the WHERE clause." 
+}, { + "id": "3309", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount in \u0027South Africa\u0027 for the year 2022?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName varchar(100), Country varchar(50), DonationAmount decimal(10,2)); INSERT INTO Donors (DonorID, DonorName, Country, DonationAmount) VALUES (1, \u0027John Doe\u0027, \u0027South Africa\u0027, 500.00);", + "sql": "SELECT SUM(DonationAmount) FROM Donors WHERE Country \u003d \u0027South Africa\u0027 AND YEAR(DonationDate) \u003d 2022;", + "sql_explanation": "This query calculates the total donation amount in South Africa for the year 2022 by summing the DonationAmount column where the Country is \u0027South Africa\u0027 and the YEAR of DonationDate is 2022." 
+}, { + "id": "3709", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the record for \u0027volunteer_management\u0027 table, where \u0027volunteer_id\u0027 is 001, and increase the \u0027hours_served\u0027 by 5.", + "sql_context": "CREATE TABLE volunteer_management (volunteer_id VARCHAR(10), hours_served INT); INSERT INTO volunteer_management (volunteer_id, hours_served) VALUES (\u0027001\u0027, 25), (\u0027002\u0027, 30);", + "sql": "UPDATE volunteer_management SET hours_served \u003d hours_served + 5 WHERE volunteer_id \u003d \u0027001\u0027;", + "sql_explanation": "1. Update the \u0027hours_served\u0027 column in the volunteer_management table by adding 5 to the value where volunteer_id is \u0027001\u0027." 
+}, { + "id": "3785", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers have joined non-profit organizations in Japan in 2020?", + "sql_context": "CREATE TABLE volunteers_japan (id INT, volunteer_name TEXT, country TEXT, join_date DATE); INSERT INTO volunteers_japan (id, volunteer_name, country, join_date) VALUES (1, \u0027Taro Yamada\u0027, \u0027Japan\u0027, \u00272020-03-05\u0027); INSERT INTO volunteers_japan (id, volunteer_name, country, join_date) VALUES (2, \u0027Hana Mori\u0027, \u0027Japan\u0027, \u00272020-08-18\u0027);", + "sql": "SELECT COUNT(*) FROM volunteers_japan WHERE country \u003d \u0027Japan\u0027 AND YEAR(join_date) \u003d 2020;", + "sql_explanation": "This query calculates the number of volunteers that have joined non-profit organizations in Japan in 2020 by using the COUNT() function and filtering the records by country and year using the WHERE clause and the YEAR() function." 
+}, { + "id": "3885", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average gift size in Africa?", + "sql_context": "CREATE TABLE donations (id INT, country TEXT, amount DECIMAL(10,2)); INSERT INTO donations (id, country, amount) VALUES (4, \u0027Nigeria\u0027, 25.00), (5, \u0027South Africa\u0027, 75.50), (6, \u0027Egypt\u0027, 100.00);", + "sql": "SELECT AVG(amount) FROM donations WHERE country IN (\u0027Nigeria\u0027, \u0027South Africa\u0027, \u0027Egypt\u0027);", + "sql_explanation": "The SQL query calculates the average of the \u0027amount\u0027 column from the \u0027donations\u0027 table where \u0027country\u0027 is \u0027Nigeria\u0027, \u0027South Africa\u0027, or \u0027Egypt\u0027." 
+}, { + "id": "3967", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027Volunteers\u0027 table for a volunteer named \u0027Alice Lee\u0027 with 3 hours of volunteered time", + "sql_context": "CREATE TABLE Volunteers (id INT PRIMARY KEY, volunteer_name VARCHAR(255), hours_volunteered DECIMAL(10,2));", + "sql": "INSERT INTO Volunteers (volunteer_name, hours_volunteered) VALUES (\u0027Alice Lee\u0027, 3.00);", + "sql_explanation": "This SQL query inserts a new record into the \u0027Volunteers\u0027 table for a volunteer named \u0027Alice Lee\u0027 with 3 hours of volunteered time. It does this by specifying the INSERT INTO keyword, followed by the table name and the columns to insert data into. The VALUES keyword is used to specify the values for the new record." 
+}, { + "id": "4126", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers who have worked on projects related to \u0027disaster relief\u0027 or \u0027housing\u0027?", + "sql_context": "CREATE TABLE volunteers (id INT, name VARCHAR(30), cause_area VARCHAR(20)); INSERT INTO volunteers (id, name, cause_area) VALUES (1, \u0027Bob\u0027, \u0027disaster relief\u0027), (2, \u0027Alice\u0027, \u0027housing\u0027), (3, \u0027Charlie\u0027, \u0027education\u0027);", + "sql": "SELECT COUNT(*) FROM volunteers WHERE cause_area IN (\u0027disaster relief\u0027, \u0027housing\u0027);", + "sql_explanation": "This query counts the total number of volunteers who have worked on projects related to \u0027disaster relief\u0027 or \u0027housing\u0027." 
+}, { + "id": "4182", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many \u0027repeat_volunteers\u0027 are recorded in the \u0027volunteer_history\u0027 table?", + "sql_context": "CREATE TABLE volunteer_history (volunteer_type VARCHAR(20)); INSERT INTO volunteer_history (volunteer_type) VALUES (\u0027first_time_volunteers\u0027), (\u0027repeat_volunteers\u0027), (\u0027first_time_volunteers\u0027);", + "sql": "SELECT COUNT(*) FROM volunteer_history WHERE volunteer_type \u003d \u0027repeat_volunteers\u0027;", + "sql_explanation": "1. Count the number of records in the volunteer_history table where volunteer_type is \u0027repeat_volunteers\u0027." 
+}, { + "id": "4480", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new volunteer with ID 6 who signed up on 2022-07-15", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, signup_date DATE); INSERT INTO volunteers (volunteer_id, signup_date) VALUES (1, \u00272022-01-05\u0027), (2, \u00272022-03-30\u0027), (3, \u00272022-04-15\u0027), (4, \u00272022-06-10\u0027);", + "sql": "INSERT INTO volunteers (volunteer_id, signup_date) VALUES (6, \u00272022-07-15\u0027);", + "sql_explanation": "This query inserts a new record into the volunteers table for a volunteer with ID 6 and a signup date of 2022-07-15." +}, { + "id": "4998", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which regions have the highest and lowest social impact scores?", + "sql_context": "CREATE TABLE SocialImpact (id INT, region VARCHAR(20), score FLOAT); INSERT INTO SocialImpact (id, region, score) VALUES (1, \u0027Northeast\u0027, 80.0), (2, \u0027Southeast\u0027, 85.0), (3, \u0027Midwest\u0027, 90.0), (4, \u0027Southwest\u0027, 70.0), (5, \u0027Northwest\u0027, 75.0);", + "sql": "SELECT region, score FROM SocialImpact ORDER BY score DESC LIMIT 1;", + "sql_explanation": "This SQL query finds the region with the highest social impact score. 
It orders the data by the \u0027score\u0027 column in descending order and then returns the top row using the LIMIT clause." +}, { + "id": "5064", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of volunteers who participated in each event in the \u0027events\u0027 table, sorted by event_id in ascending order?", + "sql_context": "CREATE TABLE events (event_id INT, event_name TEXT, event_date DATE, num_volunteers INT);", + "sql": "SELECT event_id, num_volunteers FROM events ORDER BY event_id ASC;", + "sql_explanation": "This query retrieves the number of volunteers who participated in each event in the \u0027events\u0027 table, sorted by event_id in ascending order. It simply selects the required columns from the \u0027events\u0027 table and orders the result by event_id in ascending order." 
+}, { + "id": "5226", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in the \u0027Volunteers\u0027 table in 2022?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, SignUpDate DATE);", + "sql": "SELECT COUNT(*) FROM Volunteers WHERE YEAR(SignUpDate) \u003d 2022;", + "sql_explanation": "The SQL query counts the number of records in the Volunteers table for volunteers who signed up in the year 2022 by using the COUNT function and WHERE clause with the YEAR function." +}, { + "id": "5590", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with donation amounts below 1000 in the \u0027donations\u0027 table.", + "sql_context": "CREATE TABLE donations (donation_id INT, donor_id INT, campaign_id INT, donation_amount DECIMAL(10,2));", + "sql": "DELETE FROM donations WHERE donation_amount \u003c 1000;", + "sql_explanation": "This SQL query deletes all records with donation amounts below 1000 in the \u0027donations\u0027 table." 
+}, { + "id": "5822", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum donation amount in the \u0027Donations\u0027 table?", + "sql_context": "CREATE TABLE Donations (id INT, department VARCHAR(20), amount FLOAT); INSERT INTO Donations (id, department, amount) VALUES (1, \u0027Animals\u0027, 500.00), (2, \u0027Education\u0027, 300.00);", + "sql": "SELECT MIN(amount) FROM Donations", + "sql_explanation": "This SQL query finds the minimum \u0027amount\u0027 value in the \u0027Donations\u0027 table." +}, { + "id": "2502", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update artifact quantities based on their type?", + "sql_context": "CREATE TABLE Artifacts (ArtifactID INT, ArtifactType VARCHAR(50), Quantity INT); INSERT INTO Artifacts (ArtifactID, ArtifactType, Quantity) VALUES (1, \u0027Pottery\u0027, 25), (2, \u0027Tools\u0027, 12), (3, \u0027Pottery\u0027, 30);", + "sql": "UPDATE Artifacts SET Quantity \u003d CASE ArtifactType WHEN \u0027Pottery\u0027 THEN Quantity * 1.1 WHEN \u0027Tools\u0027 THEN Quantity * 1.2 END;", + "sql_explanation": "This SQL query uses a CASE statement to update the quantities of pottery and tools. It increases the quantity of pottery by 10% and tools by 20%. 
The UPDATE statement modifies the Artifacts table, changing the Quantity column based on the ArtifactType." +}, { + "id": "2508", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert data about an archaeologist into the Archaeologists table", + "sql_context": "CREATE TABLE Archaeologists (id INT PRIMARY KEY, name VARCHAR(255), specialty TEXT, years_experience INT); INSERT INTO Archaeologists (id, name, specialty, years_experience) VALUES (1, \u0027Dr. Jane Doe\u0027, \u0027Egyptology\u0027, 20);", + "sql": "INSERT INTO Archaeologists (id, name, specialty, years_experience) VALUES (2, \u0027Dr. John Smith\u0027, \u0027Mayan Civilization\u0027, 15);", + "sql_explanation": "Inserts a new record into the Archaeologists table with details about Dr. John Smith, a specialist in Mayan Civilization with 15 years of experience." 
+}, { + "id": "2969", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update excavation notes for site 789", + "sql_context": "CREATE TABLE excavations (id INT PRIMARY KEY, site_id INT, date DATE, notes TEXT);", + "sql": "UPDATE excavations SET notes \u003d \u0027Extensive water damage observed\u0027 WHERE site_id \u003d 789 AND date \u003d \u00272022-05-15\u0027;", + "sql_explanation": "This query updates the notes for excavation records associated with site_id 789 and date May 15, 2022, noting extensive water damage observed." +}, { + "id": "3597", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the archaeologists from Mexico that specialize in Mayan Civilization?", + "sql_context": "CREATE TABLE Archaeologist (ArchaeologistID INT PRIMARY KEY, Name VARCHAR(50), Specialty VARCHAR(50), Country VARCHAR(50)); INSERT INTO Archaeologist (ArchaeologistID, Name, Specialty, Country) VALUES (3, \u0027Merle Greene Robertson\u0027, \u0027Mayan Civilization\u0027, \u0027Mexico\u0027);", + "sql": "SELECT Name FROM Archaeologist WHERE Country \u003d \u0027Mexico\u0027 AND Specialty \u003d \u0027Mayan Civilization\u0027;", + "sql_explanation": "Select the names of all archaeologists from Mexico who specialize in Mayan Civilization." 
+}, { + "id": "3764", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the highest number of excavated artifacts?", + "sql_context": "CREATE TABLE ArtifactsByCountry (Country TEXT, ArtifactCount INT); INSERT INTO ArtifactsByCountry (Country, ArtifactCount) VALUES (\u0027Italy\u0027, 250), (\u0027Egypt\u0027, 500), (\u0027France\u0027, 300), (\u0027Greece\u0027, 400);", + "sql": "SELECT Country, ArtifactCount FROM ArtifactsByCountry ORDER BY ArtifactCount DESC LIMIT 3;", + "sql_explanation": "This query sorts the ArtifactsByCountry table in descending order by ArtifactCount and limits the output to the first 3 records. It selects both Country and ArtifactCount fields." 
+}, { + "id": "3916", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many excavation sites are located in \u0027Italy\u0027 or \u0027Greece\u0027?", + "sql_context": "CREATE TABLE ExcavationSites (id INT, site VARCHAR(20), location VARCHAR(30), start_date DATE, end_date DATE); INSERT INTO ExcavationSites (id, site, location, start_date, end_date) VALUES (1, \u0027BronzeAge\u0027, \u0027UK\u0027, \u00272000-01-01\u0027, \u00272005-12-31\u0027), (2, \u0027AncientRome\u0027, \u0027Italy\u0027, \u00271999-01-01\u0027, \u00272002-12-31\u0027), (3, \u0027Mycenae\u0027, \u0027Greece\u0027, \u00272003-01-01\u0027, \u00272006-12-31\u0027);", + "sql": "SELECT COUNT(DISTINCT site) FROM ExcavationSites WHERE location IN (\u0027Italy\u0027, \u0027Greece\u0027);", + "sql_explanation": "Count the number of distinct excavation sites by filtering the location from the ExcavationSites table with the specified countries \u0027Italy\u0027 or \u0027Greece\u0027." 
+}, { + "id": "4066", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and periods of all excavation sites in France and Germany?", + "sql_context": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Location TEXT, Period TEXT); INSERT INTO ExcavationSites (SiteID, SiteName, Location, Period) VALUES (1, \u0027Pompeii\u0027, \u0027Italy\u0027, \u0027Roman\u0027);", + "sql": "SELECT SiteName, Period FROM ExcavationSites WHERE Location IN (\u0027France\u0027, \u0027Germany\u0027)", + "sql_explanation": "This query selects the SiteName and Period columns from the ExcavationSites table, filtering the rows where the Location is either France or Germany." 
+}, { + "id": "4077", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the archaeologists that have discovered the most artifacts?", + "sql_context": "CREATE TABLE Archaeologists (ArchaeologistID INT, Archaeologist TEXT, Country TEXT, ArtifactCount INT); INSERT INTO Archaeologists (ArchaeologistID, Archaeologist, Country, ArtifactCount) VALUES (1, \u0027Zahi Hawass\u0027, \u0027Egypt\u0027, 100), (2, \u0027Howard Carter\u0027, \u0027UK\u0027, 75), (3, \u0027Hassan Fathy\u0027, \u0027Egypt\u0027, 50);", + "sql": "SELECT Archaeologist, ArtifactCount FROM Archaeologists ORDER BY ArtifactCount DESC;", + "sql_explanation": "This query sorts the Archaeologists table in descending order by ArtifactCount and selects both Archaeologist and ArtifactCount fields." 
+}, { + "id": "5221", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "show the total number of artifacts excavated from site \u0027Tikal\u0027", + "sql_context": "CREATE TABLE artifact_tikal (artifact_id INTEGER, site_name TEXT, artifact_type TEXT, age INTEGER); INSERT INTO artifact_tikal (artifact_id, site_name, artifact_type, age) VALUES (1, \u0027Tikal\u0027, \u0027Pottery\u0027, 1200), (2, \u0027Tikal\u0027, \u0027Stone\u0027, 800), (3, \u0027Tikal\u0027, \u0027Ceramic\u0027, 1500), (4, \u0027Tikal\u0027, \u0027Bone\u0027, 1100), (5, \u0027Tikal\u0027, \u0027Stone\u0027, 900), (6, \u0027Tikal\u0027, \u0027Stone\u0027, 1300);", + "sql": "SELECT COUNT(*) FROM artifact_tikal WHERE site_name \u003d \u0027Tikal\u0027;", + "sql_explanation": "The query shows the total number of artifacts excavated from site \u0027Tikal\u0027 by selecting the number of rows from the artifact_tikal table where the site_name is \u0027Tikal\u0027." 
+}, { + "id": "5277", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of all pottery artifacts from the \u0027Italian Digs\u0027 site?", + "sql_context": "CREATE TABLE If Not Exists excavation_sites (site_id INT, site_name TEXT); INSERT INTO excavation_sites (site_id, site_name) VALUES (1, \u0027Italian Digs\u0027), (2, \u0027Greek Site\u0027), (3, \u0027Egyptian Digs\u0027); CREATE TABLE If Not Exists artifacts (artifact_id INT, artifact_name TEXT, artifact_weight FLOAT, site_id INT); INSERT INTO artifacts (artifact_id, artifact_name, artifact_weight, site_id) VALUES (1, \u0027Amphora\u0027, 12.3, 1), (2, \u0027Pithos\u0027, 34.5, 1), (3, \u0027Oinochoe\u0027, 2.5, 2), (4, \u0027Kylix\u0027, 1.2, 2), (5, \u0027Scarab\u0027, 0.3, 3);", + "sql": "SELECT AVG(artifact_weight) FROM artifacts WHERE site_id \u003d 1;", + "sql_explanation": "The SQL query calculates the average weight of all pottery artifacts from the \u0027Italian Digs\u0027 site by using the AVG function on the artifact_weight column, filtering the data with the WHERE clause to only include records with site_id equal to 1." 
+}, { + "id": "5587", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of artifacts from \u0027Site J\u0027?", + "sql_context": "CREATE TABLE Site (SiteID VARCHAR(10), SiteName VARCHAR(20)); INSERT INTO Site (SiteID, SiteName) VALUES (\u0027J\u0027, \u0027Site J\u0027); CREATE TABLE Artifact (ArtifactID VARCHAR(10), SiteID VARCHAR(10), Weight FLOAT); INSERT INTO Artifact (ArtifactID, SiteID, Weight) VALUES (\u00271\u0027, \u0027J\u0027, 12.3), (\u00272\u0027, \u0027J\u0027, 15.6), (\u00273\u0027, \u0027J\u0027, 18.9), (\u00274\u0027, \u0027J\u0027, 9.7), (\u00275\u0027, \u0027J\u0027, 25.6);", + "sql": "SELECT AVG(Weight) FROM Artifact WHERE SiteID \u003d \u0027J\u0027;", + "sql_explanation": "The SQL query retrieves the average weight of artifacts from \u0027Site J\u0027 by using the AVG function on the Weight column, and filtering the records with the WHERE clause based on the SiteID column." 
+}, { + "id": "5729", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest excavation start date in the \u0027ExcavationSites\u0027 table?", + "sql_context": "CREATE TABLE ExcavationSites (ID INT, Name VARCHAR(50), Country VARCHAR(50), StartDate DATE, EndDate DATE);", + "sql": "SELECT MIN(StartDate) FROM ExcavationSites;", + "sql_explanation": "The SQL query calculates the earliest excavation start date by using the MIN function on the \u0027StartDate\u0027 column from the \u0027ExcavationSites\u0027 table." +}, { + "id": "5734", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the artifact with ArtifactID 1 from the database.", + "sql_context": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Country TEXT); INSERT INTO ExcavationSites (SiteID, SiteName, Country) VALUES (1, \u0027MayanRuins\u0027, \u0027Guatemala\u0027); CREATE TABLE Artifacts (ArtifactID INT, SiteID INT, ArtifactName TEXT, ArtifactType TEXT, Quantity INT); INSERT INTO Artifacts (ArtifactID, SiteID, ArtifactName, ArtifactType, Quantity) VALUES (1, 1, \u0027Jade Mask\u0027, \u0027Mask\u0027, 1), (2, 1, \u0027Obsidian Knife\u0027, \u0027Knife\u0027, 2);", + "sql": "DELETE FROM Artifacts WHERE ArtifactID \u003d 1;", + "sql_explanation": "This query 
deletes the record with ArtifactID 1 from the Artifacts table." +}, { + "id": "5794", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of all individuals from the \u0027ancient_burials\u0027 table?", + "sql_context": "CREATE TABLE ancient_burials (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), grave_contents VARCHAR(255)); INSERT INTO ancient_burials (id, name, age, gender, grave_contents) VALUES (1, \u0027John Doe\u0027, 45, \u0027Male\u0027, \u0027Pottery, coins\u0027);", + "sql": "SELECT AVG(age) FROM ancient_burials;", + "sql_explanation": "This query calculates the average age of all individuals in the \u0027ancient_burials\u0027 table by summing all the \u0027age\u0027 values and then dividing by the count of rows in the table." 
+}, { + "id": "1969", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hybrid and electric vehicles sold in the Sales_Data table in the fourth quarter of 2022?", + "sql_context": "CREATE TABLE Sales_Data (Sale_Date DATE, Vehicle_Type VARCHAR(20), Quantity_Sold INT);", + "sql": "SELECT SUM(Quantity_Sold) FROM Sales_Data WHERE Vehicle_Type IN (\u0027Hybrid\u0027, \u0027Electric\u0027) AND Sale_Date BETWEEN \u00272022-10-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "The SQL query calculates the sum of the Quantity_Sold column from the Sales_Data table where the Vehicle_Type is either \u0027Hybrid\u0027 or \u0027Electric\u0027 and the Sale_Date is between \u00272022-10-01\u0027 and \u00272022-12-31\u0027." 
+}, { + "id": "1976", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety rating of electric vehicles compared to gasoline vehicles?", + "sql_context": "CREATE TABLE Vehicles (id INT, make VARCHAR(50), model VARCHAR(50), safety_rating FLOAT, fuel_type VARCHAR(50));", + "sql": "SELECT AVG(safety_rating) FROM Vehicles WHERE fuel_type \u003d \u0027Electric\u0027; SELECT AVG(safety_rating) FROM Vehicles WHERE fuel_type \u003d \u0027Gasoline\u0027;", + "sql_explanation": "This query calculates the average safety rating of electric vehicles by filtering the Vehicles table where the fuel_type is Electric and then computes the average of the safety_rating column. Similarly, it calculates the average safety rating of gasoline vehicles." 
+}, { + "id": "2592", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous driving research papers were published by authors from the United States and Germany?", + "sql_context": "CREATE TABLE research_papers (title VARCHAR(100), author_country VARCHAR(50), publication_year INT);", + "sql": "SELECT COUNT(*) FROM research_papers WHERE author_country IN (\u0027United States\u0027, \u0027Germany\u0027) AND publication_year \u003e\u003d 2015;", + "sql_explanation": "The SQL query counts the number of autonomous driving research papers published by authors from the United States and Germany since 2015 in the \u0027research_papers\u0027 table. It first filters the rows based on the author\u0027s country and publication year and then counts the number of rows using the COUNT function." 
+}, { + "id": "3176", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles were sold in Canada in 2021?", + "sql_context": "CREATE TABLE Sales (year INT, country VARCHAR(50), vehicle_type VARCHAR(50), quantity INT); INSERT INTO Sales (year, country, vehicle_type, quantity) VALUES (2021, \u0027Canada\u0027, \u0027Electric\u0027, 75000);", + "sql": "SELECT SUM(quantity) FROM Sales WHERE year \u003d 2021 AND country \u003d \u0027Canada\u0027 AND vehicle_type \u003d \u0027Electric\u0027;", + "sql_explanation": "This query calculates the total quantity of electric vehicles sold in Canada in 2021 by summing the quantity column in the Sales table where the year is 2021, country is Canada, and vehicle_type is Electric." 
+}, { + "id": "3384", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many safety tests were conducted by \u0027Green Motors\u0027 in 2021?", + "sql_context": "CREATE TABLE SafetyTestingCounts (ID INT, Manufacturer VARCHAR(255), NumTests INT); INSERT INTO SafetyTestingCounts (ID, Manufacturer, NumTests) VALUES (1, \u0027Green Motors\u0027, 50), (2, \u0027FutureAutomobiles\u0027, 75), (3, \u0027Blue Cars\u0027, 60), (4, \u0027Green Motors\u0027, 40), (5, \u0027Green Motors\u0027, 55);", + "sql": "SELECT SUM(NumTests) FROM SafetyTestingCounts WHERE Manufacturer \u003d \u0027Green Motors\u0027 AND Year \u003d 2021;", + "sql_explanation": "This query calculates the total number of safety tests conducted by \u0027Green Motors\u0027 in 2021. It does this by summing the values in the \u0027NumTests\u0027 column, but only for rows where \u0027Manufacturer\u0027 is \u0027Green Motors\u0027 and \u0027Year\u0027 is 2021." 
+}, { + "id": "3424", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum horsepower of sports cars released in 2022?", + "sql_context": "CREATE TABLE SportsCars (VIN VARCHAR(20), Model VARCHAR(20), Horsepower INT, ProductionYear INT); INSERT INTO SportsCars (VIN, Model, Horsepower, ProductionYear) VALUES (\u0027AA11BB2233\u0027, \u0027ModelX\u0027, 500, 2022), (\u0027CC22DD3344\u0027, \u0027ModelY\u0027, 600, 2022);", + "sql": "SELECT MAX(Horsepower) FROM SportsCars WHERE ProductionYear \u003d 2022 AND Model LIKE \u0027%Sports Car%\u0027;", + "sql_explanation": "The SQL query calculates the maximum horsepower of sports cars produced in 2022 using the MAX() function. It filters the SportsCars table for rows where the ProductionYear is 2022 and the Model contains the phrase \u0027Sports Car\u0027." 
+}, { + "id": "3498", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous driving research projects were completed in \u00272022\u0027 in the \u0027autonomous_driving\u0027 schema?", + "sql_context": "CREATE TABLE autonomous_driving (id INT, project_name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO autonomous_driving VALUES (1, \u0027Project Alpha\u0027, \u00272022-01-01\u0027, \u00272022-12-31\u0027); INSERT INTO autonomous_driving VALUES (2, \u0027Project Bravo\u0027, \u00272021-01-01\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT COUNT(*) FROM autonomous_driving WHERE YEAR(start_date) \u003d 2022 AND YEAR(end_date) \u003d 2022;", + "sql_explanation": "First, we filter the \u0027autonomous_driving\u0027 table to only include rows where both the start and end date are in \u00272022\u0027. Then, we count the number of rows that meet this criteria." 
+}, { + "id": "3541", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all vehicles that passed safety tests in India in 2019?", + "sql_context": "CREATE TABLE VehicleSafetyTests (vehicle_id INT, model VARCHAR(100), passed BOOLEAN, country VARCHAR(50), year INT); INSERT INTO VehicleSafetyTests (vehicle_id, model, passed, country, year) VALUES (1, \u0027Model X\u0027, true, \u0027India\u0027, 2019), (2, \u0027Corolla\u0027, false, \u0027India\u0027, 2019);", + "sql": "SELECT model FROM VehicleSafetyTests WHERE passed \u003d true AND country \u003d \u0027India\u0027 AND year \u003d 2019;", + "sql_explanation": "List the names of all vehicles that passed safety tests in India in 2019 by selecting the model column values for rows with passed \u0027true\u0027, country \u0027India\u0027, and year 2019." 
+}, { + "id": "3570", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What vehicles passed the \u0027Pedestrian Safety Test\u0027 in the SafetyTesting table?", + "sql_context": "CREATE TABLE SafetyTesting (Id INT, Vehicle VARCHAR(50), Test VARCHAR(50), Result VARCHAR(50)); INSERT INTO SafetyTesting (Id, Vehicle, Test, Result) VALUES (1, \u0027Volvo XC60\u0027, \u0027Frontal Crash Test\u0027, \u0027Passed\u0027), (2, \u0027Nissan Leaf\u0027, \u0027Pedestrian Safety Test\u0027, \u0027Passed\u0027);", + "sql": "SELECT Vehicle FROM SafetyTesting WHERE Test \u003d \u0027Pedestrian Safety Test\u0027 AND Result \u003d \u0027Passed\u0027;", + "sql_explanation": "This query selects vehicles from the SafetyTesting table that have passed the Pedestrian Safety Test." 
+}, { + "id": "3699", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum safety rating for electric vehicles?", + "sql_context": "CREATE TABLE EV_Safety (Vehicle VARCHAR(255), SafetyRating FLOAT); INSERT INTO EV_Safety (Vehicle, SafetyRating) VALUES (\u0027Tesla Model 3\u0027, 5.3), (\u0027Chevrolet Bolt\u0027, 5.1), (\u0027Nissan Leaf\u0027, 4.9), (\u0027Ford Focus Electric\u0027, 4.5);", + "sql": "SELECT MIN(SafetyRating), MAX(SafetyRating) FROM EV_Safety WHERE Vehicle LIKE \u0027%electric%\u0027;", + "sql_explanation": "This query calculates the minimum and maximum safety rating for electric vehicles in the EV_Safety table by using the MIN and MAX functions. It filters the records to only include rows where the Vehicle name contains the word \u0027electric\u0027." 
+}, { + "id": "3704", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles were sold in California in 2020?", + "sql_context": "CREATE TABLE Sales (Id INT, Vehicle VARCHAR(100), Year INT, State VARCHAR(50), IsElectric BOOLEAN); INSERT INTO Sales (Id, Vehicle, Year, State, IsElectric) VALUES (1, \u0027Model S\u0027, 2020, \u0027California\u0027, true), (2, \u0027Model 3\u0027, 2020, \u0027California\u0027, true), (3, \u0027Golf\u0027, 2020, \u0027California\u0027, false), (4, \u0027Polo\u0027, 2020, \u0027California\u0027, false), (5, \u0027Tesla Model X\u0027, 2020, \u0027California\u0027, true);", + "sql": "SELECT COUNT(*) FROM Sales WHERE Year \u003d 2020 AND State \u003d \u0027California\u0027 AND IsElectric \u003d true", + "sql_explanation": "We count the number of electric vehicles sold in California in 2020 by using the COUNT(*) function, filtering the rows by the Year, State, and IsElectric columns with 2020, \u0027California\u0027, and true as the values respectively." 
+}, { + "id": "3853", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous driving research papers were published by \u0027SmartLabs\u0027 in 2020?", + "sql_context": "CREATE TABLE AutonomousDrivingResearch (ID INT, Lab VARCHAR(255), Year INT, NumPapers INT); INSERT INTO AutonomousDrivingResearch (ID, Lab, Year, NumPapers) VALUES (1, \u0027SmartLabs\u0027, 2019, 15), (2, \u0027SmartLabs\u0027, 2020, 25), (3, \u0027RoboLabs\u0027, 2020, 18);", + "sql": "SELECT NumPapers FROM AutonomousDrivingResearch WHERE Lab \u003d \u0027SmartLabs\u0027 AND Year \u003d 2020;", + "sql_explanation": "This query retrieves the number of autonomous driving research papers published by \u0027SmartLabs\u0027 in 2020. It does this by selecting the value of the \u0027NumPapers\u0027 column, but only for rows where \u0027Lab\u0027 is \u0027SmartLabs\u0027 and \u0027Year\u0027 is 2020." 
+}, { + "id": "3927", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum speed of electric vehicles produced by Tesla?", + "sql_context": "CREATE TABLE ElectricVehicles (Id INT, Make VARCHAR(50), Model VARCHAR(50), MaxSpeed FLOAT); INSERT INTO ElectricVehicles (Id, Make, Model, MaxSpeed) VALUES (1, \u0027Tesla\u0027, \u0027Model S\u0027, 261), (2, \u0027Tesla\u0027, \u0027Model 3\u0027, 225), (3, \u0027Tesla\u0027, \u0027Model X\u0027, 250), (4, \u0027Tesla\u0027, \u0027Model Y\u0027, 217);", + "sql": "SELECT MAX(MaxSpeed) FROM ElectricVehicles WHERE Make \u003d \u0027Tesla\u0027 AND Model LIKE \u0027Model%\u0027", + "sql_explanation": "This query determines the maximum speed of electric vehicles produced by Tesla. It filters the ElectricVehicles table to only include rows where the Make is Tesla and the Model starts with \u0027Model\u0027, then calculates the maximum value of the MaxSpeed column for those rows." 
+}, { + "id": "3931", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety rating of vehicles produced by \u0027Green Motors\u0027?", + "sql_context": "CREATE TABLE VehicleSafetyTesting (ID INT, Manufacturer VARCHAR(255), Model VARCHAR(255), SafetyRating FLOAT); INSERT INTO VehicleSafetyTesting (ID, Manufacturer, Model, SafetyRating) VALUES (1, \u0027Green Motors\u0027, \u0027EcoCar\u0027, 4.8), (2, \u0027Blue Cars\u0027, \u0027HyperHybrid\u0027, 4.6);", + "sql": "SELECT AVG(SafetyRating) FROM VehicleSafetyTesting WHERE Manufacturer \u003d \u0027Green Motors\u0027;", + "sql_explanation": "This query calculates the average safety rating for vehicles manufactured by \u0027Green Motors\u0027. It does this by selecting the average value of the \u0027SafetyRating\u0027 column, but only for rows where \u0027Manufacturer\u0027 is \u0027Green Motors\u0027." 
+}, { + "id": "3941", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety rating of electric vehicles released in 2022?", + "sql_context": "CREATE TABLE Vehicles (ID INT, Name TEXT, Type TEXT, SafetyRating FLOAT, ReleaseYear INT); INSERT INTO Vehicles (ID, Name, Type, SafetyRating, ReleaseYear) VALUES (1, \u0027Tesla Model 3\u0027, \u0027Electric\u0027, 5.3, 2022); INSERT INTO Vehicles (ID, Name, Type, SafetyRating, ReleaseYear) VALUES (2, \u0027Nissan Leaf\u0027, \u0027Electric\u0027, 5.1, 2022);", + "sql": "SELECT AVG(SafetyRating) FROM Vehicles WHERE Type \u003d \u0027Electric\u0027 AND ReleaseYear \u003d 2022;", + "sql_explanation": "This query calculates the average safety rating of electric vehicles released in 2022 by filtering the Vehicles table based on the Type and ReleaseYear, and then computing the average of the SafetyRating column." 
+}, { + "id": "4125", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of autonomous vehicles tested in the state of California?", + "sql_context": "CREATE TABLE AutonomousVehicles (Id INT, TestLocation VARCHAR(50), TestDate DATE, VehicleCount INT); INSERT INTO AutonomousVehicles (Id, TestLocation, TestDate, VehicleCount) VALUES (1, \u0027California\u0027, \u00272018-01-01\u0027, 500), (2, \u0027California\u0027, \u00272019-01-01\u0027, 1000), (3, \u0027California\u0027, \u00272020-01-01\u0027, 1500), (4, \u0027California\u0027, \u00272021-01-01\u0027, 2000);", + "sql": "SELECT SUM(VehicleCount) FROM AutonomousVehicles WHERE TestLocation \u003d \u0027California\u0027;", + "sql_explanation": "This query calculates the total number of autonomous vehicles tested in the state of California. It filters the AutonomousVehicles table to only include rows where the TestLocation is California, then calculates the sum of the VehicleCount column for those rows." 
+}, { + "id": "4291", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the horsepower of the Tesla Model 3?", + "sql_context": "CREATE TABLE electric_cars (make VARCHAR(255), model VARCHAR(255), horsepower INT); INSERT INTO electric_cars (make, model, horsepower) VALUES (\u0027Tesla\u0027, \u0027Model 3\u0027, 261), (\u0027Tesla\u0027, \u0027Model S\u0027, 469);", + "sql": "SELECT horsepower FROM electric_cars WHERE make \u003d \u0027Tesla\u0027 AND model \u003d \u0027Model 3\u0027;", + "sql_explanation": "This query retrieves the horsepower of the Tesla Model 3 by selecting the horsepower from the electric_cars table where the make is equal to Tesla and the model is equal to Model 3." 
+}, { + "id": "4664", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety rating of vehicles manufactured in the US?", + "sql_context": "CREATE TABLE Vehicles (Id INT, Name TEXT, Manufacturer TEXT, SafetyRating FLOAT, ManufacturedCountry TEXT); INSERT INTO Vehicles (Id, Name, Manufacturer, SafetyRating, ManufacturedCountry) VALUES (1, \u0027Model S\u0027, \u0027Tesla\u0027, 5.0, \u0027USA\u0027); INSERT INTO Vehicles (Id, Name, Manufacturer, SafetyRating, ManufacturedCountry) VALUES (2, \u0027Model 3\u0027, \u0027Tesla\u0027, 5.0, \u0027USA\u0027);", + "sql": "SELECT AVG(SafetyRating) FROM Vehicles WHERE ManufacturedCountry \u003d \u0027USA\u0027;", + "sql_explanation": "This query calculates the average safety rating of vehicles by filtering the Vehicles table for rows where ManufacturedCountry is \u0027USA\u0027 and then computing the average of the SafetyRating column." 
+}, { + "id": "4706", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum horsepower for electric vehicles in the \u0027green_vehicles\u0027 table?", + "sql_context": "CREATE TABLE green_vehicles (vehicle_id INT, make VARCHAR(50), model VARCHAR(50), fuel_type VARCHAR(10), horsepower INT);", + "sql": "SELECT MAX(horsepower) FROM green_vehicles WHERE fuel_type \u003d \u0027electric\u0027;", + "sql_explanation": "This SQL query calculates the maximum horsepower for electric vehicles in the green_vehicles table by selecting all records from the green_vehicles table where the fuel_type is \u0027electric\u0027, and then applying the aggregate function MAX() to the selected horsepower values." 
+}, { + "id": "4774", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous driving research studies have been conducted in Sweden?", + "sql_context": "CREATE TABLE AutonomousDrivingResearch (Country VARCHAR(50), Studies INT); INSERT INTO AutonomousDrivingResearch (Country, Studies) VALUES (\u0027Sweden\u0027, 18), (\u0027Germany\u0027, 30), (\u0027Norway\u0027, 16), (\u0027Finland\u0027, 14), (\u0027Denmark\u0027, 12);", + "sql": "SELECT Studies FROM AutonomousDrivingResearch WHERE Country \u003d \u0027Sweden\u0027;", + "sql_explanation": "This query retrieves the number of autonomous driving research studies conducted in Sweden by selecting the studies from the AutonomousDrivingResearch table where the country is Sweden." 
+}, { + "id": "4815", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many safety tests were conducted on electric vehicles in the last 3 years?", + "sql_context": "CREATE TABLE SafetyTests (Vehicle VARCHAR(50), Year INT); INSERT INTO SafetyTests (Vehicle, Year) VALUES (\u0027Tesla Model S\u0027, 2020), (\u0027Tesla Model S\u0027, 2021), (\u0027Tesla Model S\u0027, 2022), (\u0027Tesla Model 3\u0027, 2020), (\u0027Tesla Model 3\u0027, 2021), (\u0027Tesla Model 3\u0027, 2022), (\u0027Chevrolet Bolt\u0027, 2020), (\u0027Chevrolet Bolt\u0027, 2021), (\u0027Chevrolet Bolt\u0027, 2022), (\u0027Nissan Leaf\u0027, 2020), (\u0027Nissan Leaf\u0027, 2021), (\u0027Nissan Leaf\u0027, 2022);", + "sql": "SELECT COUNT(*) FROM SafetyTests WHERE Year \u003e\u003d YEAR(CURRENT_DATE) - 3;", + "sql_explanation": "We start by using a SELECT statement to count the number of safety tests conducted on electric vehicles in the last 3 years. We use the YEAR and CURRENT_DATE functions to get the current year and filter for the last 3 years." 
+}, { + "id": "5069", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average safety rating of vehicles released in 2020?", + "sql_context": "CREATE TABLE Vehicles (id INT, name VARCHAR(255), safety_rating DECIMAL(3,2), release_year INT); INSERT INTO Vehicles (id, name, safety_rating, release_year) VALUES (1, \u0027Model X\u0027, 5.3, 2020); INSERT INTO Vehicles (id, name, safety_rating, release_year) VALUES (2, \u0027Model 3\u0027, 5.7, 2020);", + "sql": "SELECT AVG(safety_rating) FROM Vehicles WHERE release_year \u003d 2020;", + "sql_explanation": "Calculate the average safety rating of vehicles released in 2020." +}, { + "id": "5082", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of auto shows that took place before a specific date from the auto_show_information table.", + "sql_context": "CREATE TABLE auto_show_information (show_id INT, show_name VARCHAR(50), show_date DATE, show_location VARCHAR(50));", + "sql": "DELETE FROM auto_show_information WHERE show_date \u003c \u00272022-01-01\u0027;", + "sql_explanation": "This query deletes records from the auto_show_information table by filtering for auto shows that took place before a specific date using the WHERE clause." 
+}, { + "id": "5089", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average horsepower of EVs in the \u0027green_vehicles\u0027 table?", + "sql_context": "CREATE TABLE green_vehicles (make VARCHAR(50), model VARCHAR(50), year INT, horsepower INT);", + "sql": "SELECT AVG(horsepower) FROM green_vehicles WHERE make LIKE \u0027EV%\u0027;", + "sql_explanation": "This query calculates the average horsepower for vehicles in the \u0027green_vehicles\u0027 table where the make starts with \u0027EV\u0027, which are assumed to be electric vehicles." +}, { + "id": "5158", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all autonomous driving research studies and their respective lead researchers.", + "sql_context": "CREATE TABLE AutonomousDrivingResearch (StudyID INT, StudyName VARCHAR(50), LeadResearcher VARCHAR(50)); INSERT INTO AutonomousDrivingResearch (StudyID, StudyName, LeadResearcher) VALUES (1, \u0027Impact of Autonomous Vehicles on Urban Mobility\u0027, \u0027Dr. Jane Smith\u0027), (2, \u0027Deep Learning Algorithms for Autonomous Driving\u0027, \u0027Dr. 
John Doe\u0027);", + "sql": "SELECT StudyName, LeadResearcher FROM AutonomousDrivingResearch;", + "sql_explanation": "The SQL query lists all autonomous driving research studies and their lead researchers by selecting the StudyName and LeadResearcher columns from the AutonomousDrivingResearch table." +}, { + "id": "5265", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all autonomous driving research projects and their respective budgets in the US.", + "sql_context": "CREATE TABLE AutonomousDrivingResearch (project_id INT, project_name VARCHAR(100), budget DECIMAL(10,2), country VARCHAR(50)); INSERT INTO AutonomousDrivingResearch (project_id, project_name, budget, country) VALUES (1, \u0027Project A\u0027, 5000000, \u0027US\u0027), (2, \u0027Project B\u0027, 3000000, \u0027US\u0027);", + "sql": "SELECT * FROM AutonomousDrivingResearch WHERE country \u003d \u0027US\u0027;", + "sql_explanation": "List all autonomous driving research projects and their respective budgets in the US by selecting all columns from AutonomousDrivingResearch table where country is \u0027US\u0027." 
+}, { + "id": "5323", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all crash tests with results in the \u0027safety_testing\u0027 table where the test type is \u0027autonomous\u0027.", + "sql_context": "CREATE TABLE safety_testing (vehicle_make VARCHAR(50), test_type VARCHAR(20), test_result VARCHAR(10));", + "sql": "SELECT * FROM safety_testing WHERE test_type \u003d \u0027autonomous\u0027;", + "sql_explanation": "This query retrieves all rows from the \u0027safety_testing\u0027 table where the \u0027test_type\u0027 column is \u0027autonomous\u0027." +}, { + "id": "5382", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum range for electric vehicles in the US?", + "sql_context": "CREATE TABLE EVs (Id INT PRIMARY KEY, Make VARCHAR(50), Model VARCHAR(50), Range INT, Country VARCHAR(50));", + "sql": "SELECT MAX(Range) FROM EVs WHERE Country \u003d \u0027United States\u0027;", + "sql_explanation": "The SQL query calculates the maximum range for electric vehicles by using the MAX function on the Range column, filtering for Country \u003d \u0027US\u0027" +}, { + "id": "5403", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric 
vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all autonomous driving research data from 2019.", + "sql_context": "CREATE TABLE if not exists AutonomousDriving (Id int, Vehicle varchar(100), TestType varchar(50), Result float, TestDate date, Country varchar(50)); INSERT INTO AutonomousDriving (Id, Vehicle, TestType, Result, TestDate, Country) VALUES (1, \u0027Tesla Autopilot\u0027, \u0027Highway Driving\u0027, 9.1, \u00272018-02-03\u0027, \u0027USA\u0027), (2, \u0027Waymo\u0027, \u0027City Driving\u0027, 9.2, \u00272019-06-12\u0027, \u0027USA\u0027), (3, \u0027NVIDIA Drive\u0027, \u0027Handling Test\u0027, 8.8, \u00272020-11-28\u0027, \u0027Japan\u0027), (4, \u0027Baidu Apollo\u0027, \u0027Crash Test\u0027, 8.5, \u00272018-09-15\u0027, \u0027China\u0027), (5, \u0027Tesla Full Self-Driving\u0027, \u0027Braking Test\u0027, 9.0, \u00272017-07-21\u0027, \u0027USA\u0027);", + "sql": "DELETE FROM AutonomousDriving WHERE YEAR(TestDate) \u003d 2019;", + "sql_explanation": "The query deletes all records from the AutonomousDriving table where the TestDate is in 2019 by using the YEAR function." 
+}, { + "id": "5427", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all autonomous driving research projects with a budget over $5 million.", + "sql_context": "CREATE TABLE autonomous_projects (project_name VARCHAR(50), budget DECIMAL(10,2), year INT); INSERT INTO autonomous_projects (project_name, budget, year) VALUES (\u0027Project Apollo\u0027, 7000000, 2018), (\u0027Wayve\u0027, 6000000, 2020), (\u0027Project Baidu\u0027, 5500000, 2017), (\u0027Project Zoox\u0027, 9000000, 2019);", + "sql": "SELECT * FROM autonomous_projects WHERE budget \u003e 5000000;", + "sql_explanation": "This query lists all autonomous driving research projects with a budget over $5 million by using the SELECT statement with the asterisk wildcard to retrieve all columns, filtering the records using the WHERE clause and the greater than operator." 
+}, { + "id": "5478", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which autonomous driving research studies were conducted in Japan?", + "sql_context": "CREATE TABLE Research (StudyID int, StudyName varchar(50), Location varchar(50)); INSERT INTO Research (StudyID, StudyName, Location) VALUES (1, \u0027Autonomous Driving in Cities\u0027, \u0027Japan\u0027), (2, \u0027Impact of Autonomous Driving on Traffic\u0027, \u0027USA\u0027), (3, \u0027Safety of Autonomous Vehicles\u0027, \u0027Germany\u0027);", + "sql": "SELECT StudyName FROM Research WHERE Location \u003d \u0027Japan\u0027;", + "sql_explanation": "This query selects the StudyName from the Research table where the Location is Japan." 
+}, { + "id": "5510", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show vehicle safety testing results for vehicles manufactured in Germany", + "sql_context": "CREATE TABLE safety_testing (id INT, vehicle VARCHAR(50), make VARCHAR(50), country VARCHAR(50), score INT); INSERT INTO safety_testing VALUES (1, \u0027Model X\u0027, \u0027Tesla\u0027, \u0027USA\u0027, 90); INSERT INTO safety_testing VALUES (2, \u0027Model 3\u0027, \u0027Tesla\u0027, \u0027USA\u0027, 95); INSERT INTO safety_testing VALUES (3, \u0027e-Tron\u0027, \u0027Audi\u0027, \u0027Germany\u0027, 88);", + "sql": "SELECT * FROM safety_testing WHERE country \u003d \u0027Germany\u0027;", + "sql_explanation": "This SQL query retrieves vehicle safety testing results for vehicles manufactured in Germany by filtering the \u0027safety_testing\u0027 table by the \u0027Germany\u0027 country." 
+}, { + "id": "5723", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the adoption rate of electric vehicles in the world?", + "sql_context": "CREATE TABLE EV_Adoption (id INT, country VARCHAR(50), adoption_rate FLOAT);", + "sql": "SELECT SUM(adoption_rate) FROM EV_Adoption;", + "sql_explanation": "This query calculates the adoption rate of electric vehicles in the world by summing the adoption_rate column in the EV_Adoption table." +}, { + "id": "5743", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all autonomous driving research projects and their respective end dates", + "sql_context": "CREATE TABLE autonomous_driving_research (project_name VARCHAR(100), start_date DATE, end_date DATE);", + "sql": "SELECT * FROM autonomous_driving_research;", + "sql_explanation": "This SQL query retrieves all records from the \u0027autonomous_driving_research\u0027 table, which contains information about autonomous driving research projects, their start dates, and end dates." 
+}, { + "id": "5807", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum speed of the fastest vehicle?", + "sql_context": "CREATE TABLE Vehicles (Id INT, Name VARCHAR(255), Type VARCHAR(255), MaxSpeed INT); INSERT INTO Vehicles (Id, Name, Type, MaxSpeed) VALUES (1, \u0027Model S\u0027, \u0027Sedan\u0027, 250), (2, \u0027Model X\u0027, \u0027SUV\u0027, 220), (3, \u0027Model 3\u0027, \u0027Sports Car\u0027, 280);", + "sql": "SELECT MAX(MaxSpeed) FROM Vehicles;", + "sql_explanation": "The SQL query calculates the maximum speed of vehicles in the Vehicles table using the MAX function." 
+}, { + "id": "1427", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum transaction value (in USD) on the Polygon network in July 2021?", + "sql_context": "CREATE TABLE polygon_transactions (tx_hash VARCHAR(255), block_number INT, timestamp TIMESTAMP, from_address VARCHAR(42), to_address VARCHAR(42), value DECIMAL(20, 8), usd_price DECIMAL(18, 4));", + "sql": "SELECT MAX(value * usd_price) AS max_transaction_value FROM polygon_transactions WHERE timestamp \u003e\u003d \u00272021-07-01 00:00:00\u0027 AND timestamp \u003c \u00272021-08-01 00:00:00\u0027;", + "sql_explanation": "This query finds the maximum transaction value in USD on the Polygon network in July 2021. It multiplies the value and usd_price columns, and then selects the maximum value from the transactions that took place during this time period." 
+}, { + "id": "1548", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new smart contract to the \u0027smart_contracts\u0027 table", + "sql_context": "CREATE TABLE smart_contracts (contract_id INT PRIMARY KEY, address VARCHAR(42), creator VARCHAR(42), language VARCHAR(50), creation_time TIMESTAMP);", + "sql": "INSERT INTO smart_contracts (contract_id, address, creator, language, creation_time) VALUES (1, \u00270xdef456\u0027, \u0027JaneDoe\u0027, \u0027Solidity\u0027, \u00272022-12-25 12:34:56\u0027);", + "sql_explanation": "This query adds a new row to the \u0027smart_contracts\u0027 table, specifying the values for all columns in the new record." 
+}, { + "id": "2348", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of digital assets in circulation in India as of 2022-02-01?", + "sql_context": "CREATE TABLE digital_assets (asset_name TEXT, in_circulation INTEGER, circulation_date DATE); INSERT INTO digital_assets (asset_name, in_circulation, circulation_date) VALUES (\u0027Bitcoin\u0027, 18750000, \u00272022-02-01\u0027), (\u0027Ethereum\u0027, 115500000, \u00272022-02-01\u0027);", + "sql": "SELECT SUM(in_circulation) FROM digital_assets WHERE circulation_date \u003d \u00272022-02-01\u0027 AND asset_name IN (\u0027Bitcoin\u0027, \u0027Ethereum\u0027);", + "sql_explanation": "This query calculates the total number of digital assets in circulation in India as of 2022-02-01 by summing the in_circulation column where the circulation_date is \u00272022-02-01\u0027 and the asset_name is either \u0027Bitcoin\u0027 or \u0027Ethereum\u0027." 
+}, { + "id": "2357", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of digital assets in circulation in Canada as of 2022-03-01?", + "sql_context": "CREATE TABLE digital_assets (asset_name TEXT, in_circulation INTEGER, circulation_date DATE); INSERT INTO digital_assets (asset_name, in_circulation, circulation_date) VALUES (\u0027Bitcoin\u0027, 18750000, \u00272022-03-01\u0027), (\u0027Ethereum\u0027, 115500000, \u00272022-03-01\u0027);", + "sql": "SELECT SUM(in_circulation) FROM digital_assets WHERE circulation_date \u003d \u00272022-03-01\u0027 AND asset_name IN (\u0027Bitcoin\u0027, \u0027Ethereum\u0027);", + "sql_explanation": "This query calculates the total number of digital assets in circulation in Canada as of 2022-03-01 by summing the in_circulation column where the circulation_date is \u00272022-03-01\u0027 and the asset_name is either \u0027Bitcoin\u0027 or \u0027Ethereum\u0027." 
+}, { + "id": "2439", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average gas price for transactions involving the digital asset \u0027BTC\u0027 over the past week?", + "sql_context": "CREATE TABLE transactions (asset_name VARCHAR(10), gas_price DECIMAL(5,2)); INSERT INTO transactions (asset_name, gas_price) VALUES (\u0027BTC\u0027, 10.50), (\u0027ETH\u0027, 20.00), (\u0027BTC\u0027, 11.25), (\u0027LTC\u0027, 5.00); CREATE TABLE dates (transaction_date DATE); INSERT INTO dates (transaction_date) VALUES (\u00272022-01-01\u0027), (\u00272022-01-02\u0027), (\u00272022-01-03\u0027), (\u00272022-01-04\u0027), (\u00272022-01-05\u0027), (\u00272022-01-06\u0027), (\u00272022-01-07\u0027);", + "sql": "SELECT AVG(gas_price) FROM transactions WHERE asset_name \u003d \u0027BTC\u0027 AND transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-07\u0027;", + "sql_explanation": "This query calculates the average gas price for transactions involving the digital asset \u0027BTC\u0027 over the past week using the \u0027transactions\u0027 and \u0027dates\u0027 tables." 
+}, { + "id": "2443", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum gas limit for transactions involving the digital asset \u0027ETH\u0027 in the past week?", + "sql_context": "CREATE TABLE transactions (asset_name VARCHAR(10), gas_limit INT); INSERT INTO transactions (asset_name, gas_limit) VALUES (\u0027BTC\u0027, 100000), (\u0027ETH\u0027, 200000), (\u0027BTC\u0027, 120000), (\u0027LTC\u0027, 50000); CREATE TABLE dates (transaction_date DATE); INSERT INTO dates (transaction_date) VALUES (\u00272022-01-01\u0027), (\u00272022-01-02\u0027), (\u00272022-01-03\u0027), (\u00272022-01-04\u0027), (\u00272022-01-05\u0027), (\u00272022-01-06\u0027), (\u00272022-01-07\u0027);", + "sql": "SELECT MAX(gas_limit) FROM transactions WHERE asset_name \u003d \u0027ETH\u0027 AND transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-07\u0027;", + "sql_explanation": "This query finds the maximum gas limit for transactions involving the digital asset \u0027ETH\u0027 in the past week using the \u0027transactions\u0027 and \u0027dates\u0027 tables." 
+}, { + "id": "3004", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all decentralized applications in the \u0027Polygon\u0027 network that were launched between 2022-01-01 and 2022-12-31.", + "sql_context": "CREATE TABLE polygon_dapps (id INT, name VARCHAR(255), network VARCHAR(255), launch_date DATE); INSERT INTO polygon_dapps (id, name, network, launch_date) VALUES (1, \u0027Dapp3\u0027, \u0027polygon\u0027, \u00272022-03-01\u0027), (2, \u0027Dapp4\u0027, \u0027polygon\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT * FROM polygon_dapps WHERE network \u003d \u0027polygon\u0027 AND launch_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "We create a table for Polygon decentralized applications and insert sample data. Then, we select all rows with the specified network and launch date between 2022-01-01 and 2022-12-31." 
+}, { + "id": "3347", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all smart contracts associated with a given address?", + "sql_context": "CREATE TABLE smart_contracts (contract_id INT, address VARCHAR(42), name VARCHAR(255));", + "sql": "SELECT contract_id, name FROM smart_contracts WHERE address \u003d \u00270x1234567890abcdef1234567890abcdef\u0027;", + "sql_explanation": "The SQL query selects the contract_id and name columns from the smart_contracts table where the address column matches the input value \u00270x1234567890abcdef1234567890abcdef\u0027." +}, { + "id": "3537", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and regulatory frameworks for all blockchain technologies in the \u0027Americas\u0027 region?", + "sql_context": "CREATE TABLE blockchain_tech (name TEXT, region TEXT, framework TEXT); INSERT INTO blockchain_tech (name, region, framework) VALUES (\u0027Tech1\u0027, \u0027North America\u0027, \u0027Regulation1\u0027), (\u0027Tech2\u0027, \u0027South America\u0027, \u0027Regulation2\u0027);", + "sql": "SELECT name, framework FROM blockchain_tech WHERE region IN (\u0027North America\u0027, \u0027South America\u0027);", + "sql_explanation": "The SQL query 
selects the name and framework columns from the blockchain_tech table where the region is in (\u0027North America\u0027, \u0027South America\u0027). This will return the names and regulatory frameworks for all blockchain technologies in the \u0027Americas\u0027 region." +}, { + "id": "3715", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the maximum and minimum supply of each stablecoin?", + "sql_context": "CREATE TABLE stablecoins (id INT, name VARCHAR(255), max_supply INT, min_supply INT); INSERT INTO stablecoins (id, name, max_supply, min_supply) VALUES (1, \u0027USDT\u0027, 100000000, 100000000), (2, \u0027USDC\u0027, 50000000, 50000000), (3, \u0027DAI\u0027, 20000000, 20000000);", + "sql": "SELECT name, MAX(max_supply) AS max_supply, MIN(min_supply) AS min_supply FROM stablecoins;", + "sql_explanation": "The SQL query calculates the maximum and minimum supply of each stablecoin using the MAX and MIN aggregate functions, respectively. The query then returns the name, maximum supply, and minimum supply columns in the result set." 
+}, { + "id": "3747", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the smart contracts that have been executed on the Binance Smart Chain network and their respective transaction counts?", + "sql_context": "CREATE TABLE smart_contracts (contract_id INT, name VARCHAR(100), network VARCHAR(100), transaction_count INT); INSERT INTO smart_contracts (contract_id, name, network, transaction_count) VALUES (1, \u0027Contract1\u0027, \u0027Binance Smart Chain\u0027, 2000), (2, \u0027Contract2\u0027, \u0027Binance Smart Chain\u0027, 1500), (3, \u0027Contract3\u0027, \u0027Binance Smart Chain\u0027, 2500), (4, \u0027Contract4\u0027, \u0027Ethereum\u0027, 500), (5, \u0027Contract5\u0027, \u0027Polygon\u0027, 1000);", + "sql": "SELECT name, transaction_count FROM smart_contracts WHERE network \u003d \u0027Binance Smart Chain\u0027;", + "sql_explanation": "This query selects the names and transaction counts of the smart contracts that have been executed on the Binance Smart Chain network. It does so by using a WHERE clause to filter the smart_contracts table for rows where the network is \u0027Binance Smart Chain\u0027." 
+}, { + "id": "4225", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of Ethereum smart contracts executed by developers in the EU and India?", + "sql_context": "CREATE TABLE Smart_Contracts (Contract_ID INT, Gas_Fees DECIMAL(10,2), Developer_Location VARCHAR(50)); INSERT INTO Smart_Contracts (Contract_ID, Gas_Fees, Developer_Location) VALUES (1, 50.50, \u0027Germany\u0027), (2, 75.25, \u0027India\u0027), (3, 30.00, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM Smart_Contracts WHERE Developer_Location IN (\u0027EU\u0027, \u0027India\u0027);", + "sql_explanation": "This query counts the number of rows in the Smart_Contracts table where the Developer_Location is either \u0027EU\u0027 or \u0027India\u0027." 
+}, { + "id": "4238", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027status\u0027 column of \u0027transaction_id\u0027 12345 in the \u0027crypto_transactions\u0027 table to \u0027completed\u0027", + "sql_context": "CREATE TABLE crypto_transactions (transaction_id INT, status VARCHAR(20), timestamp TIMESTAMP);", + "sql": "UPDATE crypto_transactions SET status \u003d \u0027completed\u0027 WHERE transaction_id \u003d 12345;", + "sql_explanation": "This query updates the status column of the record with transaction_id 12345 in the crypto_transactions table to \u0027completed\u0027." +}, { + "id": "4308", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many smart contracts are associated with digital assets in the Asia-Pacific region?", + "sql_context": "CREATE TABLE smart_contracts (contract_name VARCHAR(20), associated_asset VARCHAR(10), region VARCHAR(20)); INSERT INTO smart_contracts (contract_name, associated_asset, region) VALUES (\u0027Contract1\u0027, \u0027ETH\u0027, \u0027Asia-Pacific\u0027), (\u0027Contract2\u0027, \u0027BTC\u0027, \u0027North America\u0027), (\u0027Contract3\u0027, \u0027LTC\u0027, \u0027Asia-Pacific\u0027);", + "sql": "SELECT COUNT(contract_name) FROM smart_contracts WHERE 
region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query counts the number of smart contracts associated with digital assets in the Asia-Pacific region by selecting the count of contract_name from the smart_contracts table where the region is \u0027Asia-Pacific\u0027." +}, { + "id": "4353", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the name and description of all smart contracts created in the year 2021", + "sql_context": "CREATE TABLE smart_contracts (id INT, name VARCHAR(20), description VARCHAR(50), creation_date DATE); INSERT INTO smart_contracts (id, name, description, creation_date) VALUES (1, \u0027SmartContractA\u0027, \u0027Sample Description A\u0027, \u00272021-01-01\u0027), (2, \u0027SmartContractB\u0027, \u0027Sample Description B\u0027, \u00272022-01-01\u0027);", + "sql": "SELECT name, description FROM smart_contracts WHERE YEAR(creation_date) \u003d 2021;", + "sql_explanation": "*Step 1*: Select all records with a creation date in the year 2021 *Step 2*: Display the name and description of the selected records." 
+}, { + "id": "4422", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total number of Ethereum smart contracts categorized as \u0027High Risk\u0027?", + "sql_context": "CREATE TABLE ethereum_smart_contracts (contract_address VARCHAR(42), risk_level VARCHAR(10)); INSERT INTO ethereum_smart_contracts (contract_address, risk_level) VALUES (\u00270x1234567890123456789012345678901234567890\u0027, \u0027High Risk\u0027);", + "sql": "SELECT COUNT(*) FROM ethereum_smart_contracts WHERE risk_level \u003d \u0027High Risk\u0027;", + "sql_explanation": "This SQL query counts the number of smart contracts with a risk level of \u0027High Risk\u0027 in the ethereum_smart_contracts table." 
+}, { + "id": "4542", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of regulatory frameworks in place, and for which countries are they applicable?", + "sql_context": "CREATE TABLE RegulatoryFrameworksByCountry (FrameworkCountry VARCHAR(50), FrameworkCount INT); INSERT INTO RegulatoryFrameworksByCountry (FrameworkCountry, FrameworkCount) VALUES (\u0027Brazil\u0027, 1), (\u0027India\u0027, 2), (\u0027China\u0027, 3); ALTER TABLE RegulatoryFrameworksByCountry ADD COLUMN FrameworkCountry VARCHAR(50);", + "sql": "SELECT FrameworkCountry, FrameworkCount FROM RegulatoryFrameworksByCountry;", + "sql_explanation": "This query returns all data from the RegulatoryFrameworksByCountry table, displaying the total number of regulatory frameworks and the countries for which they are applicable." 
+}, { + "id": "4634", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 blockchains with the most digital assets?", + "sql_context": "CREATE TABLE blockchains (id INT, name VARCHAR(255), num_assets INT); INSERT INTO blockchains (id, name, num_assets) VALUES (1, \u0027Bitcoin\u0027, 1000), (2, \u0027Ethereum\u0027, 5000), (3, \u0027Ripple\u0027, 2000), (4, \u0027Binance Smart Chain\u0027, 3000), (5, \u0027Cardano\u0027, 4000);", + "sql": "SELECT name, num_assets FROM blockchains ORDER BY num_assets DESC LIMIT 3;", + "sql_explanation": "The SQL query orders the rows in the blockchains table by the num_assets column in descending order, and then returns the top 3 rows using the LIMIT clause. The query effectively returns the top 3 blockchains with the most digital assets." 
+}, { + "id": "4645", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and regulatory frameworks for all blockchain technologies in the \u0027Asia-Pacific\u0027 region?", + "sql_context": "CREATE TABLE blockchain_tech (name TEXT, region TEXT, framework TEXT); INSERT INTO blockchain_tech (name, region, framework) VALUES (\u0027Tech1\u0027, \u0027North America\u0027, \u0027Regulation1\u0027), (\u0027Tech2\u0027, \u0027Asia-Pacific\u0027, \u0027Regulation2\u0027);", + "sql": "SELECT name, framework FROM blockchain_tech WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "The SQL query selects the name and framework columns from the blockchain_tech table where the region is \u0027Asia-Pacific\u0027. This will return the names and regulatory frameworks for all blockchain technologies in the \u0027Asia-Pacific\u0027 region." 
+}, { + "id": "4879", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027decentralized_applications\u0027 table where the \u0027app_category\u0027 is \u0027Gaming\u0027", + "sql_context": "CREATE TABLE decentralized_applications (app_id INT PRIMARY KEY, app_name VARCHAR(100), app_category VARCHAR(50));", + "sql": "DELETE FROM decentralized_applications WHERE app_category \u003d \u0027Gaming\u0027;", + "sql_explanation": "This SQL query deletes all records from the \u0027decentralized_applications\u0027 table where the \u0027app_category\u0027 is \u0027Gaming\u0027. It uses the DELETE statement to remove the records, and specifies the conditions in the WHERE clause." 
+}, { + "id": "5312", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \u0027crypto_regulations\u0027 table where \u0027country_name\u0027 is \u0027China\u0027", + "sql_context": "CREATE TABLE crypto_regulations (regulation_id INT, country_name VARCHAR(50), regulation_description VARCHAR(255), effective_date DATE);", + "sql": "DELETE FROM crypto_regulations WHERE country_name \u003d \u0027China\u0027;", + "sql_explanation": "This query deletes all records from the crypto_regulations table where the country_name is \u0027China\u0027." +}, { + "id": "5362", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the value of a specific transaction \u0027abc123\u0027.", + "sql_context": "CREATE TABLE transactions (hash VARCHAR(64), value INT, timestamp TIMESTAMP);", + "sql": "UPDATE transactions SET value \u003d 7000 WHERE hash \u003d \u0027abc123\u0027;", + "sql_explanation": "This query will update the \u0027value\u0027 of the specific transaction with the hash \u0027abc123\u0027 to the new value of 7000." 
+}, { + "id": "5417", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of non-fungible tokens (NFTs) sold on the Flow blockchain?", + "sql_context": "CREATE TABLE flow_nfts (nft_id INT, nft_sale BOOLEAN);", + "sql": "SELECT COUNT(nft_id) FROM flow_nfts WHERE nft_sale \u003d TRUE;", + "sql_explanation": "This query counts the total number of non-fungible tokens (NFTs) (COUNT(nft_id)) sold on the Flow blockchain (WHERE nft_sale \u003d TRUE)." +}, { + "id": "5480", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \u0027digital_assets\u0027 table where \u0027asset_name\u0027 is \u0027Bitcoin\u0027", + "sql_context": "CREATE TABLE digital_assets (asset_id INT, asset_name VARCHAR(50), asset_symbol VARCHAR(10), asset_type VARCHAR(20), market_cap DECIMAL(20,2));", + "sql": "DELETE FROM digital_assets WHERE asset_name \u003d \u0027Bitcoin\u0027;", + "sql_explanation": "This query deletes all records from the digital_assets table where the asset_name is \u0027Bitcoin\u0027." 
+}, { + "id": "5515", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the regulatory status of digital asset \u0027CoinX\u0027?", + "sql_context": "CREATE TABLE digital_assets (id INT, name TEXT, status TEXT); INSERT INTO digital_assets (id, name, status) VALUES (1, \u0027CoinX\u0027, \u0027Unregulated\u0027), (2, \u0027CoinY\u0027, \u0027Regulated\u0027);", + "sql": "SELECT status FROM digital_assets WHERE name \u003d \u0027CoinX\u0027;", + "sql_explanation": "The SQL query selects the \u0027status\u0027 column from the \u0027digital_assets\u0027 table where the \u0027name\u0027 column is equal to \u0027CoinX\u0027." +}, { + "id": "5687", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the smart contract associated with the ID 5?", + "sql_context": "CREATE TABLE smart_contracts (id INT, name VARCHAR(255)); INSERT INTO smart_contracts (id, name) VALUES (5, \u0027Compound\u0027);", + "sql": "SELECT name FROM smart_contracts WHERE id \u003d 5;", + "sql_explanation": "This query retrieves the name of the smart contract associated with the ID 5. It filters the smart_contracts table for the smart contract with the ID of 5 and retrieves the name." 
+}, { + "id": "5705", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the decentralized applications built on the EOS network?", + "sql_context": "CREATE TABLE dapps (dapp_id INT, name VARCHAR(100), network VARCHAR(100)); INSERT INTO dapps (dapp_id, name, network) VALUES (1, \u0027Dapp1\u0027, \u0027EOS\u0027), (2, \u0027Dapp2\u0027, \u0027EOS\u0027), (3, \u0027Dapp3\u0027, \u0027EOS\u0027), (4, \u0027Dapp4\u0027, \u0027EOS\u0027), (5, \u0027Dapp5\u0027, \u0027EOS\u0027);", + "sql": "SELECT name FROM dapps WHERE network \u003d \u0027EOS\u0027;", + "sql_explanation": "This query selects the names of the decentralized applications built on the EOS network. It does so by using a WHERE clause to filter the dapps table for rows where the network is \u0027EOS\u0027." 
+}, { + "id": "1515", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public transportation trips were taken by people with disabilities in London in the last 6 months?", + "sql_context": "CREATE TABLE transit_trips_uk (id INT, disability TEXT, trip_date DATE, city TEXT, country TEXT); INSERT INTO transit_trips_uk (id, disability, trip_date, city, country) VALUES (1, \u0027yes\u0027, \u00272023-02-01\u0027, \u0027London\u0027, \u0027UK\u0027), (2, \u0027no\u0027, \u00272023-02-02\u0027, \u0027Manchester\u0027, \u0027UK\u0027), (3, \u0027yes\u0027, \u00272023-01-01\u0027, \u0027London\u0027, \u0027UK\u0027);", + "sql": "SELECT SUM(trip_count) FROM transit_trips_uk WHERE disability \u003d \u0027yes\u0027 AND city \u003d \u0027London\u0027 AND country \u003d \u0027UK\u0027 AND trip_date \u003e\u003d DATEADD(month, -6, GETDATE());", + "sql_explanation": "This query calculates the total number of public transportation trips taken by people with disabilities in London in the last 6 months by summing the \u0027trip_count\u0027 column where \u0027disability\u0027 is \u0027yes\u0027, \u0027city\u0027 is \u0027London\u0027, \u0027country\u0027 is \u0027UK\u0027, and \u0027trip_date\u0027 is within the last 6 months. It does this by using the DATEADD function to subtract 6 months from the current date and then filtering the \u0027trip_date\u0027 column to only include dates within this range." 
+}, { + "id": "1538", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify evidence-based policy making related to climate change", + "sql_context": "CREATE TABLE Policy (id INT, name VARCHAR(50), category VARCHAR(50), description TEXT); INSERT INTO Policy (id, name, category, description) VALUES (1, \u0027Renewable Energy Standard\u0027, \u0027Energy\u0027, \u0027Standard to increase renewable energy production\u0027);", + "sql": "SELECT Policy.name, Policy.category, Policy.description FROM Policy WHERE Policy.description LIKE \u0027%climate change%\u0027 OR Policy.category \u003d \u0027Climate Change\u0027;", + "sql_explanation": "This SQL query returns all policies from the Policy table where the description column contains the phrase \u0027climate change\u0027 or the category column is equal to \u0027Climate Change\u0027." 
+}, { + "id": "2573", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total population of the state of Texas, and what is the percentage of the population that lives in urban areas?", + "sql_context": "CREATE TABLE StatePopulation (State VARCHAR(100), Population INT, UrbanPopulation INT); INSERT INTO StatePopulation (State, Population, UrbanPopulation) VALUES (\u0027Texas\u0027, 29528404, 15675394);", + "sql": "SELECT (UrbanPopulation / Population) * 100.0 AS UrbanPercentage, Population FROM StatePopulation WHERE State \u003d \u0027Texas\u0027;", + "sql_explanation": "This SQL query calculates the percentage of the population living in urban areas in Texas. It divides the UrbanPopulation by the Population column and multiplies the result by 100.0 to get a percentage." 
+}, { + "id": "3500", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of public parks in the western region that were established before 2010?", + "sql_context": "CREATE TABLE public_parks (id INT, region VARCHAR(255), establishment_date DATE); INSERT INTO public_parks (id, region, establishment_date) VALUES (1, \u0027Western\u0027, \u00272005-05-15\u0027), (2, \u0027Eastern\u0027, \u00272012-08-18\u0027), (3, \u0027Western\u0027, \u00272008-09-09\u0027);", + "sql": "SELECT MIN(id) FROM public_parks WHERE region \u003d \u0027Western\u0027 AND establishment_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "1. Filter rows where region is \u0027Western\u0027 and establishment_date is before 2010. 2. Find the minimum value of the \u0027id\u0027 column." 
+}, { + "id": "3604", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of funding allocated to minority-owned businesses in the Technology sector?", + "sql_context": "CREATE TABLE businesses (id INT, name TEXT, industry TEXT, ownership TEXT, funding FLOAT); INSERT INTO businesses (id, name, industry, ownership, funding) VALUES (1, \u0027TechCo\u0027, \u0027Technology\u0027, \u0027Minority\u0027, 500000.00); INSERT INTO businesses (id, name, industry, ownership, funding) VALUES (2, \u0027FinCo\u0027, \u0027Finance\u0027, \u0027Majority\u0027, 1000000.00);", + "sql": "SELECT SUM(funding) FROM businesses WHERE industry \u003d \u0027Technology\u0027 AND ownership \u003d \u0027Minority\u0027;", + "sql_explanation": "We first filter the results to only include records where the industry column is \u0027Technology\u0027 and the ownership column is \u0027Minority\u0027. Then, we calculate the total amount of funding using the SUM function." 
+}, { + "id": "3880", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public schools are there in California, and what is the average enrollment per school?", + "sql_context": "CREATE TABLE public_schools (name VARCHAR(255), state VARCHAR(255), enrollment INT); INSERT INTO public_schools (name, state, enrollment) VALUES (\u0027Los Angeles High School\u0027, \u0027California\u0027, 3150), (\u0027San Francisco High School\u0027, \u0027California\u0027, 2500), (\u0027Oakland High School\u0027, \u0027California\u0027, 2200);", + "sql": "SELECT AVG(enrollment) AS avg_enrollment FROM public_schools WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "The SQL query calculates the average enrollment per public school in California by finding the average enrollment value of all records in the public_schools table where the state is California." 
+}, { + "id": "4063", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of public meetings held in the state of New York in 2021?", + "sql_context": "CREATE TABLE meetings (id INT PRIMARY KEY, state VARCHAR(20), year INT, num_participants INT); INSERT INTO meetings (id, state, year, num_participants) VALUES (1, \u0027New York\u0027, 2021, 35); INSERT INTO meetings (id, state, year, num_participants) VALUES (2, \u0027New York\u0027, 2021, 45);", + "sql": "SELECT SUM(num_participants) FROM meetings WHERE state \u003d \u0027New York\u0027 AND year \u003d 2021;", + "sql_explanation": "The SQL query calculates the sum of the num_participants column for rows where the state is \u0027New York\u0027 and year is 2021." 
+}, { + "id": "4121", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many criminal complaints were filed in California in the year 2020?", + "sql_context": "CREATE TABLE crimes (id INT, state TEXT, year INT, complaint_count INT); INSERT INTO crimes (id, state, year, complaint_count) VALUES (1, \u0027California\u0027, 2018, 1000), (2, \u0027Texas\u0027, 2019, 1500), (3, \u0027California\u0027, 2020, 1200), (4, \u0027New York\u0027, 2021, 1800);", + "sql": "SELECT SUM(complaint_count) FROM crimes WHERE state \u003d \u0027California\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the total number of criminal complaints filed in California in the year 2020 by filtering the crimes table for rows where state is \u0027California\u0027 and year is 2020, and then summing the complaint_count column using the SUM function." 
+}, { + "id": "4230", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public meetings were held in the last 30 days?", + "sql_context": "CREATE TABLE public_meetings (date DATE, location VARCHAR(255), topic VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM public_meetings WHERE date \u003e\u003d DATE(NOW()) - INTERVAL 30 DAY;", + "sql_explanation": "The SQL query uses the DATE() and NOW() functions and the INTERVAL keyword to return the count of public meetings held in the last 30 days from the \u0027public_meetings\u0027 table." +}, { + "id": "4316", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and IDs of all cities in the state of \u0027California\u0027 with a population greater than 500,000?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(255), state VARCHAR(255), population INT); INSERT INTO cities (id, name, state, population) VALUES (1, \u0027Los Angeles\u0027, \u0027California\u0027, 4000000), (2, \u0027San Diego\u0027, \u0027California\u0027, 1400000), (3, \u0027San Jose\u0027, \u0027California\u0027, 1000000);", + "sql": "SELECT name, id FROM cities WHERE state \u003d \u0027California\u0027 AND population \u003e 500000;", + "sql_explanation": "This 
query selects the name and id columns from the cities table where the state is \u0027California\u0027 and the population is greater than 500,000." +}, { + "id": "4358", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record in the \u0027cities\u0027 table with name \u0027Yountville\u0027, state \u0027CA\u0027, and population 3000", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(50), state VARCHAR(2), population INT);", + "sql": "INSERT INTO cities (name, state, population) VALUES (\u0027Yountville\u0027, \u0027CA\u0027, 3000);", + "sql_explanation": "This query inserts a new record into the \u0027cities\u0027 table, with the name \u0027Yountville\u0027, state \u0027CA\u0027, and population 3000." 
+}, { + "id": "4384", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of registered voters in Texas and California?", + "sql_context": "CREATE TABLE VoterRegistration (State VARCHAR(2), NumberOfVoters INT); INSERT INTO VoterRegistration (State, NumberOfVoters) VALUES (\u0027TX\u0027, 16007617), (\u0027CA\u0027, 22034627);", + "sql": "SELECT SUM(NumberOfVoters) FROM VoterRegistration WHERE State IN (\u0027TX\u0027, \u0027CA\u0027);", + "sql_explanation": "This query calculates the total number of registered voters in Texas and California by summing the NumberOfVoters field in the VoterRegistration table for the rows with a State of \u0027TX\u0027 or \u0027CA\u0027." 
+}, { + "id": "4457", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the government program in the state of California with the ID of 3 to \u0027Program X\u0027.", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR(255), budget FLOAT, state VARCHAR(255)); INSERT INTO programs (id, name, budget, state) VALUES (1, \u0027Program A\u0027, 2.5, \u0027Florida\u0027), (2, \u0027Program B\u0027, 1.2, \u0027Florida\u0027), (3, \u0027Program C\u0027, 1.8, \u0027California\u0027);", + "sql": "UPDATE programs SET name \u003d \u0027Program X\u0027 WHERE id \u003d 3 AND state \u003d \u0027California\u0027;", + "sql_explanation": "This query updates the name of the government program in California with the ID of 3 to \u0027Program X\u0027 by filtering the programs table based on the id and state criteria and then setting the name column to the desired value." 
+}, { + "id": "4493", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum population of cities in North America?", + "sql_context": "CREATE TABLE city_population (city VARCHAR(50), country VARCHAR(50), population INT); INSERT INTO city_population (city, country, population) VALUES (\u0027New York\u0027, \u0027United States\u0027, 8550405), (\u0027Los Angeles\u0027, \u0027United States\u0027, 3971883), (\u0027Chicago\u0027, \u0027United States\u0027, 2705275), (\u0027Houston\u0027, \u0027United States\u0027, 2325502), (\u0027Phoenix\u0027, \u0027United States\u0027, 1660272), (\u0027Philadelphia\u0027, \u0027United States\u0027, 1585577), (\u0027San Antonio\u0027, \u0027United States\u0027, 1547253), (\u0027San Diego\u0027, \u0027United States\u0027, 1425876);", + "sql": "SELECT MAX(population) FROM city_population WHERE country \u003d \u0027United States\u0027;", + "sql_explanation": "Determine the maximum population of cities in North America by selecting the maximum population from the city_population table where the country is United States." 
+}, { + "id": "4517", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all evidence-based policy making initiatives in the Middle East and North Africa region from the ebp_initiatives table.", + "sql_context": "CREATE TABLE ebp_initiatives (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO ebp_initiatives (id, name, region) VALUES (1, \u0027Initiative 1\u0027, \u0027Middle East\u0027), (2, \u0027Initiative 2\u0027, \u0027North Africa\u0027), (3, \u0027Initiative 3\u0027, \u0027Europe\u0027);", + "sql": "DELETE FROM ebp_initiatives WHERE region IN (\u0027Middle East\u0027, \u0027North Africa\u0027);", + "sql_explanation": "This query deletes records from the ebp_initiatives table where the region is either \u0027Middle East\u0027 or \u0027North Africa\u0027." 
+}, { + "id": "4587", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of data sets related to economic development in \u0027island\u0027 schema where is_open is true.", + "sql_context": "CREATE SCHEMA island; CREATE TABLE island.economic_development_data (id INT, name VARCHAR(255), is_open BOOLEAN); INSERT INTO island.economic_development_data (id, name, is_open) VALUES (1, \u0027investment\u0027, true), (2, \u0027growth\u0027, true), (3, \u0027jobs\u0027, true);", + "sql": "SELECT COUNT(*) FROM island.economic_development_data WHERE is_open \u003d true;", + "sql_explanation": "This SQL query creates a schema \u0027island\u0027 and corresponding \u0027economic_development_data\u0027 table. It then inserts some sample records in the table, with is_open column set to true for open data sets. The query then counts the total number of records in the table where is_open is true." 
+}, { + "id": "4617", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of crimes committed in the city of Chicago in the year 2020?", + "sql_context": "CREATE TABLE crimes (id INT, city TEXT, year INT, crime_type TEXT, num_crimes INT); INSERT INTO crimes (id, city, year, crime_type, num_crimes) VALUES (1, \u0027Chicago\u0027, 2020, \u0027Theft\u0027, 5000), (2, \u0027Chicago\u0027, 2020, \u0027Burglary\u0027, 3000), (3, \u0027Chicago\u0027, 2019, \u0027Theft\u0027, 4500);", + "sql": "SELECT SUM(num_crimes) FROM crimes WHERE city \u003d \u0027Chicago\u0027 AND year \u003d 2020;", + "sql_explanation": "The SQL query calculates the total number of crimes committed in the city of Chicago in the year 2020. It uses the WHERE clause to filter crimes by city and year and the SUM function to calculate the total number of crimes." 
+}, { + "id": "4767", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all the hospitals and the number of beds available in each one in the state of New York?", + "sql_context": "CREATE TABLE hospitals (id INT, hospital_name VARCHAR(255), state VARCHAR(255), num_beds INT);", + "sql": "SELECT hospital_name, num_beds FROM hospitals WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "We filter the hospitals table to only include records where the state is \u0027New York\u0027. Then, we select the hospital_name and num_beds columns to get the desired result." +}, { + "id": "4789", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people in New York voted in the last election?", + "sql_context": "CREATE TABLE voters (name TEXT, state TEXT, voted INTEGER); INSERT INTO voters (name, state, voted) VALUES (\u0027Person1\u0027, \u0027New York\u0027, 1), (\u0027Person2\u0027, \u0027New York\u0027, 0), (\u0027Person3\u0027, \u0027New York\u0027, 1), (\u0027Person4\u0027, \u0027New York\u0027, 1), (\u0027Person5\u0027, \u0027New York\u0027, 0);", + "sql": "SELECT SUM(voted) as total_voters FROM voters WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This 
SQL query calculates the total number of people in New York who voted in the last election. It does so by summing the voted column in the voters table where the state is \u0027New York\u0027. The SUM aggregate function is used to add up the values in the voted column." +}, { + "id": "5078", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of all transportation projects in the year 2020?", + "sql_context": "CREATE TABLE TransportationProjects (ProjectID INT, Name VARCHAR(100), Budget DECIMAL(10,2), Year INT); INSERT INTO TransportationProjects (ProjectID, Name, Budget, Year) VALUES (1, \u0027Road Expansion\u0027, 5000000, 2020), (2, \u0027Bridge Construction\u0027, 8000000, 2019), (3, \u0027Traffic Light Installation\u0027, 200000, 2020);", + "sql": "SELECT AVG(Budget) FROM TransportationProjects WHERE Year \u003d 2020;", + "sql_explanation": "This SQL query calculates the average budget of all transportation projects in the year 2020. It does this by selecting the average (AVG) of the Budget column from the TransportationProjects table where the Year column is equal to 2020." 
+}, { + "id": "5119", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of residents who have not participated in civic engagement activities?", + "sql_context": "CREATE TABLE resident (id INT PRIMARY KEY, name TEXT, age INT, city_id INT, gender TEXT, civic_participation BOOLEAN, FOREIGN KEY (city_id) REFERENCES city(id));", + "sql": "SELECT MIN(age) FROM resident WHERE civic_participation \u003d FALSE;", + "sql_explanation": "This query calculates the minimum age of residents who have not participated in civic engagement activities by filtering the residents table for rows with a value of FALSE in the civic_participation column and then calculating the minimum age using the MIN function." 
+}, { + "id": "5124", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average income in the \"East\" and \"West\" districts?", + "sql_context": "CREATE TABLE district (name VARCHAR(20), income FLOAT); INSERT INTO district (name, income) VALUES (\u0027North\u0027, 45000.0), (\u0027East\u0027, 50000.0), (\u0027West\u0027, 40000.0), (\u0027South\u0027, 55000.0), (\u0027East\u0027, 53000.0), (\u0027West\u0027, 42000.0);", + "sql": "SELECT AVG(income) FROM district WHERE name IN (\u0027East\u0027, \u0027West\u0027);", + "sql_explanation": "The SQL query calculates the average income in the \"East\" and \"West\" districts by using the AVG function and the IN operator." 
+}, { + "id": "5245", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all departments and their associated data types.", + "sql_context": "CREATE TABLE department_data (dept_name TEXT, column_name TEXT, data_type TEXT); INSERT INTO department_data (dept_name, column_name, data_type) VALUES (\u0027Human Services Department\u0027, \u0027age\u0027, \u0027INTEGER\u0027), (\u0027Human Services Department\u0027, \u0027gender\u0027, \u0027TEXT\u0027), (\u0027Human Services Department\u0027, \u0027income\u0027, \u0027FLOAT\u0027), (\u0027Education Department\u0027, \u0027school_name\u0027, \u0027TEXT\u0027), (\u0027Education Department\u0027, \u0027student_count\u0027, \u0027INTEGER\u0027);", + "sql": "SELECT dept_name, column_name, data_type FROM department_data;", + "sql_explanation": "This SQL query retrieves all columns from the department_data table, listing all departments and their associated data types." 
+}, { + "id": "5255", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of public meetings held in \u0027government_meetings\u0027 table, excluding meetings with a duration less than 1 hour?", + "sql_context": "CREATE TABLE government_meetings (meeting_id INT, duration INT);", + "sql": "SELECT COUNT(*) FROM government_meetings WHERE duration \u003e\u003d 60;", + "sql_explanation": "This query calculates the number of public meetings with a duration of 1 hour or more by filtering the \u0027government_meetings\u0027 table based on the \u0027duration\u0027 condition and counting the number of records." +}, { + "id": "1870", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records into Health Equity Metrics table", + "sql_context": "CREATE TABLE HealthEquityMetrics (ID INT PRIMARY KEY, MetricName VARCHAR(100), Score INT);", + "sql": "INSERT INTO HealthEquityMetrics (ID, MetricName, Score) VALUES (1, \u0027Accessibility\u0027, 80), (2, \u0027QualityOfCare\u0027, 90), (3, \u0027LanguageServices\u0027, 85);", + "sql_explanation": "1. We are inserting 3 records into the \u0027HealthEquityMetrics\u0027 table. 2. Each record has an \u0027ID\u0027, \u0027MetricName\u0027, and \u0027Score\u0027. 3. 
For example, the first record has \u0027ID\u0027 as 1, \u0027MetricName\u0027 as \u0027Accessibility\u0027, and \u0027Score\u0027 as 80." +}, { + "id": "1913", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the percentage of patients who identify as American Indian or Alaska Native, from the total number of patients, rounded to two decimal places.", + "sql_context": "CREATE TABLE PatientDemographics (PatientID INT, Race TEXT); INSERT INTO PatientDemographics (PatientID, Race) VALUES (1, \u0027American Indian or Alaska Native\u0027); INSERT INTO PatientDemographics (PatientID, Race) VALUES (2, \u0027Asian\u0027); INSERT INTO PatientDemographics (PatientID, Race) VALUES (3, \u0027American Indian or Alaska Native\u0027);", + "sql": "SELECT ROUND(COUNT(*) FILTER (WHERE Race \u003d \u0027American Indian or Alaska Native\u0027) * 100.0 / COUNT(*), 2) as Percentage FROM PatientDemographics;", + "sql_explanation": "The SQL query calculates the percentage of patients who identify as American Indian or Alaska Native, from the total number of patients. It uses the COUNT() function with a FILTER clause to count the number of patients who identify as American Indian or Alaska Native. The result is divided by the total number of patients and multiplied by 100 to get the percentage. The result is rounded to two decimal places." 
+}, { + "id": "2100", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Link parity laws to regions affected", + "sql_context": "CREATE TABLE MentalHealthParity (LawID INT PRIMARY KEY, LawName TEXT, LawDescription TEXT, PassedDate DATE, AffectedRegion TEXT); INSERT INTO MentalHealthParity (LawID, LawName, LawDescription, PassedDate, AffectedRegion) VALUES (1, \u0027Parity Act 2020\u0027, \u0027Requires insurers to cover mental health at the same level as physical health\u0027, \u00272020-01-01\u0027, \u0027Northeast\u0027);", + "sql": "ALTER TABLE MentalHealthParity ADD CONSTRAINT FK_MentalHealthParity_Region FOREIGN KEY (AffectedRegion) REFERENCES Region(RegionName);", + "sql_explanation": "Add a foreign key constraint to \u0027MentalHealthParity\u0027 table\u0027s AffectedRegion column, referencing the \u0027Region\u0027 table\u0027s RegionName column." 
+}, { + "id": "2188", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of providers who have served more than 100 patients in the \u0027providers\u0027 table, ordered by the number of patients served in descending order.", + "sql_context": "CREATE TABLE providers (provider_id INT PRIMARY KEY AUTO_INCREMENT, first_name VARCHAR(50), last_name VARCHAR(50), gender VARCHAR(10), ethnicity VARCHAR(50), state VARCHAR(20), patients_served INT);", + "sql": "SELECT provider_id, first_name, last_name, patients_served FROM providers WHERE patients_served \u003e 100 ORDER BY patients_served DESC;", + "sql_explanation": "This query calculates the number of providers who have served more than 100 patients in the \u0027providers\u0027 table and orders them by the number of patients served in descending order." 
+}, { + "id": "2360", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the record of cultural competency training for the employee with id 1001", + "sql_context": "CREATE TABLE employee_trainings (employee_id INT, training_type VARCHAR(255), completed_date DATE); INSERT INTO employee_trainings (employee_id, training_type, completed_date) VALUES (1001, \u0027Cultural Competency\u0027, \u00272022-01-15\u0027), (1002, \u0027Cultural Competency\u0027, \u00272021-12-12\u0027), (1003, \u0027Cultural Competency\u0027, \u00272022-02-20\u0027);", + "sql": "UPDATE employee_trainings SET completed_date \u003d \u00272022-05-10\u0027 WHERE employee_id \u003d 1001 AND training_type \u003d \u0027Cultural Competency\u0027;", + "sql_explanation": "The SQL query updates the completed_date of the Cultural Competency training for the employee with id 1001 to \u00272022-05-10\u0027. It uses the UPDATE keyword followed by the table name, the columns to update, the new value, and a WHERE clause to filter the record." 
+}, { + "id": "2713", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which community health workers are from underrepresented communities and have not received cultural competency training?", + "sql_context": "CREATE TABLE CommunityHealthWorkers (WorkerID INT, Age INT, Ethnicity VARCHAR(20), CulturalCompetencyTraining DATE, UnderrepresentedCommunity VARCHAR(20)); INSERT INTO CommunityHealthWorkers (WorkerID, Age, Ethnicity, CulturalCompetencyTraining, UnderrepresentedCommunity) VALUES (1, 35, \u0027Hispanic\u0027, NULL, \u0027Yes\u0027); INSERT INTO CommunityHealthWorkers (WorkerID, Age, Ethnicity, CulturalCompetencyTraining, UnderrepresentedCommunity) VALUES (2, 40, \u0027African American\u0027, \u00272022-02-01\u0027, \u0027Yes\u0027); INSERT INTO CommunityHealthWorkers (WorkerID, Age, Ethnicity, CulturalCompetencyTraining, UnderrepresentedCommunity) VALUES (3, 45, \u0027Caucasian\u0027, \u00272022-01-01\u0027, \u0027No\u0027);", + "sql": "SELECT * FROM CommunityHealthWorkers WHERE CulturalCompetencyTraining IS NULL AND UnderrepresentedCommunity \u003d \u0027Yes\u0027;", + "sql_explanation": "The SQL query lists all community health workers from underrepresented communities who have not received cultural competency training by selecting all columns from the CommunityHealthWorkers table and filtering for workers with a NULL CulturalCompetencyTraining date and \u0027Yes\u0027 value in the UnderrepresentedCommunity column." 
+}, { + "id": "3056", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community health workers who have been hired in the last year?", + "sql_context": "CREATE TABLE community_health_workers (id INT PRIMARY KEY, name TEXT, hired_date DATE, language TEXT, cultural_competency_score INT);", + "sql": "SELECT COUNT(*) FROM community_health_workers WHERE hired_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "This query counts the number of community health workers who have been hired in the last year. It does so by selecting all records from the community_health_workers table where hired_date is within the last year. The query then counts the number of records returned, which corresponds to the total number of community health workers who have been hired in the last year." 
+}, { + "id": "3074", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of community health workers who identify as LGBTQ+ and are from the Southeast region?", + "sql_context": "CREATE TABLE CommunityHealthWorkers (Id INT, Age INT, Gender VARCHAR(10), Ethnicity VARCHAR(20), Region VARCHAR(20)); INSERT INTO CommunityHealthWorkers (Id, Age, Gender, Ethnicity, Region) VALUES (1, 45, \u0027Female\u0027, \u0027Hispanic\u0027, \u0027Northeast\u0027), (2, 35, \u0027Male\u0027, \u0027LGBTQ+\u0027, \u0027Southwest\u0027), (3, 50, \u0027Non-binary\u0027, \u0027African American\u0027, \u0027Northeast\u0027), (4, 40, \u0027Transgender\u0027, \u0027LGBTQ+\u0027, \u0027Northeast\u0027), (5, 60, \u0027Male\u0027, \u0027Native American\u0027, \u0027Northwest\u0027), (6, 30, \u0027Female\u0027, \u0027Native American\u0027, \u0027Northwest\u0027), (7, 70, \u0027Female\u0027, \u0027LGBTQ+\u0027, \u0027Southeast\u0027);", + "sql": "SELECT MIN(Age) as MinAge FROM CommunityHealthWorkers WHERE Ethnicity \u003d \u0027LGBTQ+\u0027 AND Region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This query calculates the minimum age of community health workers who identify as LGBTQ+ and are from the Southeast region. It selects the rows where the Ethnicity column is equal to \u0027LGBTQ+\u0027 and the Region column is equal to \u0027Southeast\u0027 and then calculates the minimum age within that group." 
+}, { + "id": "3380", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027health_equity_metrics\u0027 table with the following information: ID 101, Metric_Name \u0027Racial Disparities\u0027, Score 80", + "sql_context": "CREATE TABLE health_equity_metrics (id INT, metric_name VARCHAR(255), score INT);", + "sql": "INSERT INTO health_equity_metrics (id, metric_name, score) VALUES (101, \u0027Racial Disparities\u0027, 80);", + "sql_explanation": "* This query inserts a new record into the \u0027health_equity_metrics\u0027 table with the specified values for ID, Metric_Name, and Score." +}, { + "id": "3393", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table for health equity metrics by race", + "sql_context": "CREATE TABLE health_equity_race (id INT PRIMARY KEY, state VARCHAR(2), year INT, race VARCHAR(20), disparity_rate FLOAT);", + "sql": "CREATE TABLE if not exists health_equity_race_new AS SELECT * FROM health_equity_race WHERE FALSE;", + "sql_explanation": "A new table \u0027health_equity_race_new\u0027 is created which is a copy of \u0027health_equity_race\u0027 using the \u0027CREATE TABLE AS SELECT * FROM table_name WHERE FALSE\u0027 pattern." 
+}, { + "id": "3694", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cultural competency score for healthcare providers serving the LGBTQ+ community?", + "sql_context": "CREATE TABLE healthcare_providers (id INT, name TEXT, cultural_competency_score INT, community TEXT); INSERT INTO healthcare_providers (id, name, cultural_competency_score, community) VALUES (1, \u0027Dr. Jane Smith\u0027, 95, \u0027LGBTQ+\u0027); INSERT INTO healthcare_providers (id, name, cultural_competency_score, community) VALUES (2, \u0027Dr. Maria Garcia\u0027, 88, \u0027Latinx\u0027); INSERT INTO healthcare_providers (id, name, cultural_competency_score, community) VALUES (3, \u0027Dr. David Kim\u0027, 92, \u0027LGBTQ+\u0027);", + "sql": "SELECT AVG(cultural_competency_score) FROM healthcare_providers WHERE community \u003d \u0027LGBTQ+\u0027;", + "sql_explanation": "This SQL query calculates the average cultural competency score for healthcare providers serving the LGBTQ+ community by filtering the healthcare_providers table based on community, then calculating the average cultural_competency_score using the AVG function." 
+}, { + "id": "3758", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all mental health parity violations that occurred in the last 6 months.", + "sql_context": "CREATE TABLE mental_health_parity (violation_id INT, date DATE, location VARCHAR(50), description VARCHAR(100)); INSERT INTO mental_health_parity (violation_id, date, location, description) VALUES (1, \u00272023-01-01\u0027, \u0027NY\u0027, \u0027Description 1\u0027), (2, \u00272023-02-01\u0027, \u0027CA\u0027, \u0027Description 2\u0027), (3, \u00272023-03-01\u0027, \u0027TX\u0027, \u0027Description 3\u0027);", + "sql": "SELECT * FROM mental_health_parity WHERE date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", + "sql_explanation": "This query selects all columns from the \u0027mental_health_parity\u0027 table where the \u0027date\u0027 column is within the last 6 months. The query uses the DATE_SUB() function to subtract 6 months from the current date and the \u0027\u003e\u0027 operator to filter the results." 
+}, { + "id": "3897", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest mental health score for patients who identify as Caucasian or Asian?", + "sql_context": "CREATE TABLE patient (id INT, name TEXT, mental_health_score INT, community TEXT); INSERT INTO patient (id, name, mental_health_score, community) VALUES (1, \u0027John Doe\u0027, 60, \u0027Straight\u0027), (2, \u0027Jane Smith\u0027, 70, \u0027LGBTQ+\u0027), (3, \u0027Ana Garcia\u0027, 50, \u0027Latino\u0027), (4, \u0027Sara Johnson\u0027, 85, \u0027African American\u0027), (5, \u0027Hiroshi Tanaka\u0027, 90, \u0027Asian\u0027), (6, \u0027Peter Brown\u0027, 80, \u0027Caucasian\u0027);", + "sql": "SELECT MAX(mental_health_score) FROM patient WHERE community IN (\u0027Caucasian\u0027, \u0027Asian\u0027);", + "sql_explanation": "This query finds the highest mental health score for patients who identify as either Caucasian or Asian." 
+}, { + "id": "3998", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all community health workers who have a cultural competency score greater than or equal to 90", + "sql_context": "CREATE TABLE healthcare.CommunityHealthWorker( worker_id INT PRIMARY KEY, name VARCHAR(100), cultural_competency_score FLOAT); INSERT INTO healthcare.CommunityHealthWorker (worker_id, name, cultural_competency_score) VALUES (1, \u0027Jane Smith\u0027, 85.5), (2, \u0027Maria Garcia\u0027, 92.3), (3, \u0027David Kim\u0027, 88.7), (4, \u0027Fatima Patel\u0027, 93.1);", + "sql": "SELECT * FROM healthcare.CommunityHealthWorker WHERE cultural_competency_score \u003e\u003d 90;", + "sql_explanation": "This SQL statement retrieves all records from the \u0027CommunityHealthWorker\u0027 table in the \u0027healthcare\u0027 schema where the \u0027cultural_competency_score\u0027 is greater than or equal to 90." 
+}, { + "id": "4833", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the table \u0027health_equity_metrics\u0027 where the metric type is \u0027accessibility\u0027.", + "sql_context": "CREATE TABLE health_equity_metrics (id INT, metric_type VARCHAR(50), value INT); INSERT INTO health_equity_metrics (id, metric_type, value) VALUES (1, \u0027accessibility\u0027, 70), (2, \u0027quality\u0027, 85), (3, \u0027cost\u0027, 65);", + "sql": "DELETE FROM health_equity_metrics WHERE metric_type \u003d \u0027accessibility\u0027;", + "sql_explanation": "This query deletes all records from the health_equity_metrics table where the metric type is \u0027accessibility\u0027 by using the DELETE statement and filtering the records based on the metric_type column." 
+}, { + "id": "4886", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cultural competency score for health workers in California?", + "sql_context": "CREATE TABLE CulturalCompetency (WorkerID INT, WorkerName VARCHAR(100), State VARCHAR(2), Score INT); INSERT INTO CulturalCompetency (WorkerID, WorkerName, State, Score) VALUES (1, \u0027Michael Johnson\u0027, \u0027California\u0027, 85);", + "sql": "SELECT SUM(Score) FROM CulturalCompetency WHERE State \u003d \u0027California\u0027;", + "sql_explanation": "Sum the Score column in the CulturalCompetency table for California workers." +}, { + "id": "4916", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all states that have mental health parity laws", + "sql_context": "CREATE TABLE mental_health_parity (id INT PRIMARY KEY, state VARCHAR(2), parity_law TEXT, year INT);", + "sql": "SELECT state FROM mental_health_parity WHERE parity_law IS NOT NULL;", + "sql_explanation": "1. The \u0027state\u0027 column is being selected from the \u0027mental_health_parity\u0027 table. 2. Only rows with a non-null \u0027parity_law\u0027 value are returned." 
+}, { + "id": "5380", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients served by community health workers?", + "sql_context": "CREATE TABLE worker_patient_data_total (worker_id INT, patients_served INT); INSERT INTO worker_patient_data_total (worker_id, patients_served) VALUES (1, 50); INSERT INTO worker_patient_data_total (worker_id, patients_served) VALUES (2, 75);", + "sql": "SELECT SUM(patients_served) FROM worker_patient_data_total;", + "sql_explanation": "The SQL query calculates the total number of patients served by community health workers by using the SUM function." +}, { + "id": "5773", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record for the provider with ID 12345 from the \u0027providers\u0027 table", + "sql_context": "CREATE TABLE providers (id INT PRIMARY KEY, name VARCHAR(100), city VARCHAR(50), specialty VARCHAR(50));", + "sql": "DELETE FROM providers WHERE id \u003d 12345;", + "sql_explanation": "* Deletes the record with an ID of 12345 from the \u0027providers\u0027 table." 
+}, { + "id": "5823", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Select all data from the view for health equity metrics", + "sql_context": "CREATE OR REPLACE VIEW health_equity_view AS SELECT * FROM health_equity;", + "sql": "SELECT * FROM health_equity_view;", + "sql_explanation": "All records from the \u0027health_equity_view\u0027 view are selected using the \u0027SELECT * FROM view_name\u0027 pattern." +}, { + "id": "707", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total word count for articles related to a specific topic, in the last year?", + "sql_context": "CREATE TABLE articles (article_id INT, article_title VARCHAR(100), article_text TEXT, article_date DATE, topic VARCHAR(50)); INSERT INTO articles VALUES (1, \u0027Article 1\u0027, \u0027Climate change is...\u0027, \u00272022-01-01\u0027, \u0027climate change\u0027), (2, \u0027Article 2\u0027, \u0027Global warming is...\u0027, \u00272022-02-15\u0027, \u0027climate change\u0027), (3, \u0027Article 3\u0027, \u0027The environment is...\u0027, \u00272021-12-31\u0027, \u0027environment\u0027); CREATE TABLE topics (topic VARCHAR(50)); INSERT INTO topics VALUES (\u0027topic1\u0027), (\u0027topic2\u0027), (\u0027climate change\u0027);", + "sql": "SELECT SUM(LENGTH(article_text) - 
LENGTH(REPLACE(article_text, \u0027 \u0027, \u0027\u0027)) + 1) as total_word_count FROM articles WHERE topic \u003d \u0027climate change\u0027 AND article_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR);", + "sql_explanation": "The SQL query calculates the total word count for articles related to a specific topic (in this case, climate change) and published in the last year. It does this by finding the length of each article\u0027s text, subtracting the length of the text with spaces removed, and adding 1 for each word. It then sums this value for all articles related to the specified topic and published in the last year." +}, { + "id": "992", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"media_ethics\" table with \"principle\" as \"Accountability\", \"description\" as \"Journalists should be accountable for their actions\", and \"example\" as \"Apologizing for incorrect information\"", + "sql_context": "CREATE TABLE media_ethics (id INT PRIMARY KEY, principle VARCHAR(255), description TEXT, example TEXT);", + "sql": "INSERT INTO media_ethics (principle, description, example) VALUES (\u0027Accountability\u0027, \u0027Journalists should be accountable for their actions\u0027, \u0027Apologizing for incorrect information\u0027);", + "sql_explanation": "This query inserts a new record into the \"media_ethics\" table with the \"principle\" as \"Accountability\", \"description\" as \"Journalists should be accountable for their actions\", and \"example\" as \"Apologizing for incorrect information\"." 
+}, { + "id": "1175", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of users who liked articles about climate change in the past month?", + "sql_context": "CREATE TABLE articles (id INT, title TEXT, category TEXT, likes INT, created_at DATETIME); INSERT INTO articles (id, title, category, likes, created_at) VALUES (1, \u0027Climate crisis: 12 years to save the planet\u0027, \u0027climate change\u0027, 100, \u00272022-01-01 10:30:00\u0027);", + "sql": "SELECT AVG(DATEDIFF(\u0027day\u0027, created_at, NOW())) as avg_age FROM articles WHERE category \u003d \u0027climate change\u0027 AND likes \u003e 50 AND created_at \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH)", + "sql_explanation": "Calculate the average age of users who liked articles about climate change in the past month. First, find the difference between the created_at date and the current date (NOW()) for each article, then average these differences. Only consider articles with \u0027climate change\u0027 category, more than 50 likes and created in the past month." 
+}, { + "id": "2817", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for investigative journalism projects in \u0027Europe\u0027?", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, type TEXT, budget INT, location TEXT); INSERT INTO projects VALUES (1, \u0027Project A\u0027, \u0027investigative\u0027, 5000, \u0027Europe\u0027); INSERT INTO projects VALUES (2, \u0027Project B\u0027, \u0027regular\u0027, 3000, \u0027Europe\u0027);", + "sql": "SELECT AVG(projects.budget) FROM projects WHERE projects.type \u003d \u0027investigative\u0027 AND projects.location \u003d \u0027Europe\u0027;", + "sql_explanation": "We are calculating the average budget for investigative journalism projects in \u0027Europe\u0027. We filter for investigative projects and \u0027Europe\u0027 location and finally calculate the average budget." 
+}, { + "id": "2820", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles have been published about climate change in the past year?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(100), publication_date DATE, topic VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM articles WHERE topic \u003d \u0027climate change\u0027 AND publication_date \u003e\u003d CURDATE() - INTERVAL 1 YEAR;", + "sql_explanation": "This query counts the number of news articles published about climate change in the past year by filtering the articles table based on the topic and publication date, and then calculating the count of the matching records." 
+}, { + "id": "3329", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of articles in the \"articles\" table published in the first half of 2022?", + "sql_context": "CREATE TABLE articles (article_id INT, title VARCHAR(100), category VARCHAR(50), publication_date DATE, views INT); INSERT INTO articles (article_id, title, category, publication_date, views) VALUES (1, \u0027News from the Capital\u0027, \u0027Politics\u0027, \u00272022-01-01\u0027, 1500), (2, \u0027Tech Innovations in 2022\u0027, \u0027Technology\u0027, \u00272022-01-02\u0027, 1200), (3, \u0027The Art of Persuasion\u0027, \u0027Psychology\u0027, \u00272022-01-03\u0027, 1800), (4, \u0027Education Reforms in Europe\u0027, \u0027Education\u0027, \u00272022-01-04\u0027, 1000), (5, \u0027Climate Change in Asia\u0027, \u0027Environment\u0027, \u00272022-02-05\u0027, 2000);", + "sql": "SELECT COUNT(article_id) FROM articles WHERE publication_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "This SQL query calculates the total number of articles in the \"articles\" table published in the first half of 2022 by using the COUNT function on the \"article_id\" column and filtering the data by the \"publication_date\" column with the condition between \u00272022-01-01\u0027 and \u00272022-06-30\u0027." 
+}, { + "id": "3531", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Update the names of the columns \u0027age\u0027 to \u0027audience_age\u0027 and \u0027gender\u0027 to \u0027audience_gender\u0027 in the \u0027audience_demographics\u0027 table", + "sql_context": "CREATE TABLE audience_demographics (article_id INT, audience_age INT, gender VARCHAR(10), location VARCHAR(100));", + "sql": "ALTER TABLE audience_demographics RENAME COLUMN age TO audience_age, gender TO audience_gender;", + "sql_explanation": "The ALTER TABLE statement with RENAME COLUMN clause allows updating the column names in the \u0027audience_demographics\u0027 table." 
+}, { + "id": "3826", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"age\" field to 35 for all records in the \"audience_demographics\" table where \"country\" is \"Brazil\" and \"gender\" is \"Female\"", + "sql_context": "CREATE TABLE audience_demographics (id INT PRIMARY KEY, age INT, country VARCHAR(255), gender VARCHAR(255));", + "sql": "UPDATE audience_demographics SET age \u003d 35 WHERE country \u003d \u0027Brazil\u0027 AND gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query updates the \"age\" field to 35 for all records in the \"audience_demographics\" table where the \"country\" is \"Brazil\" and the \"gender\" is \"Female\"." 
+}, { + "id": "4008", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the names of all authors from the \u0027Freelance\u0027 category to \u0027Independent Contributor\u0027.", + "sql_context": "CREATE TABLE authors (id INT, name TEXT, category TEXT); INSERT INTO authors (id, name, category) VALUES (1, \u0027Jane Doe\u0027, \u0027Freelance\u0027);", + "sql": "UPDATE authors SET category \u003d \u0027Independent Contributor\u0027 WHERE category \u003d \u0027Freelance\u0027;", + "sql_explanation": "This query updates the category of all authors from \u0027Freelance\u0027 to \u0027Independent Contributor\u0027. It does so by using the UPDATE keyword, followed by the \u0027authors\u0027 table, specifying the new value for the \u0027category\u0027 column in the SET clause and the condition in the WHERE clause." 
+}, { + "id": "4293", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of readers who prefer sports news in the city of Los Angeles?", + "sql_context": "CREATE TABLE readers (id INT, name TEXT, age INT, city TEXT, interest TEXT); INSERT INTO readers (id, name, age, city, interest) VALUES (1, \u0027John Doe\u0027, 35, \u0027Los Angeles\u0027, \u0027sports\u0027);", + "sql": "SELECT AVG(age) FROM readers WHERE city \u003d \u0027Los Angeles\u0027 AND interest \u003d \u0027sports\u0027;", + "sql_explanation": "This query calculates the average age of readers who prefer sports news in the city of Los Angeles. It does so by selecting the AVG (average) of the \u0027age\u0027 column, filtering the data where the \u0027city\u0027 column is \u0027Los Angeles\u0027 and the \u0027interest\u0027 column is \u0027sports\u0027." 
+}, { + "id": "4340", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of female news reporters in the \"reporters\" table?", + "sql_context": "CREATE TABLE reporters (id INT, name VARCHAR(50), gender VARCHAR(10), age INT, department VARCHAR(20));", + "sql": "SELECT AVG(age) FROM reporters WHERE gender \u003d \u0027female\u0027 AND department \u003d \u0027news\u0027;", + "sql_explanation": "This query calculates the average age of female reporters in the \"reporters\" table by using the AVG function on the \"age\" column, filtered by gender and department." +}, { + "id": "4371", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the articles published by \u0027The Guardian\u0027 before January 1, 2022, excluding the articles published in the \u0027Opinion\u0027 section.", + "sql_context": "CREATE TABLE guardian (id INT, title VARCHAR(255), section VARCHAR(255), content TEXT, pub_date DATE); INSERT INTO guardian (id, title, section, content, pub_date) VALUES (1, \u0027Title1\u0027, \u0027Opinion\u0027, \u0027Content1\u0027, \u00272022-01-01\u0027); INSERT INTO guardian (id, title, section, content, pub_date) VALUES (2, \u0027Title2\u0027, \u0027News\u0027, \u0027Content2\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT * FROM guardian 
WHERE pub_date \u003c \u00272022-01-01\u0027 AND section !\u003d \u0027Opinion\u0027;", + "sql_explanation": "This query selects all the rows from the \u0027guardian\u0027 table where the \u0027pub_date\u0027 is before January 1, 2022, and the \u0027section\u0027 is not \u0027Opinion\u0027. This query uses the AND operator to combine two conditions." +}, { + "id": "4469", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of readers who prefer news about politics in the \"NewsReporting\" database?", + "sql_context": "CREATE TABLE ReaderDemographics (ReaderID INT, Age INT, PreferredNewsTopic VARCHAR(255)); INSERT INTO ReaderDemographics (ReaderID, Age, PreferredNewsTopic) VALUES (1, 45, \u0027politics\u0027);", + "sql": "SELECT AVG(Age) FROM ReaderDemographics WHERE PreferredNewsTopic \u003d \u0027politics\u0027", + "sql_explanation": "This query calculates the average age of readers who prefer news about politics by selecting the \u0027Age\u0027 column and calculating the average using the AVG() function for rows where \u0027PreferredNewsTopic\u0027 is equal to \u0027politics\u0027." 
+}, { + "id": "4788", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age for audience demographics in the \"audience_demographics\" table with a gender of \u0027Non-binary\u0027?", + "sql_context": "CREATE TABLE audience_demographics (id INT, name VARCHAR(50), gender VARCHAR(20), age INT); INSERT INTO audience_demographics (id, name, gender, age) VALUES (1, \u0027Young Adults\u0027, \u0027Female\u0027, 25), (2, \u0027Middle Aged\u0027, \u0027Male\u0027, 45), (3, \u0027Senior Citizens\u0027, \u0027Non-binary\u0027, 55), (4, \u0027Teenagers\u0027, \u0027Male\u0027, 15), (5, \u0027Young Adults\u0027, \u0027Non-binary\u0027, 32);", + "sql": "SELECT MIN(age) FROM audience_demographics WHERE gender \u003d \u0027Non-binary\u0027;", + "sql_explanation": "This SQL query calculates the minimum age for audience demographics with a gender of \u0027Non-binary\u0027 by using the MIN() aggregation function and the WHERE clause to filter the records based on the gender column." 
+}, { + "id": "4974", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the unique authors who have written for \u0027The Guardian\u0027 in the technology category.", + "sql_context": "CREATE TABLE guardian (article_id INT, title TEXT, author TEXT, category TEXT, publisher TEXT); INSERT INTO guardian (article_id, title, author, category, publisher) VALUES (1, \u0027Article 1\u0027, \u0027Author 1\u0027, \u0027Technology\u0027, \u0027The Guardian\u0027), (2, \u0027Article 2\u0027, \u0027Author 2\u0027, \u0027Politics\u0027, \u0027The Guardian\u0027);", + "sql": "SELECT DISTINCT author FROM guardian WHERE category \u003d \u0027Technology\u0027;", + "sql_explanation": "The SQL query first creates a table \u0027guardian\u0027 with its respective articles. Then, it selects all unique authors who have written for \u0027The Guardian\u0027 in the technology category." 
+}, { + "id": "5108", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average word count for articles in \u0027category2\u0027?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(50), word_count INT, category VARCHAR(20)); INSERT INTO articles (id, title, word_count, category) VALUES (1, \u0027Article1\u0027, 400, \u0027category1\u0027), (2, \u0027Article2\u0027, 600, \u0027category2\u0027), (3, \u0027Article3\u0027, 450, \u0027category3\u0027);", + "sql": "SELECT AVG(word_count) FROM articles WHERE category \u003d \u0027category2\u0027", + "sql_explanation": "This query calculates the average word count for articles in \u0027category2\u0027. It first filters the \u0027articles\u0027 table to only include rows where the category is \u0027category2\u0027. Then, it uses the AVG function to calculate the average word count of the remaining rows." 
+}, { + "id": "5127", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of readers who prefer \"Sports\" news category?", + "sql_context": "CREATE TABLE readers (id INT, name VARCHAR(50), age INT, preferred_category VARCHAR(20)); INSERT INTO readers (id, name, age, preferred_category) VALUES (1, \u0027John Doe\u0027, 25, \u0027Sports\u0027);", + "sql": "SELECT AVG(age) FROM readers WHERE preferred_category \u003d \u0027Sports\u0027", + "sql_explanation": "This query calculates the average age of readers who prefer the \"Sports\" news category. It does so by finding all rows in the readers table with the preferred_category set to \u0027Sports\u0027 and then calculating the average age of those rows." 
+}, { + "id": "5153", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which news articles have been updated the most?", + "sql_context": "CREATE TABLE News (news_id INT, title TEXT, update_count INT); INSERT INTO News (news_id, title, update_count) VALUES (1, \u0027Article1\u0027, 3), (2, \u0027Article2\u0027, 1), (3, \u0027Article3\u0027, 2);", + "sql": "SELECT title, update_count FROM News ORDER BY update_count DESC;", + "sql_explanation": "This SQL query retrieves the news articles with the highest update_count by simply ordering the News table by the update_count column in descending order." +}, { + "id": "5233", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many articles were published in the \u0027articles\u0027 table for the year 2021?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(100), publish_date DATE, author_id INT); INSERT INTO articles (id, title, publish_date, author_id) VALUES (1, \u0027Article 1\u0027, \u00272021-01-01\u0027, 1); INSERT INTO articles (id, title, publish_date, author_id) VALUES (2, \u0027Article 2\u0027, \u00272021-02-01\u0027, 2); INSERT INTO articles (id, title, publish_date, author_id) VALUES (3, \u0027Article 3\u0027, \u00272022-01-01\u0027, 3);", + "sql": "SELECT 
COUNT(*) FROM articles WHERE YEAR(publish_date) \u003d 2021;", + "sql_explanation": "The SQL query counts the number of articles published in the year 2021 by selecting all rows from the \u0027articles\u0027 table where the \u0027publish_date\u0027 column is in the year 2021. It uses the YEAR() function to extract the year from the \u0027publish_date\u0027 column." +}, { + "id": "5296", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of readers who prefer reading articles about technology in the \"TechNews\" newspaper?", + "sql_context": "CREATE TABLE Readers (id INT, age INT, preference VARCHAR(20)); INSERT INTO Readers (id, age, preference) VALUES (1, 25, \u0027technology\u0027), (2, 32, \u0027politics\u0027), (3, 45, \u0027technology\u0027);", + "sql": "SELECT AVG(age) FROM Readers WHERE preference \u003d \u0027technology\u0027;", + "sql_explanation": "This query calculates the average age of readers who prefer reading articles about technology by filtering the \"Readers\" table based on the \"preference\" column and then calculating the average value of the \"age\" column." 
+}, { + "id": "5411", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of male journalists in the \u0027news_reporters\u0027 table?", + "sql_context": "CREATE TABLE news_reporters (id INT, name VARCHAR(50), gender VARCHAR(10), age INT); INSERT INTO news_reporters (id, name, gender, age) VALUES (1, \u0027John Doe\u0027, \u0027Male\u0027, 35), (2, \u0027Jane Smith\u0027, \u0027Female\u0027, 32), (3, \u0027Alice Johnson\u0027, \u0027Female\u0027, 40);", + "sql": "SELECT MIN(age) FROM news_reporters WHERE gender \u003d \u0027Male\u0027;", + "sql_explanation": "This query retrieves the minimum age of male journalists in the \u0027news_reporters\u0027 table. It uses the MIN() aggregation function to find the lowest age and filters the records using the WHERE clause to only include male journalists." 
+}, { + "id": "5545", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique news categories are there in the \u0027news_categories\u0027 table?", + "sql_context": "CREATE TABLE news_categories (id INT, category VARCHAR(30)); INSERT INTO news_categories (id, category) VALUES (1, \u0027Politics\u0027); INSERT INTO news_categories (id, category) VALUES (2, \u0027Sports\u0027); INSERT INTO news_categories (id, category) VALUES (3, \u0027Entertainment\u0027);", + "sql": "SELECT COUNT(DISTINCT category) FROM news_categories;", + "sql_explanation": "The SQL query counts the number of unique news categories by selecting the \u0027category\u0027 column and applying the COUNT() function to it with the DISTINCT keyword. This returns the count of distinct categories in the \u0027news_categories\u0027 table." 
+}, { + "id": "1106", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which donors from Oceania have not donated in the last 6 months?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, Country TEXT, DateOfLastDonation DATE); INSERT INTO Donors (DonorID, DonorName, Country, DateOfLastDonation) VALUES (1, \u0027Kate\u0027, \u0027Australia\u0027, \u00272022-02-20\u0027), (2, \u0027Luke\u0027, \u0027New Zealand\u0027, \u00272022-01-05\u0027), (3, \u0027Mia\u0027, \u0027Fiji\u0027, \u00272021-12-10\u0027);", + "sql": "SELECT Donors.DonorID, Donors.DonorName, Donors.Country FROM Donors WHERE Donors.Country LIKE \u0027Oceania%\u0027 AND Donors.DateOfLastDonation \u003c DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", + "sql_explanation": "This query retrieves donors from Oceania who have not donated in the last 6 months by filtering the Donors table for the specified conditions and selecting the required columns." 
+}, { + "id": "1698", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new programs and their respective participant counts for 2023.", + "sql_context": "CREATE TABLE programs (id INT, program_name TEXT, participant_count INT, participant_disability TEXT, year INT); INSERT INTO programs (id, program_name, participant_count, participant_disability, year) VALUES (1, \u0027Education\u0027, 200, \u0027Yes\u0027, 2021), (2, \u0027Health\u0027, 150, \u0027No\u0027, 2021);", + "sql": "INSERT INTO programs (program_name, participant_count, participant_disability, year) VALUES (\u0027Science\u0027, 120, \u0027No\u0027, 2023), (\u0027Arts\u0027, 180, \u0027Yes\u0027, 2023);", + "sql_explanation": "This query inserts two new records for programs in the year 2023 with respective participant counts and disability information." 
+}, { + "id": "2066", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hours did volunteers contribute to disaster relief programs in Japan in 2020?", + "sql_context": "CREATE TABLE Volunteers (id INT, volunteer_name VARCHAR(255), program VARCHAR(255), volunteer_hours INT, volunteer_date DATE); INSERT INTO Volunteers (id, volunteer_name, program, volunteer_hours, volunteer_date) VALUES (1, \u0027Yamada Taro\u0027, \u0027Disaster Relief\u0027, 25, \u00272020-03-14\u0027), (2, \u0027Tanaka Hanako\u0027, \u0027Disaster Relief\u0027, 30, \u00272020-11-05\u0027);", + "sql": "SELECT SUM(volunteer_hours) FROM Volunteers WHERE program \u003d \u0027Disaster Relief\u0027 AND volunteer_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "This query calculates the total number of hours contributed by volunteers to disaster relief programs in Japan during 2020. It filters the Volunteers table based on the program name being \u0027Disaster Relief\u0027 and the volunteer date falling within the specified date range. Then, it sums up the volunteer hours for the matching records." 
+}, { + "id": "2286", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount by corporations in Q1 2022?", + "sql_context": "CREATE TABLE Donations (DonationID int, DonorType varchar(50), DonationAmount decimal(10,2), DonationDate date); INSERT INTO Donations (DonationID, DonorType, DonationAmount, DonationDate) VALUES (1, \u0027Corporation\u0027, 2500, \u00272022-01-10\u0027); INSERT INTO Donations (DonationID, DonorType, DonationAmount, DonationDate) VALUES (2, \u0027Foundation\u0027, 10000, \u00272022-03-15\u0027);", + "sql": "SELECT AVG(DonationAmount) FROM Donations WHERE DonorType \u003d \u0027Corporation\u0027 AND DonationDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "This query calculates the average donation amount for corporations in Q1 2022 by averaging the DonationAmount column, filtering rows by DonorType and DonationDate using WHERE clause and BETWEEN clause." 
+}, { + "id": "2509", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record for a program \u0027Cultural Arts\u0027 with a start date of Sep 1, 2023 and an end date of Aug 31, 2024 in the \u0027Programs\u0027 table", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramName VARCHAR(50), StartDate DATE, EndDate DATE);", + "sql": "INSERT INTO Programs (ProgramID, ProgramName, StartDate, EndDate) VALUES (3, \u0027Cultural Arts\u0027, \u00272023-09-01\u0027, \u00272024-08-31\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027Programs\u0027 table for a \u0027Cultural Arts\u0027 program, with a start date of Sep 1, 2023 and an end date of Aug 31, 2024." +}, { + "id": "3111", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find total donation amount and number of donors from \u0027young\u0027 donors in 2021.", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR, age INT, donation_amount DECIMAL, donation_date DATE);", + "sql": "SELECT SUM(donation_amount) as total_donation_amount, COUNT(DISTINCT donors.id) as num_donors FROM donors", + "sql_explanation": "This query calculates the total donation amount and number of distinct donors from the \u0027donors\u0027 table where the donor\u0027s age is within the \u0027young\u0027 range in 2021." 
+}, { + "id": "3235", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique donors donated in \u0027Q3 2022\u0027?", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount DECIMAL, donation_date DATE); INSERT INTO donors (donor_id, donor_name, donation_amount, donation_date) VALUES (1, \u0027John Doe\u0027, 50.00, \u00272022-07-15\u0027);", + "sql": "SELECT COUNT(DISTINCT donor_id) FROM donors WHERE donation_date BETWEEN \u00272022-07-01\u0027 AND \u00272022-09-30\u0027;", + "sql_explanation": "This query calculates the number of unique donors who made a donation in Q3 2022 by selecting the distinct donor_id values from the donors table where the donation_date falls within the specified date range." 
+}, { + "id": "3588", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by individual donors from the USA in 2021?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, Country TEXT, Amount DECIMAL(10,2), DonationYear INT); INSERT INTO Donors (DonorID, DonorName, Country, Amount, DonationYear) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 500.00, 2021), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 300.00, 2021);", + "sql": "SELECT SUM(Amount) FROM Donors WHERE Country \u003d \u0027USA\u0027 AND DonationYear \u003d 2021 AND DonorID !\u003d 1;", + "sql_explanation": "This query calculates the total donation amount from individual donors in the USA for the year 2021, excluding DonorID 1." 
+}, { + "id": "3619", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for programs focused on Climate Change?", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Budget DECIMAL); INSERT INTO Programs (ProgramID, ProgramName, Budget) VALUES (1, \u0027Education\u0027, 50000.00), (2, \u0027Environment\u0027, 75000.00), (3, \u0027Climate Change\u0027, 100000.00);", + "sql": "SELECT SUM(Programs.Budget) FROM Programs WHERE Programs.ProgramName LIKE \u0027%Climate Change%\u0027;", + "sql_explanation": "This query calculates the total budget for programs focused on Climate Change. It filters for records where the ProgramName contains \u0027Climate Change\u0027 and sums the corresponding Budget values." 
+}, { + "id": "3917", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in 2021 from Germany?", + "sql_context": "CREATE TABLE volunteers (id INT, name TEXT, country TEXT, signup_date DATE); INSERT INTO volunteers (id, name, country, signup_date) VALUES (1, \u0027Anna Muller\u0027, \u0027Germany\u0027, \u00272021-08-20\u0027), (2, \u0027Benedict Schmidt\u0027, \u0027Austria\u0027, \u00272022-02-15\u0027);", + "sql": "SELECT COUNT(*) FROM volunteers WHERE country \u003d \u0027Germany\u0027 AND YEAR(signup_date) \u003d 2021;", + "sql_explanation": "The SQL query counts the number of volunteers from Germany in 2021 by using the COUNT() function on all records (*), filtering the data by the country and the YEAR() function applied on the signup_date column." 
+}, { + "id": "3933", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers for education programs in Australia?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, program TEXT, country TEXT); INSERT INTO volunteers VALUES (1, \u0027Alex Johnson\u0027, \u0027Education\u0027, \u0027Australia\u0027), (2, \u0027Emily Brown\u0027, \u0027Healthcare\u0027, \u0027Australia\u0027), (3, \u0027Oliver Smith\u0027, \u0027Education\u0027, \u0027Australia\u0027);", + "sql": "SELECT COUNT(*) FROM volunteers WHERE program \u003d \u0027Education\u0027 AND country \u003d \u0027Australia\u0027;", + "sql_explanation": "This SQL query counts the total number of volunteers for education programs in Australia. It uses the COUNT() function with an asterisk (*) to count all rows where the program is \u0027Education\u0027 and the country is \u0027Australia\u0027." 
+}, { + "id": "4111", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update total donations for \u0027John Doe\u0027 by adding $100.", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, TotalDonation DECIMAL(10,2)); INSERT INTO Donors (DonorID, DonorName, TotalDonation) VALUES (1, \u0027John Doe\u0027, 500.00), (2, \u0027Jane Smith\u0027, 350.00), (3, \u0027Mike Johnson\u0027, 200.00);", + "sql": "UPDATE Donors SET TotalDonation \u003d TotalDonation + 100 WHERE DonorName \u003d \u0027John Doe\u0027;", + "sql_explanation": "This SQL query updates the total donation amount for \u0027John Doe\u0027 by $100 by using the UPDATE statement and the WHERE clause to filter for the specific donor." 
+}, { + "id": "4338", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by donors from California and Texas?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, state TEXT, donation_amount DECIMAL); INSERT INTO donors (id, name, state, donation_amount) VALUES (1, \u0027John Doe\u0027, \u0027California\u0027, 150.00), (2, \u0027Jane Smith\u0027, \u0027Texas\u0027, 200.00);", + "sql": "SELECT SUM(donation_amount) FROM donors WHERE state IN (\u0027California\u0027, \u0027Texas\u0027);", + "sql_explanation": "This query calculates the total amount donated by donors from California and Texas by using the SUM function on the donation_amount column, and filtering the data for donors from California and Texas using the WHERE clause and the IN keyword." 
+}, { + "id": "4554", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by individuals in the arts program?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, program TEXT, amount INT); INSERT INTO donors (id, name, program, amount) VALUES (1, \u0027John Doe\u0027, \u0027Arts\u0027, 500), (2, \u0027Jane Smith\u0027, \u0027Education\u0027, 1000), (3, \u0027Alice Johnson\u0027, \u0027Arts\u0027, 750);", + "sql": "SELECT SUM(amount) FROM donors WHERE program \u003d \u0027Arts\u0027 AND id NOT IN (4, 5);", + "sql_explanation": "This query calculates the total amount donated to the arts program by summing the \u0027amount\u0027 column where the \u0027program\u0027 is \u0027Arts\u0027 and the \u0027id\u0027 is not 4 or 5, as those IDs may represent corporate or other non-individual donors." 
+}, { + "id": "5243", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum donation amount from donors in Texas?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, state TEXT, donation_amount DECIMAL); INSERT INTO donors (id, name, state, donation_amount) VALUES (1, \u0027John Doe\u0027, \u0027Texas\u0027, 150.00), (2, \u0027Jane Smith\u0027, \u0027Texas\u0027, 200.00);", + "sql": "SELECT MIN(donation_amount) FROM donors WHERE state \u003d \u0027Texas\u0027;", + "sql_explanation": "This query calculates the minimum donation amount from donors in Texas by using the MIN function on the donation_amount column, and filtering the data for donors from Texas using the WHERE clause." 
+}, { + "id": "5352", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount in the state of New York?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, state TEXT, amount DECIMAL(10,2)); INSERT INTO donations (id, donor_id, state, amount) VALUES (1, 1, \u0027New York\u0027, 50.00), (2, 2, \u0027New York\u0027, 30.00), (3, 1, \u0027New York\u0027, 100.00);", + "sql": "SELECT AVG(amount) FROM donations WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the average donation amount in New York by finding the mean \u0027amount\u0027 for donations from \u0027New York\u0027." +}, { + "id": "5407", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum donation amount in the state of Florida?", + "sql_context": "CREATE TABLE donations (id INT, state TEXT, amount DECIMAL(10,2)); INSERT INTO donations (id, state, amount) VALUES (1, \u0027Florida\u0027, 50.00), (2, \u0027Florida\u0027, 30.00), (3, \u0027Florida\u0027, 100.00);", + "sql": "SELECT MIN(amount) FROM donations WHERE state \u003d \u0027Florida\u0027;", + "sql_explanation": "This query calculates the minimum donation amount in Florida by finding the minimum \u0027amount\u0027 for donations from \u0027Florida\u0027." 
+}, { + "id": "5737", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the \"Sports\" program with ID 7?", + "sql_context": "CREATE TABLE programs (program_id INT, program_name TEXT); INSERT INTO programs (program_id, program_name) VALUES (1, \u0027Youth Mentoring\u0027), (2, \u0027Food Security\u0027), (3, \u0027Elder Care\u0027), (4, \u0027Arts Education\u0027), (5, \u0027After School Program\u0027), (6, \u0027Environmental Education\u0027), (7, \u0027Sports\u0027);", + "sql": "DELETE FROM programs WHERE program_id \u003d 7;", + "sql_explanation": "This query deletes the \"Sports\" program with an ID of 7 by removing its record from the programs table." +}, { + "id": "5818", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of hours volunteered by a volunteer?", + "sql_context": "CREATE TABLE volunteers (id INT, name TEXT, program TEXT, hours INT); INSERT INTO volunteers (id, name, program, hours) VALUES (1, \u0027John Doe\u0027, \u0027Food Distribution\u0027, 10), (2, \u0027Jane Smith\u0027, \u0027Education Support\u0027, 20);", + "sql": "SELECT MAX(hours) FROM volunteers;", + "sql_explanation": "This query calculates the maximum number of hours volunteered by a volunteer by using the MAX function on the hours column." 
+}, { + "id": "5834", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many total donations are there in the \u0027donations\u0027 table?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2));", + "sql": "SELECT COUNT(*) FROM donations;", + "sql_explanation": "This SQL query counts all rows in the \u0027donations\u0027 table to determine the total number of donations." +}, { + "id": "1122", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget spent on programs focused on climate change education in Q2 2022?", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramType TEXT, ProgramBudget DECIMAL(10,2), ProgramStartDate DATE, ProgramEndDate DATE); INSERT INTO Programs (ProgramID, ProgramType, ProgramBudget, ProgramStartDate, ProgramEndDate) VALUES (1, \u0027Climate Change Education\u0027, 7000.00, \u00272022-04-01\u0027, \u00272022-06-30\u0027);", + "sql": "SELECT SUM(ProgramBudget) as TotalBudget FROM Programs WHERE ProgramType \u003d \u0027Climate Change Education\u0027 AND ProgramStartDate \u003c\u003d \u00272022-06-30\u0027 AND ProgramEndDate \u003e\u003d \u00272022-04-01\u0027;", + "sql_explanation": "This query calculates the total budget spent on programs focused on climate change 
education in Q2 2022 by using SUM() function and the WHERE clause to filter programs with the type \u0027Climate Change Education\u0027 and the start and end dates within Q2 2022." +}, { + "id": "1980", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique one-time donors made donations in \u0027q4\u0027 of \u00272022\u0027?", + "sql_context": "CREATE TABLE Donations (id INT, donor_type VARCHAR(10), donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO Donations (id, donor_type, donation_amount, donation_date) VALUES (1, \u0027one-time\u0027, 50.00, \u00272022-01-01\u0027); INSERT INTO Donations (id, donor_type, donation_amount, donation_date) VALUES (2, \u0027recurring\u0027, 25.00, \u00272022-01-15\u0027); INSERT INTO Donations (id, donor_type, donation_amount, donation_date) VALUES (3, \u0027one-time\u0027, 75.00, \u00272022-12-31\u0027);", + "sql": "SELECT COUNT(DISTINCT donor_id) FROM Donations WHERE donor_type \u003d \u0027one-time\u0027 AND QUARTER(donation_date) \u003d 4 AND YEAR(donation_date) \u003d 2022;", + "sql_explanation": "This query counts the number of unique donor_ids for one-time donors in the fourth quarter of 2022 by filtering the Donations table for the appropriate donor_type, quarter, and year, and then applying the COUNT function to the distinct donor_ids." 
+}, { + "id": "2230", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of total volunteer hours contributed by female volunteers?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, gender VARCHAR(10), hours_per_week FLOAT); INSERT INTO volunteers (volunteer_id, gender, hours_per_week) VALUES (1, \u0027female\u0027, 5.0), (2, \u0027male\u0027, 8.0), (3, \u0027female\u0027, 3.0), (4, \u0027male\u0027, 4.0), (5, \u0027non-binary\u0027, 6.0);", + "sql": "SELECT (SUM(CASE WHEN gender \u003d \u0027female\u0027 THEN hours_per_week ELSE 0 END) / SUM(hours_per_week)) * 100 AS percentage FROM volunteers;", + "sql_explanation": "This query calculates the total volunteer hours contributed by female volunteers and divides it by the total volunteer hours contributed by all volunteers. The result is multiplied by 100 to express it as a percentage." 
+}, { + "id": "2478", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new program with budget", + "sql_context": "CREATE TABLE Programs (Id INT, ProgramName VARCHAR(50), Budget DECIMAL(10,2), StartDate DATE, EndDate DATE);", + "sql": "INSERT INTO Programs (Id, ProgramName, Budget, StartDate, EndDate) VALUES (3, \u0027Arts\u0027, 7000.00, \u00272022-01-01\u0027, \u00272022-12-31\u0027);", + "sql_explanation": "This query inserts a new program record into the Programs table with an ID of 3, a name of \u0027Arts\u0027, a budget of $7000, and a start/end date of Jan 1, 2022 and Dec 31, 2022." +}, { + "id": "2748", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors made donations in the last quarter, including those who made multiple donations during the same time period?", + "sql_context": "CREATE TABLE donation_time (donor_id INT, donation_date DATE);INSERT INTO donation_time VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-02-01\u0027), (3, \u00272022-03-01\u0027), (1, \u00272022-04-01\u0027), (4, \u00272022-05-01\u0027), (4, \u00272022-06-01\u0027), (5, \u00272022-07-01\u0027), (5, \u00272022-08-01\u0027), (5, \u00272022-09-01\u0027);", + "sql": "SELECT COUNT(DISTINCT donor_id) FROM donation_time WHERE 
donation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH);", + "sql_explanation": "The SQL query uses the DATE_SUB and CURRENT_DATE functions to get the date three months ago and then selects the count of distinct donor_id\u0027s that made donations on or after that date, counting each donor once regardless of how many donations they made during that time period." +}, { + "id": "2772", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for health programs in 2022?", + "sql_context": "CREATE TABLE ProgramBudget (ProgramID INT, ProgramName TEXT, BudgetYear INT, BudgetAmount INT); INSERT INTO ProgramBudget (ProgramID, ProgramName, BudgetYear, BudgetAmount) VALUES (1, \u0027Health Clinic\u0027, 2022, 50000), (2, \u0027Mental Health Services\u0027, 2021, 30000);", + "sql": "SELECT SUM(BudgetAmount) FROM ProgramBudget WHERE ProgramBudget.ProgramName LIKE \u0027%Health%\u0027 AND BudgetYear \u003d 2022;", + "sql_explanation": "This query calculates the total budget for health programs in 2022 by filtering the ProgramBudget table for health-related programs in the year 2022, and then summing the BudgetAmount column." 
+}, { + "id": "2887", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many programs were successfully completed in H1 2022?", + "sql_context": "CREATE TABLE Programs (ProgramID int, Name varchar(50), Status varchar(50)); INSERT INTO Programs (ProgramID, Name, Status) VALUES (1, \u0027Education\u0027, \u0027Completed\u0027), (2, \u0027Health\u0027, \u0027In Progress\u0027), (3, \u0027Environment\u0027, \u0027Completed\u0027);", + "sql": "SELECT COUNT(*) as TotalPrograms FROM Programs WHERE STATUS \u003d \u0027Completed\u0027 AND MONTH(StartDate) BETWEEN 1 AND 6;", + "sql_explanation": "This SQL query calculates the number of programs that were successfully completed in H1 2022. It starts by selecting the COUNT(*) function, which counts the number of records that match the specified criteria. The WHERE clause filters the records to only include completed programs that started in H1 2022. The MONTH() function extracts the month from the StartDate column, and the BETWEEN clause filters the records to only include those with a start date between January and June. The result is the total number of programs that were successfully completed in H1 2022." 
+}, { + "id": "3516", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget for each program in FY2022?", + "sql_context": "CREATE TABLE Programs (program_id INT, program_name VARCHAR(50), budget DECIMAL(10, 2)); INSERT INTO Programs (program_id, program_name, budget) VALUES (1001, \u0027Education\u0027, 25000.00), (1002, \u0027Health\u0027, 30000.00), (1003, \u0027Environment\u0027, 20000.00);", + "sql": "SELECT program_id, budget as total_budget FROM Programs WHERE program_id IN (1001, 1002, 1003);", + "sql_explanation": "This query calculates the total budget for each program in FY2022. It filters the Programs table for records where program_id is within the set of programs in FY2022 and groups the results by program_id. Then, it returns the budget for each group, providing the total budget for each program." 
+}, { + "id": "3705", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the programs with zero budget and more than 50 volunteers?", + "sql_context": "CREATE TABLE ZeroBudgetPrograms (ProgramID INT, ProgramName TEXT, Volunteers INT, Budget DECIMAL(10,2)); INSERT INTO ZeroBudgetPrograms (ProgramID, ProgramName, Volunteers, Budget) VALUES (1, \u0027Feeding America\u0027, 75, 0);", + "sql": "SELECT ProgramID, ProgramName FROM ZeroBudgetPrograms WHERE Budget \u003d 0 AND Volunteers \u003e 50;", + "sql_explanation": "The SQL query identifies the programs with zero budget and more than 50 volunteers by selecting the ProgramID and ProgramName from the ZeroBudgetPrograms table where Budget is equal to 0 and Volunteers is greater than 50." 
+}, { + "id": "3973", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers and their combined hours for the \u0027Animal Welfare\u0027 program?", + "sql_context": "CREATE TABLE volunteers (id INT, program VARCHAR(255), hours INT); INSERT INTO volunteers (id, program, hours) VALUES (1, \u0027Animal Welfare\u0027, 20), (2, \u0027Education\u0027, 30);", + "sql": "SELECT program, COUNT(*), SUM(hours) FROM volunteers WHERE program \u003d \u0027Animal Welfare\u0027;", + "sql_explanation": "The SQL query counts the number of \u0027id\u0027 (volunteers) and adds up their \u0027hours\u0027 for the \u0027Animal Welfare\u0027 program in the \u0027volunteers\u0027 table using the COUNT() and SUM() functions." 
+}, { + "id": "4516", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount for first-time donors?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, donation_amount FLOAT, donation_type TEXT, is_first_time_donor BOOLEAN); INSERT INTO donations (id, donor_id, donation_amount, donation_type, is_first_time_donor) VALUES (1, 1, 50.00, \u0027recurring\u0027, false), (2, 2, 40.00, \u0027one-time\u0027, true), (3, 1, 50.00, \u0027recurring\u0027, false), (4, 3, 30.00, \u0027one-time\u0027, true);", + "sql": "SELECT AVG(donation_amount) FROM donations WHERE is_first_time_donor \u003d true;", + "sql_explanation": "Select the average donation amount where the donor is making their first-time donation." 
+}, { + "id": "4530", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of unique cities where the \u0027Feeding America\u0027 program has been held.", + "sql_context": "CREATE TABLE programs (id INT, city TEXT, program TEXT); INSERT INTO programs (id, city, program) VALUES (1, \u0027NYC\u0027, \u0027Feeding America\u0027); INSERT INTO programs (id, city, program) VALUES (2, \u0027LA\u0027, \u0027Feeding America\u0027); INSERT INTO programs (id, city, program) VALUES (3, \u0027NYC\u0027, \u0027Feeding America\u0027);", + "sql": "SELECT COUNT(DISTINCT city) FROM programs WHERE program \u003d \u0027Feeding America\u0027;", + "sql_explanation": "This query counts the number of unique values in the \u0027city\u0027 column for rows in the \u0027programs\u0027 table where the \u0027program\u0027 column is equal to \u0027Feeding America\u0027." 
+}, { + "id": "4701", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique program categories and their corresponding budgets, excluding categories with a budget of zero?", + "sql_context": "CREATE TABLE program_categories (program_category VARCHAR(20), budget INT);INSERT INTO program_categories VALUES (\u0027Arts\u0027, 0), (\u0027Education\u0027, 5000), (\u0027Health\u0027, 10000), (\u0027Science\u0027, 2000);", + "sql": "SELECT program_category, budget FROM program_categories WHERE budget \u003e 0;", + "sql_explanation": "The SQL query filters out the records with a budget of zero using the WHERE clause, resulting in the unique program categories and their corresponding non-zero budgets." +}, { + "id": "5285", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget for 2022?", + "sql_context": "CREATE TABLE Budget (BudgetID int, BudgetYear int, BudgetAmount decimal(10,2)); INSERT INTO Budget (BudgetID, BudgetYear, BudgetAmount) VALUES (1, 2022, 50000), (2, 2023, 60000);", + "sql": "SELECT SUM(BudgetAmount) FROM Budget WHERE BudgetYear \u003d 2022;", + "sql_explanation": "The query calculates the total budget for 2022 by summing the BudgetAmount for that year." 
+}, { + "id": "5314", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a volunteer with ID 4 who served 8 hours.", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT, Hours FLOAT); INSERT INTO Volunteers (VolunteerID, Name, Hours) VALUES (1, \u0027Alice\u0027, 7.0), (2, \u0027Bob\u0027, 4.0), (3, \u0027Charlie\u0027, 6.0);", + "sql": "INSERT INTO Volunteers (VolunteerID, Hours) VALUES (4, 8.0);", + "sql_explanation": "This query inserts a new record into the Volunteers table for a volunteer with ID 4 who served 8 hours. It inserts the VolunteerID and Hours values into the Volunteers table, leaving the Name field NULL." 
+}, { + "id": "5431", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the donation amount to $100 for the user \u0027Jane\u0027 in the Donations table.", + "sql_context": "CREATE TABLE Donations (id INT, user VARCHAR(255), amount DECIMAL(10, 2)); INSERT INTO Donations (id, user, amount) VALUES (1, \u0027John\u0027, 50.00), (2, \u0027Jane\u0027, 75.00);", + "sql": "UPDATE Donations SET amount \u003d 100.00 WHERE user \u003d \u0027Jane\u0027;", + "sql_explanation": "The SQL query updates the donation amount for the user \u0027Jane\u0027 in the Donations table by using the UPDATE keyword, setting the amount column to 100.00, and filtering the table for the user \u0027Jane\u0027." +}, { + "id": "5520", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for the organization?", + "sql_context": "CREATE TABLE Budget (id INT, amount DECIMAL(10, 2));", + "sql": "SELECT SUM(Budget.amount) as total_budget FROM Budget;", + "sql_explanation": "The SQL query calculates the sum of the amount column from the Budget table to find the total budget for the organization." 
+}, { + "id": "5528", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of volunteer hours per program?", + "sql_context": "CREATE TABLE Programs (ProgramID INT, Name TEXT, Region TEXT, Hours FLOAT); INSERT INTO Programs (ProgramID, Name, Region, Hours) VALUES (1, \u0027Reading Club\u0027, \u0027Northeast\u0027, 25), (2, \u0027Garden Club\u0027, \u0027Midwest\u0027, 30);", + "sql": "SELECT AVG(Hours) as AvgHoursPerProgram FROM Programs;", + "sql_explanation": "This query calculates the average number of volunteer hours per program by summing the hours column and dividing by the number of programs." +}, { + "id": "5628", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete donations over $5000", + "sql_context": "CREATE TABLE Donations (Id INT, DonorName VARCHAR(50), DonationAmount DECIMAL(10,2), DonationDate DATE); INSERT INTO Donations (Id, DonorName, DonationAmount, DonationDate) VALUES (1, \u0027John Doe\u0027, 50.00, \u00272021-01-01\u0027), (2, \u0027Jane Smith\u0027, 10000.00, \u00272021-01-02\u0027);", + "sql": "DELETE FROM Donations WHERE DonationAmount \u003e 5000;", + "sql_explanation": "This query deletes any donations with an amount greater than $5000." 
+}, { + "id": "5707", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique program outcomes for each program.", + "sql_context": "CREATE TABLE Programs (id INT, name TEXT, outcome TEXT); INSERT INTO Programs (id, name, outcome) VALUES (1, \u0027Education\u0027, \u0027Literacy\u0027), (2, \u0027Health\u0027, \u0027Wellness\u0027);", + "sql": "SELECT DISTINCT name, outcome FROM Programs;", + "sql_explanation": "This query selects the distinct combination of program names and their outcomes from the Programs table." +}, { + "id": "5714", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique cities are associated with volunteers?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, City TEXT); INSERT INTO Volunteers (VolunteerID, City) VALUES (1, \u0027New York\u0027), (2, \u0027Toronto\u0027), (3, \u0027Los Angeles\u0027), (4, \u0027New York\u0027);", + "sql": "SELECT COUNT(DISTINCT City) FROM Volunteers;", + "sql_explanation": "This query counts the number of unique cities associated with volunteers by using the COUNT and DISTINCT keywords on the City column." 
+}, { + "id": "5831", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of volunteers in the table?", + "sql_context": "CREATE TABLE volunteers( id INT PRIMARY KEY NOT NULL, name VARCHAR(50), age INT, city VARCHAR(30), country VARCHAR(30) );", + "sql": "SELECT AVG(age) FROM volunteers;", + "sql_explanation": "This SQL statement calculates the average age of volunteers by using the AVG function on the age column of the \u0027volunteers\u0027 table." +}, { + "id": "324", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the internet penetration in African countries with a total population greater than 20 million?", + "sql_context": "CREATE TABLE internet_access (id INT, country VARCHAR(50), urban_population FLOAT, rural_population FLOAT, total_population FLOAT, internet_users FLOAT); INSERT INTO internet_access (id, country, urban_population, rural_population, total_population, internet_users) VALUES (3, \u0027Nigeria\u0027, 71.4, 57.1, 200.96, 112.03);", + "sql": "SELECT country, (internet_users / total_population) * 100 as internet_penetration FROM internet_access WHERE total_population \u003e 20000000 AND country IN (\u0027Nigeria\u0027, \u0027South Africa\u0027, \u0027Egypt\u0027, 
\u0027Ethiopia\u0027, \u0027Kenya\u0027) ORDER BY internet_penetration DESC;", + "sql_explanation": "This query selects the country and calculates the internet penetration (percentage of internet users to total population) from the internet_access table for African countries with a total population greater than 20 million, ordered by internet penetration in descending order." +}, { + "id": "1557", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest and lowest technology accessibility scores?", + "sql_context": "CREATE TABLE Country_Accessibility (country VARCHAR(255), score INT); INSERT INTO Country_Accessibility (country, score) VALUES (\u0027USA\u0027, 85), (\u0027Canada\u0027, 80), (\u0027Mexico\u0027, 70), (\u0027Brazil\u0027, 65), (\u0027Argentina\u0027, 75);", + "sql": "SELECT country, score FROM Country_Accessibility ORDER BY score DESC LIMIT 1; SELECT country, score FROM Country_Accessibility ORDER BY score ASC LIMIT 1;", + "sql_explanation": "These SQL queries find the countries with the highest and lowest technology accessibility scores. The first query uses the ORDER BY clause to sort the records by score in descending order, and the LIMIT clause to return only the top record. The second query does the same but sorts the records in ascending order to return the bottom record." 
+}, { + "id": "1812", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the \"digital_divide\" table for the country \"Brazil\" and \"South Africa\" with the provided data", + "sql_context": "CREATE TABLE digital_divide (id INT PRIMARY KEY, country VARCHAR(50), internet_penetration DECIMAL(5,2), mobile_penetration DECIMAL(5,2)); INSERT INTO digital_divide (id, country, internet_penetration, mobile_penetration) VALUES (1, \u0027Brazil\u0027, 75.00, 105.00); INSERT INTO digital_divide (id, country, internet_penetration, mobile_penetration) VALUES (2, \u0027South Africa\u0027, 55.00, 150.00);", + "sql": "INSERT INTO digital_divide (country, internet_penetration, mobile_penetration) VALUES (\u0027Brazil\u0027, 75.00, 105.00), (\u0027South Africa\u0027, 55.00, 150.00);", + "sql_explanation": "This SQL query inserts new records into the \"digital_divide\" table for the country \"Brazil\" and \"South Africa\" with the provided data. It uses the INSERT INTO statement with VALUES clause to insert the new records." 
+}, { + "id": "2099", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"description\" for the \"VoiceOver\" record in the \"accessibility_features\" table to \"Screen reader for visually impaired users on iOS and macOS\"", + "sql_context": "CREATE TABLE accessibility_features (name TEXT, description TEXT, platform TEXT); INSERT INTO accessibility_features (name, description, platform) VALUES (\u0027VoiceOver\u0027, \u0027Screen reader for visually impaired users on iOS\u0027, \u0027iOS\u0027); INSERT INTO accessibility_features (name, description, platform) VALUES (\u0027Dictation\u0027, \u0027Speech-to-text functionality\u0027, \u0027Android\u0027);", + "sql": "UPDATE accessibility_features SET description \u003d \u0027Screen reader for visually impaired users on iOS and macOS\u0027 WHERE name \u003d \u0027VoiceOver\u0027;", + "sql_explanation": "This SQL query updates the \"description\" for the \"VoiceOver\" record in the \"accessibility_features\" table to \"Screen reader for visually impaired users on iOS and macOS\"." 
+}, { + "id": "2123", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of digital divide initiatives in North America and Oceania?", + "sql_context": "CREATE TABLE digital_divide_initiatives (id INT, initiative_name VARCHAR(255), location VARCHAR(255), initiative_type VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM digital_divide_initiatives WHERE location IN (\u0027North America\u0027, \u0027Oceania\u0027) AND initiative_type \u003d \u0027digital divide\u0027;", + "sql_explanation": "This query calculates the total number of digital divide initiatives in North America and Oceania by selecting the COUNT(*) column where the location is either North America or Oceania and the initiative_type is \u0027digital divide\u0027." 
+}, { + "id": "2307", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of accessible technology products for people with disabilities in Canada and the United States?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), country VARCHAR(50), accessibility_rating FLOAT); INSERT INTO products (product_id, product_name, country, accessibility_rating) VALUES (1, \u0027Screen Reader Software\u0027, \u0027Canada\u0027, 4.5), (2, \u0027Adaptive Mouse\u0027, \u0027United States\u0027, 4.7);", + "sql": "SELECT SUM(accessibility_rating) FROM products WHERE country IN (\u0027Canada\u0027, \u0027United States\u0027) AND accessibility_rating IS NOT NULL;", + "sql_explanation": "This query calculates the total accessibility rating of technology products in Canada and the United States by summing the accessibility_rating column where the country is either Canada or the United States." 
+}, { + "id": "2394", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record in the \"ai_ethics\" table for \"Alibaba Group\" with \"Asia\" as the region and \"AI ethical guidelines for AI research\" as the guidelines", + "sql_context": "CREATE TABLE ai_ethics (company TEXT, region TEXT, guidelines TEXT); INSERT INTO ai_ethics (company, region, guidelines) VALUES (\u0027Microsoft\u0027, \u0027North America\u0027, \u0027Ethical AI guidelines for AI development\u0027); INSERT INTO ai_ethics (company, region, guidelines) VALUES (\u0027Google\u0027, \u0027Europe\u0027, \u0027AI ethical guidelines for AI usage\u0027);", + "sql": "INSERT INTO ai_ethics (company, region, guidelines) VALUES (\u0027Alibaba Group\u0027, \u0027Asia\u0027, \u0027AI ethical guidelines for AI research\u0027);", + "sql_explanation": "This SQL query inserts a new record in the \"ai_ethics\" table for \"Alibaba Group\" with \"Asia\" as the region and \"AI ethical guidelines for AI research\" as the guidelines." 
+}, { + "id": "2711", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum price of devices produced by companies located in underrepresented communities?", + "sql_context": "CREATE TABLE Devices (id INT, name VARCHAR(50), company VARCHAR(50), price DECIMAL(5,2), community VARCHAR(50)); INSERT INTO Devices (id, name, company, price, community) VALUES (1, \u0027Phone X\u0027, \u0027ABC Tech\u0027, 200.00, \u0027Minority Owned Business\u0027), (2, \u0027Tablet Y\u0027, \u0027XYZ Enterprises\u0027, 350.00, \u0027Women Owned Business\u0027), (3, \u0027Laptop Z\u0027, \u0027Tech for Good\u0027, 700.00, \u0027Non-Profit\u0027);", + "sql": "SELECT MIN(price) FROM Devices WHERE community IN (\u0027Minority Owned Business\u0027, \u0027Women Owned Business\u0027, \u0027Non-Profit\u0027);", + "sql_explanation": "The SQL query calculates the minimum price of devices produced by companies located in underrepresented communities by selecting all records from the Devices table where the community is either a minority owned business, women owned business, or non-profit, and then using the MIN function to find the minimum price." 
+}, { + "id": "3078", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people with disabilities have benefited from technology for social good in Central America in the last 3 years?", + "sql_context": "CREATE TABLE Beneficiaries (BeneficiaryID INT, BeneficiaryName VARCHAR(50), Disability BOOLEAN, Initiative VARCHAR(50), Year INT, Region VARCHAR(50)); INSERT INTO Beneficiaries VALUES (1, \u0027Benef1\u0027, true, \u0027Initiative1\u0027, 2019, \u0027Central America\u0027), (2, \u0027Benef2\u0027, true, \u0027Initiative2\u0027, 2020, \u0027Central America\u0027), (3, \u0027Benef3\u0027, false, \u0027Initiative3\u0027, 2021, \u0027Central America\u0027), (4, \u0027Benef4\u0027, true, \u0027Initiative4\u0027, 2020, \u0027Central America\u0027), (5, \u0027Benef5\u0027, true, \u0027Initiative5\u0027, 2021, \u0027Central America\u0027);", + "sql": "SELECT SUM(Disability) FROM Beneficiaries WHERE Region \u003d \u0027Central America\u0027 AND Year BETWEEN 2019 AND 2021;", + "sql_explanation": "Calculate the total number of people with disabilities who benefited from technology for social good in Central America within the last 3 years using the SUM function and the BETWEEN operator." 
+}, { + "id": "3173", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for digital divide initiatives in the Americas?", + "sql_context": "CREATE TABLE budget_initiatives (initiative TEXT, region TEXT, budget INT); INSERT INTO budget_initiatives (initiative, region, budget) VALUES (\u0027digital divide\u0027, \u0027Americas\u0027, 80000), (\u0027digital divide\u0027, \u0027Europe\u0027, 50000), (\u0027ethical AI\u0027, \u0027Asia-Pacific\u0027, 120000);", + "sql": "SELECT SUM(budget) FROM budget_initiatives WHERE initiative \u003d \u0027digital divide\u0027 AND region \u003d \u0027Americas\u0027;", + "sql_explanation": "This SQL query calculates the total budget for digital divide initiatives in the Americas. It uses the SUM function to find the sum of the \u0027budget\u0027 column for rows where the \u0027initiative\u0027 column is \u0027digital divide\u0027 and the \u0027region\u0027 column is \u0027Americas\u0027." 
+}, { + "id": "3175", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of accessible technology centers per 100,000 people in cities with a population of over 1 million?", + "sql_context": "CREATE TABLE Cities (id INT, name TEXT, country TEXT, population INT, num_accessible_tech_centers INT); INSERT INTO Cities (id, name, country, population, num_accessible_tech_centers) VALUES (1, \u0027New York\u0027, \u0027USA\u0027, 8500000, 20), (2, \u0027Los Angeles\u0027, \u0027USA\u0027, 4000000, 15), (3, \u0027San Francisco\u0027, \u0027USA\u0027, 800000, 10), (4, \u0027London\u0027, \u0027UK\u0027, 8700000, 25), (5, \u0027Tokyo\u0027, \u0027Japan\u0027, 9000000, 18), (6, \u0027Sydney\u0027, \u0027Australia\u0027, 5000000, 8), (7, \u0027Delhi\u0027, \u0027India\u0027, 16000000, 15), (8, \u0027Beijing\u0027, \u0027China\u0027, 21000000, 20), (9, \u0027Mexico City\u0027, \u0027Mexico\u0027, 22000000, 12), (10, \u0027Toronto\u0027, \u0027Canada\u0027, 2800000, 16);", + "sql": "SELECT AVG(num_accessible_tech_centers * 100000.0 / population) FROM Cities WHERE population \u003e 1000000;", + "sql_explanation": "This query calculates the average number of accessible technology centers per 100,000 people in cities with a population of over 1 million. It does so by using the AVG function in combination with a multiplication and division operation, which converts the number of accessible technology centers to a rate per 100,000 people. The WHERE clause filters the data to only include rows with a population greater than 100,000." 
+}, { + "id": "3417", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many technology accessibility patents were granted to women in tech in 2019?", + "sql_context": "CREATE TABLE tech_accessibility_patents (id INT, year INT, community VARCHAR(255), type VARCHAR(255)); INSERT INTO tech_accessibility_patents (id, year, community, type) VALUES (1, 2019, \u0027Women in Tech\u0027, \u0027Technology Accessibility\u0027); INSERT INTO tech_accessibility_patents (id, year, community, type) VALUES (2, 2020, \u0027Minority Tech Group\u0027, \u0027AI for Good\u0027);", + "sql": "SELECT COUNT(*) FROM tech_accessibility_patents WHERE year \u003d 2019 AND community \u003d \u0027Women in Tech\u0027;", + "sql_explanation": "This SQL query counts the number of technology accessibility patents granted to women in tech in 2019 by using the COUNT function on the id column, filtering the data where the year is 2019 and community is Women in Tech." 
+}, { + "id": "3432", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget of projects focused on technology accessibility in Europe?", + "sql_context": "CREATE TABLE projects (id INT, budget FLOAT, focus VARCHAR(255)); INSERT INTO projects (id, budget, focus) VALUES (1, 100000.00, \u0027digital divide\u0027), (2, 150000.00, \u0027climate change\u0027), (3, 120000.00, \u0027technology accessibility\u0027), (4, 75000.00, \u0027healthcare\u0027), (5, 200000.00, \u0027technology accessibility\u0027);", + "sql": "SELECT SUM(budget) FROM projects WHERE focus \u003d \u0027technology accessibility\u0027 AND country \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the total budget of projects focused on technology accessibility in Europe. It does this by selecting the budget column and applying the SUM function to it, while also filtering the rows where the focus column is \u0027technology accessibility\u0027 and the country column is \u0027Europe\u0027." 
+}, { + "id": "3578", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many digital divide initiatives were completed in the last 3 years in Asia?", + "sql_context": "CREATE TABLE Digital_Divide_Initiatives_Year (Year INT, Initiatives INT);", + "sql": "SELECT SUM(Initiatives) FROM Digital_Divide_Initiatives_Year WHERE Year BETWEEN 2019 AND 2021;", + "sql_explanation": "Calculate the total number of digital divide initiatives completed in the last 3 years in Asia by summing the Initiatives column for years between 2019 and 2021." +}, { + "id": "3793", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of employees in companies that have implemented ethical AI in EMEA?", + "sql_context": "CREATE TABLE Companies (id INT, name TEXT, region TEXT, has_ethical_ai BOOLEAN, num_employees INT); INSERT INTO Companies (id, name, region, has_ethical_ai, num_employees) VALUES (1, \u0027TechCo\u0027, \u0027APAC\u0027, true, 1000), (2, \u0027GreenTech\u0027, \u0027APAC\u0027, true, 1500), (3, \u0027EthicalLabs\u0027, \u0027Americas\u0027, true, 750), (4, \u0027Tech4All\u0027, \u0027EMEA\u0027, true, 800), (5, \u0027InclusiveTech\u0027, \u0027APAC\u0027, false, 1200), (6, \u0027GlobalTech\u0027, \u0027EMEA\u0027, true, 3000), 
(7, \u0027SustainableAI\u0027, \u0027EMEA\u0027, true, 1500);", + "sql": "SELECT AVG(num_employees) FROM Companies WHERE region \u003d \u0027EMEA\u0027 AND has_ethical_ai \u003d true;", + "sql_explanation": "This query calculates the average number of employees in companies that have implemented ethical AI in EMEA. It does so by using the AVG function in combination with a WHERE clause, which filters the data to only include rows with the region column set to \u0027EMEA\u0027 and with the has_ethical_ai column set to true." +}, { + "id": "4014", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the organizations involved in technology for social good in Asia.", + "sql_context": "CREATE TABLE organizations (id INT, name VARCHAR(50), region VARCHAR(50), involvement VARCHAR(50)); INSERT INTO organizations (id, name, region, involvement) VALUES (1, \u0027Tech4Good\u0027, \u0027Asia\u0027, \u0027social good\u0027), (2, \u0027GreenTechAsia\u0027, \u0027Asia\u0027, \u0027green technology\u0027), (3, \u0027AIforAsia\u0027, \u0027Asia\u0027, \u0027social good\u0027);", + "sql": "SELECT name FROM organizations WHERE region \u003d \u0027Asia\u0027 AND involvement \u003d \u0027social good\u0027;", + "sql_explanation": "This SQL query lists all the organizations involved in technology for social good in Asia. It does this by selecting the \u0027name\u0027 column, filtering the data where the \u0027region\u0027 column is \u0027Asia\u0027 and the \u0027involvement\u0027 column is \u0027social good\u0027." 
+}, { + "id": "4135", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which technology for social good projects have the highest budgets?", + "sql_context": "CREATE TABLE social_good_projects (id INT, project_name TEXT, budget INT); INSERT INTO social_good_projects (id, project_name, budget) VALUES (1, \u0027Smart Energy Grids\u0027, 900000), (2, \u0027AI for Disaster Response\u0027, 700000), (3, \u0027Accessible Technology Initiative\u0027, 800000);", + "sql": "SELECT project_name, budget FROM social_good_projects ORDER BY budget DESC LIMIT 2;", + "sql_explanation": "This query identifies the two technology for social good projects with the highest budgets. It first creates a table named social_good_projects with the id, project_name, and budget columns. It then inserts sample data into this table. The query itself selects the project_name and budget columns from the social_good_projects table, sorts the results in descending order by the budget, and limits the output to the top two results." 
+}, { + "id": "4208", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for ethical AI initiatives in Q1 2021?", + "sql_context": "CREATE TABLE ethical_ai_initiatives (id INT, initiative_name VARCHAR(255), funding_quarter VARCHAR(10), budget DECIMAL(10,2)); INSERT INTO ethical_ai_initiatives (id, initiative_name, funding_quarter, budget) VALUES (1, \u0027AI Ethics Research\u0027, \u0027Q1 2021\u0027, 30000), (2, \u0027AI Ethics Guidelines Development\u0027, \u0027Q2 2021\u0027, 25000);", + "sql": "SELECT SUM(budget) FROM ethical_ai_initiatives WHERE funding_quarter \u003d \u0027Q1 2021\u0027;", + "sql_explanation": "This query calculates the total budget allocated for ethical AI initiatives in Q1 2021 by using the SUM function and filtering the data for the specific quarter." 
+}, { + "id": "4231", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027status\u0027 column to \u0027completed\u0027 for all projects with \u0027project_id\u0027 between 100 and 200 in the \u0027ai_projects\u0027 table", + "sql_context": "CREATE TABLE ai_projects (id INT PRIMARY KEY, project_name VARCHAR(50), project_id INT, status VARCHAR(20));", + "sql": "UPDATE ai_projects SET status \u003d \u0027completed\u0027 WHERE project_id BETWEEN 100 AND 200;", + "sql_explanation": "This query updates the \u0027status\u0027 column to \u0027completed\u0027 for all projects with \u0027project_id\u0027 between 100 and 200 from the \u0027ai_projects\u0027 table." +}, { + "id": "4434", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update internet_speed column to 20 Mbps in digital_divide table where region is \u0027Rural America\u0027", + "sql_context": "CREATE TABLE digital_divide (region VARCHAR(255), internet_speed FLOAT, updated_on DATE);", + "sql": "UPDATE digital_divide SET internet_speed \u003d 20 WHERE region \u003d \u0027Rural America\u0027;", + "sql_explanation": "This query updates the internet_speed column in the digital_divide table to 20 Mbps for the records where the region is \u0027Rural America\u0027." 
+}, { + "id": "4470", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average budget for digital divide projects in South America.", + "sql_context": "CREATE TABLE digital_divide_projects (id INT, project_name VARCHAR(50), location VARCHAR(50), budget INT); INSERT INTO digital_divide_projects (id, project_name, location, budget) VALUES (1, \u0027Internet Access Expansion\u0027, \u0027Brazil\u0027, 100000), (2, \u0027Broadband Connectivity\u0027, \u0027Argentina\u0027, 150000), (3, \u0027Digital Literacy Program\u0027, \u0027Colombia\u0027, 75000);", + "sql": "SELECT AVG(budget) FROM digital_divide_projects WHERE location LIKE \u0027South%\u0027;", + "sql_explanation": "This query calculates the average budget for digital divide projects in South America by using the AVG function on the budget column, filtered by the location column starting with \u0027South\u0027." 
+}, { + "id": "4588", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the tech investment of specific companies.", + "sql_context": "CREATE TABLE companies (id INT, name VARCHAR(255), region VARCHAR(100), tech_investment INT);", + "sql": "UPDATE companies SET tech_investment \u003d 1000000 WHERE id IN (101, 103, 105);", + "sql_explanation": "The query uses the UPDATE statement to modify the tech_investment of specific companies with given IDs to 1000000, representing a higher level of tech investment." +}, { + "id": "4596", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations focus on ethical AI in each continent?", + "sql_context": "CREATE TABLE ethics_by_continent (continent VARCHAR(50), name VARCHAR(50), focus VARCHAR(50)); INSERT INTO ethics_by_continent (continent, name, focus) VALUES (\u0027Asia\u0027, \u0027Ethics Asia\u0027, \u0027Ethical AI\u0027), (\u0027Africa\u0027, \u0027AI for Good\u0027, \u0027Ethical AI\u0027);", + "sql": "SELECT continent, name FROM ethics_by_continent WHERE focus \u003d \u0027Ethical AI\u0027;", + "sql_explanation": "This query selects the \u0027continent\u0027 and \u0027name\u0027 columns from the \u0027ethics_by_continent\u0027 table where the \u0027focus\u0027 is \u0027Ethical AI\u0027, showing 
organizations focusing on ethical AI in each continent." +}, { + "id": "4609", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated for AI ethics initiatives in Asia and Africa?", + "sql_context": "CREATE TABLE AIEthicsBudget(initiative VARCHAR(255), region VARCHAR(255), budget DECIMAL(10,2));INSERT INTO AIEthicsBudget(initiative, region, budget) VALUES(\u0027Bias Mitigation\u0027, \u0027Asia\u0027, 50000.00), (\u0027Transparency\u0027, \u0027Africa\u0027, 45000.00), (\u0027Fairness\u0027, \u0027Asia\u0027, 60000.00), (\u0027Accountability\u0027, \u0027Africa\u0027, 55000.00);", + "sql": "SELECT AVG(budget) FROM AIEthicsBudget WHERE region IN (\u0027Asia\u0027, \u0027Africa\u0027);", + "sql_explanation": "This query calculates the average budget allocated for AI ethics initiatives in Asia and Africa by summing up their budgets and dividing by the count of initiatives in those regions." 
+}, { + "id": "4735", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of hours spent on ethical AI training by any employee?", + "sql_context": "CREATE TABLE employees(id INT, name TEXT, hours_training INT, role TEXT); INSERT INTO employees(id, name, hours_training, role) VALUES (1, \u0027David\u0027, 10, \u0027Data Scientist\u0027); INSERT INTO employees(id, name, hours_training, role) VALUES (2, \u0027Eve\u0027, 12, \u0027Data Scientist\u0027); INSERT INTO employees(id, name, hours_training, role) VALUES (3, \u0027Frank\u0027, 15, \u0027Engineer\u0027);", + "sql": "SELECT MAX(hours_training) FROM employees WHERE role \u003d \u0027Data Scientist\u0027;", + "sql_explanation": "This SQL query finds the maximum number of hours spent on ethical AI training by any data scientist by using the MAX function on the hours_training column, while filtering the rows with the WHERE clause to only include employees who are data scientists." 
+}, { + "id": "4922", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of developers who work on technology for social good projects", + "sql_context": "CREATE TABLE developers (id INT, name VARCHAR(50), salary FLOAT, project VARCHAR(50)); INSERT INTO developers (id, name, salary, project) VALUES (1, \u0027Alice\u0027, 80000.0, \u0027Technology for Social Good\u0027); INSERT INTO developers (id, name, salary, project) VALUES (2, \u0027Bob\u0027, 85000.0, \u0027Machine Learning\u0027);", + "sql": "DELETE FROM developers WHERE project \u003d \u0027Technology for Social Good\u0027;", + "sql_explanation": "This query deletes all records of developers who work on technology for social good projects. It filters the developers table for rows where the project is \u0027Technology for Social Good\u0027 and then deletes those rows." 
+}, { + "id": "4982", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of people served by technology for social good programs in the \u0027rural\u0027 area in the \u0027digital_divide\u0027 table?", + "sql_context": "CREATE TABLE digital_divide (area TEXT, program TEXT, people_served INTEGER); INSERT INTO digital_divide (area, program, people_served) VALUES (\u0027Rural\u0027, \u0027Tech for Rural Communities\u0027, 7000); INSERT INTO digital_divide (area, program, people_served) VALUES (\u0027Rural\u0027, \u0027Digital Literacy for Farmers\u0027, 8000);", + "sql": "SELECT AVG(people_served) FROM digital_divide WHERE area \u003d \u0027Rural\u0027;", + "sql_explanation": "This SQL query calculates the average number of people served from the digital_divide table where the area is \u0027Rural\u0027." 
+}, { + "id": "5032", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \"ai_ethics_training\" table where the \"training_date\" is before 2021-01-01", + "sql_context": "CREATE TABLE ai_ethics_training (id INT PRIMARY KEY, employee_name VARCHAR(50), training_date DATE); INSERT INTO ai_ethics_training (id, employee_name, training_date) VALUES (1, \u0027Alice Johnson\u0027, \u00272021-02-01\u0027); INSERT INTO ai_ethics_training (id, employee_name, training_date) VALUES (2, \u0027Bob Williams\u0027, \u00272020-12-15\u0027);", + "sql": "DELETE FROM ai_ethics_training WHERE training_date \u003c \u00272021-01-01\u0027;", + "sql_explanation": "This SQL query deletes all records from the \"ai_ethics_training\" table where the \"training_date\" is before 2021-01-01. It uses the DELETE statement with a WHERE clause to filter the records based on the given condition." 
+}, { + "id": "5068", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget spent on AI projects by organizations in the Asia-Pacific region?", + "sql_context": "CREATE TABLE ap_projects (organization_name TEXT, region TEXT, budget INTEGER); INSERT INTO ap_projects (organization_name, region, budget) VALUES (\u0027TechCorp\u0027, \u0027Asia-Pacific\u0027, 1500000), (\u0027InnoAI\u0027, \u0027Asia-Pacific\u0027, 1200000), (\u0027GreenTech\u0027, \u0027Asia-Pacific\u0027, 1800000);", + "sql": "SELECT AVG(budget) FROM ap_projects WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query calculates the average budget spent on AI projects by organizations in the Asia-Pacific region by summing up the budgets of those organizations and dividing by the number of organizations." 
+}, { + "id": "5088", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for technology for social good projects in the Middle East?", + "sql_context": "CREATE TABLE social_good (project_id INT, budget FLOAT, region TEXT); INSERT INTO social_good (project_id, budget, region) VALUES (1, 35000, \u0027Middle East\u0027), (2, 50000, \u0027Europe\u0027), (3, 70000, \u0027Middle East\u0027);", + "sql": "SELECT AVG(budget) FROM social_good WHERE region \u003d \u0027Middle East\u0027;", + "sql_explanation": "This SQL query calculates the average budget for technology for social good projects in the Middle East. It uses the AVG function to find the mean value of the \u0027budget\u0027 column for rows where the \u0027region\u0027 column is \u0027Middle East\u0027." 
+}, { + "id": "5148", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary of employees who have received ethical AI training in 2022?", + "sql_context": "CREATE TABLE trainings(id INT, employee_id INT, date DATE); INSERT INTO trainings(id, employee_id, date) VALUES (1, 1, \u00272022-01-05\u0027); INSERT INTO trainings(id, employee_id, date) VALUES (2, 2, \u00272022-03-12\u0027); INSERT INTO trainings(id, employee_id, date) VALUES (3, 3, \u00272021-06-30\u0027); CREATE TABLE employees(id INT, name TEXT, salary FLOAT, training_id INT); INSERT INTO employees(id, name, salary, training_id) VALUES (1, \u0027Aisha\u0027, 90000.0, 1); INSERT INTO employees(id, name, salary, training_id) VALUES (2, \u0027Brian\u0027, 95000.0, 2); INSERT INTO employees(id, name, salary, training_id) VALUES (3, \u0027Carla\u0027, 85000.0, NULL);", + "sql": "SELECT MIN(salary) FROM employees WHERE training_id IS NOT NULL;", + "sql_explanation": "This SQL query finds the minimum salary of employees who have received ethical AI training in 2022 by using the MIN function on the salary column, while filtering the rows with the WHERE clause to only include employees who have a non-NULL value in the training_id column, indicating that they have received training." 
+}, { + "id": "5331", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for technology for social good projects in Asia?", + "sql_context": "CREATE TABLE Tech_For_Good (project_id INT, project_name VARCHAR(100), region VARCHAR(50), budget FLOAT); INSERT INTO Tech_For_Good (project_id, project_name, region, budget) VALUES (1, \u0027Project A\u0027, \u0027Asia\u0027, 45000.00), (2, \u0027Project B\u0027, \u0027Africa\u0027, 55000.00), (3, \u0027Project C\u0027, \u0027Asia\u0027, 65000.00);", + "sql": "SELECT SUM(budget) FROM Tech_For_Good WHERE region \u003d \u0027Asia\u0027;", + "sql_explanation": "This query calculates the total budget for technology for social good projects in Asia. It uses the Tech_For_Good table and filters for projects in Asia, then sums the budget for those projects." 
+}, { + "id": "5346", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List schools with accessibility ratings above 4.", + "sql_context": "CREATE TABLE schools (id INT, name VARCHAR(255), accessibility_rating INT);", + "sql": "SELECT id, name FROM schools WHERE accessibility_rating \u003e 4;", + "sql_explanation": "The query uses the SELECT statement to filter schools with accessibility ratings above 4 by using the WHERE clause with the accessibility_rating field." +}, { + "id": "5363", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the least accessible technology initiatives?", + "sql_context": "CREATE TABLE least_accessible (name VARCHAR(50), initiatives INT); INSERT INTO least_accessible (name, initiatives) VALUES (\u0027Country A\u0027, 2), (\u0027Country B\u0027, 3), (\u0027Country C\u0027, 1);", + "sql": "SELECT name FROM least_accessible ORDER BY initiatives ASC;", + "sql_explanation": "This query selects the \u0027name\u0027 column from the \u0027least_accessible\u0027 table and orders the results in ascending order based on the \u0027initiatives\u0027 column, showing countries with the least accessible technology initiatives." 
+}, { + "id": "5727", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum budget spent on a single AI project?", + "sql_context": "CREATE TABLE ai_projects_budget (project_name TEXT, budget INTEGER); INSERT INTO ai_projects_budget (project_name, budget) VALUES (\u0027ProjectA\u0027, 1000000), (\u0027ProjectB\u0027, 2000000), (\u0027ProjectC\u0027, 3000000), (\u0027ProjectD\u0027, 4000000);", + "sql": "SELECT MAX(budget) FROM ai_projects_budget;", + "sql_explanation": "This query determines the maximum budget spent on a single AI project by finding the record with the highest budget value." +}, { + "id": "1772", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of farmers practicing organic farming methods in Oceania and South America in 2022.", + "sql_context": "CREATE TABLE Organic_Farming (Farmer_ID INT, Region VARCHAR(20), Farming_Method VARCHAR(20), Year INT); INSERT INTO Organic_Farming (Farmer_ID, Region, Farming_Method, Year) VALUES (901, \u0027Oceania\u0027, \u0027Organic\u0027, 2022), (902, \u0027South America\u0027, \u0027Organic\u0027, 2022);", + "sql": "SELECT COUNT(DISTINCT Farmer_ID) FROM Organic_Farming WHERE Region IN (\u0027Oceania\u0027, \u0027South America\u0027) AND Year \u003d 2022 AND 
Farming_Method \u003d \u0027Organic\u0027;", + "sql_explanation": "This query identifies the number of farmers practicing organic farming methods in Oceania and South America in 2022 by counting the distinct Farmer_ID values where Region is either Oceania or South America, Farming_Method is \u0027Organic\u0027, and Year is 2022." +}, { + "id": "3154", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new agroecological project \u0027Project D\u0027 located in \u0027India\u0027 with an area of 2.5 hectares into the \u0027agroecological_projects\u0027 table.", + "sql_context": "CREATE TABLE agroecological_projects (id INT, name TEXT, location TEXT, area_ha FLOAT);", + "sql": "INSERT INTO agroecological_projects (id, name, location, area_ha) VALUES (4, \u0027Project D\u0027, \u0027India\u0027, 2.5);", + "sql_explanation": "This query inserts a new record into the \u0027agroecological_projects\u0027 table with the specified values for \u0027name\u0027, \u0027location\u0027, and \u0027area_ha\u0027." 
+}, { + "id": "3208", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many indigenous food producers are there in urban areas?", + "sql_context": "CREATE TABLE indigenous_food_producers (region VARCHAR(50), producer_type VARCHAR(50)); INSERT INTO indigenous_food_producers (region, producer_type) VALUES (\u0027Rural\u0027, \u0027Non-indigenous\u0027), (\u0027Urban\u0027, \u0027Indigenous\u0027), (\u0027Urban\u0027, \u0027Non-indigenous\u0027);", + "sql": "SELECT COUNT(*) FROM indigenous_food_producers WHERE region \u003d \u0027Urban\u0027 AND producer_type \u003d \u0027Indigenous\u0027;", + "sql_explanation": "This SQL query counts the number of indigenous food producers in urban areas by filtering rows with \u0027region\u0027 as \u0027Urban\u0027 and \u0027producer_type\u0027 as \u0027Indigenous\u0027." 
+}, { + "id": "3462", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many farmers are involved in food justice initiatives in Canada and the USA?", + "sql_context": "CREATE TABLE Justice (FarmerID INT, Country VARCHAR(20), Initiative VARCHAR(20)); INSERT INTO Justice (FarmerID, Country, Initiative) VALUES (1, \u0027Canada\u0027, \u0027Food Justice\u0027), (2, \u0027USA\u0027, \u0027Food Justice\u0027), (3, \u0027Mexico\u0027, \u0027Conventional Agriculture\u0027);", + "sql": "SELECT COUNT(*) FROM Justice WHERE Country IN (\u0027Canada\u0027, \u0027USA\u0027) AND Initiative \u003d \u0027Food Justice\u0027;", + "sql_explanation": "Count the number of farmers involved in food justice initiatives in Canada and the USA with the \u0027COUNT\u0027 function, filtering for the specified countries and initiatives." 
+}, { + "id": "3476", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget (in USD) for agroforestry projects in Central America and how many of them are using permaculture techniques?", + "sql_context": "CREATE TABLE AgroforestryProject (id INT, region VARCHAR(50), budget DECIMAL(10,2), permaculture BOOLEAN); INSERT INTO AgroforestryProject (id, region, budget, permaculture) VALUES (1, \u0027Central America\u0027, 15000.0, true); INSERT INTO AgroforestryProject (id, region, budget, permaculture) VALUES (2, \u0027Central America\u0027, 20000.0, false);", + "sql": "SELECT SUM(budget), SUM(permaculture) FROM AgroforestryProject WHERE region \u003d \u0027Central America\u0027;", + "sql_explanation": "This query calculates the total budget (in USD) for agroforestry projects in Central America and how many of them are using permaculture techniques by using the SUM keywords on the budget and permaculture columns, and filtering the AgroforestryProject table with the WHERE keyword to only include rows with a region of Central America." 
+}, { + "id": "3930", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 countries in terms of organic farming area in 2018.", + "sql_context": "CREATE TABLE organic_farming (country VARCHAR(50), area INT, year INT); INSERT INTO organic_farming (country, area, year) VALUES (\u0027Australia\u0027, 22000, 2018); INSERT INTO organic_farming (country, area, year) VALUES (\u0027Argentina\u0027, 30000, 2018);", + "sql": "SELECT country, area FROM organic_farming WHERE year \u003d 2018 ORDER BY area DESC LIMIT 5;", + "sql_explanation": "We select the top 5 countries with the largest organic farming area in 2018 directly from the organic_farming table." 
+}, { + "id": "3962", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the farmers in \u0027Springfield\u0027 and what crops do they grow?", + "sql_context": "CREATE TABLE farmers (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), crop VARCHAR(50)); INSERT INTO farmers (id, name, location, crop) VALUES (1, \u0027John Doe\u0027, \u0027Springfield\u0027, \u0027Corn\u0027), (2, \u0027Jim Brown\u0027, \u0027Springfield\u0027, \u0027Potatoes\u0027);", + "sql": "SELECT farmers.name, farmers.crop FROM farmers WHERE farmers.location \u003d \u0027Springfield\u0027;", + "sql_explanation": "The SQL query selects the name and crop columns from the farmers table where the location is Springfield." 
+}, { + "id": "4090", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the difference in yield between the highest and lowest yield crops in the Atlantic region in 2022.", + "sql_context": "CREATE TABLE crops (id INT, name VARCHAR(50), yield INT, acrate DECIMAL(5,2), region VARCHAR(50), year INT); INSERT INTO crops (id, name, yield, acrate, region, year) VALUES (1, \u0027Corn\u0027, 200, 2.3, \u0027Atlantic\u0027, 2022), (2, \u0027Soybeans\u0027, 120, 2.2, \u0027Atlantic\u0027, 2022), (3, \u0027Wheat\u0027, 180, 2.5, \u0027Atlantic\u0027, 2022);", + "sql": "SELECT MAX(yield) - MIN(yield) FROM crops WHERE region \u003d \u0027Atlantic\u0027 AND year \u003d 2022;", + "sql_explanation": "Determines the difference in yield between the highest and lowest yield crops in the Atlantic region in 2022 by subtracting the minimum yield from the maximum yield where the region is Atlantic and the year is 2022." 
+}, { + "id": "4112", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which urban farms have produced more than 100 tons of food in 2022?", + "sql_context": "CREATE TABLE urban_farms (id INT, name VARCHAR(255), year INT, production INT);", + "sql": "SELECT name, production FROM urban_farms WHERE year \u003d 2022 AND production \u003e 100000;", + "sql_explanation": "The SQL query filters the urban_farms table for records from the year 2022 with a production value greater than 100 tons." +}, { + "id": "4138", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many urban farms are there in Canada and Australia?", + "sql_context": "CREATE TABLE urban_farms (id INT, name TEXT, country TEXT); INSERT INTO urban_farms (id, name, country) VALUES (1, \u0027Farm 1\u0027, \u0027Canada\u0027), (2, \u0027Farm 2\u0027, \u0027Australia\u0027);", + "sql": "SELECT COUNT(*) as count FROM urban_farms WHERE country IN (\u0027Canada\u0027, \u0027Australia\u0027);", + "sql_explanation": "This query calculates the number of urban farms that exist in Canada and Australia by using the COUNT function and the IN operator to filter the countries." 
+}, { + "id": "4157", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many indigenous food systems in the \u0027agroecology\u0027 table are located in the \u0027Andes\u0027 region?", + "sql_context": "CREATE TABLE agroecology (id INT, name TEXT, location TEXT, type TEXT); INSERT INTO agroecology (id, name, location, type) VALUES (1, \u0027System 1\u0027, \u0027Andes\u0027, \u0027Indigenous\u0027); INSERT INTO agroecology (id, name, location, type) VALUES (2, \u0027System 2\u0027, \u0027Amazon\u0027, \u0027Agroforestry\u0027);", + "sql": "SELECT COUNT(*) FROM agroecology WHERE location \u003d \u0027Andes\u0027 AND type \u003d \u0027Indigenous\u0027;", + "sql_explanation": "This SQL query counts the number of indigenous food systems located in the \u0027Andes\u0027 region by selecting rows from the \u0027agroecology\u0027 table where the \u0027location\u0027 column is equal to \u0027Andes\u0027 and the \u0027type\u0027 column is equal to \u0027Indigenous\u0027. The COUNT() function is used to count the number of rows that meet these criteria." 
+}, { + "id": "4199", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total yield of organic crops in California and Texas in 2020?", + "sql_context": "CREATE TABLE organic_farms (id INT, state VARCHAR(2), year INT, yield INT); INSERT INTO organic_farms (id, state, year, yield) VALUES (1, \u0027CA\u0027, 2020, 1500), (2, \u0027TX\u0027, 2020, 1200);", + "sql": "SELECT SUM(yield) FROM organic_farms WHERE state IN (\u0027CA\u0027, \u0027TX\u0027) AND year \u003d 2020;", + "sql_explanation": "This query calculates the total yield of organic crops in California and Texas in 2020 by summing the yield column where the state is either CA or TX and the year is 2020." 
+}, { + "id": "4207", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of all community gardens in New York City?", + "sql_context": "CREATE TABLE community_gardens (garden_id INT, name TEXT, location TEXT, area REAL, city TEXT, state TEXT, zip_code TEXT); INSERT INTO community_gardens (garden_id, name, location, area, city, state, zip_code) VALUES (1, \u0027Green Oasis\u0027, \u0027123 Main St\u0027, 0.25, \u0027New York\u0027, \u0027NY\u0027, \u002710001\u0027);", + "sql": "SELECT SUM(area) FROM community_gardens WHERE city \u003d \u0027New York\u0027 AND state \u003d \u0027NY\u0027;", + "sql_explanation": "This SQL query calculates the total area of all community gardens located in New York City by summing up the \u0027area\u0027 column in the \u0027community_gardens\u0027 table, filtering by city and state." 
+}, { + "id": "4386", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of transactions involving \u0027non-GMO\u0027 produce in the \u0027Midwest\u0027 region.", + "sql_context": "CREATE TABLE transactions (id INT, product TEXT, region TEXT, non_gmo BOOLEAN); INSERT INTO transactions (id, product, region, non_gmo) VALUES (3, \u0027Product 3\u0027, \u0027Midwest\u0027, true), (4, \u0027Product 4\u0027, \u0027West\u0027, false);", + "sql": "SELECT COUNT(*) FROM transactions WHERE region \u003d \u0027Midwest\u0027 AND non_gmo \u003d true;", + "sql_explanation": "The SQL query calculates the number of transactions involving \u0027non-GMO\u0027 produce in the \u0027Midwest\u0027 region by counting the number of rows where the \u0027region\u0027 is \u0027Midwest\u0027 and the \u0027non_gmo\u0027 column is true." 
+}, { + "id": "4487", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of livestock in South American countries?", + "sql_context": "CREATE TABLE livestock_count (country VARCHAR(255), livestock_count INT); INSERT INTO livestock_count (country, livestock_count) VALUES (\u0027Brazil\u0027, 220000000), (\u0027Argentina\u0027, 55000000), (\u0027Colombia\u0027, 40000000);", + "sql": "SELECT MAX(livestock_count) FROM livestock_count WHERE country LIKE \u0027South%\u0027", + "sql_explanation": "Find the maximum number of livestock in South American countries by finding the maximum value in the \u0027livestock_count\u0027 column for rows with \u0027country\u0027 values starting with \u0027South\u0027." 
+}, { + "id": "4765", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum yield of \u0027corn\u0027 in \u0027region4\u0027?", + "sql_context": "CREATE TABLE farm (id INT, region VARCHAR(20), crop VARCHAR(20), yield INT); INSERT INTO farm (id, region, crop, yield) VALUES (1, \u0027region4\u0027, \u0027corn\u0027, 70), (2, \u0027region4\u0027, \u0027soybean\u0027, 80);", + "sql": "SELECT MAX(yield) FROM farm WHERE region \u003d \u0027region4\u0027 AND crop \u003d \u0027corn\u0027;", + "sql_explanation": "The SQL query finds the maximum yield of corn in region4 by selecting all records with crop \u0027corn\u0027 and region \u0027region4\u0027 and computing the maximum yield." +}, { + "id": "5056", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many crops are grown in \u0027family_farms\u0027 table for region \u002706\u0027?", + "sql_context": "CREATE TABLE family_farms (id INT, region VARCHAR(10), crop VARCHAR(20));", + "sql": "SELECT COUNT(DISTINCT crop) FROM family_farms WHERE region \u003d \u002706\u0027;", + "sql_explanation": "This query counts the number of unique crops grown in the \u0027family_farms\u0027 table for the region \u002706\u0027. 
It selects the COUNT function with DISTINCT keyword for the \u0027crop\u0027 column from the \u0027family_farms\u0027 table where the \u0027region\u0027 is \u002706\u0027." +}, { + "id": "5065", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature in the greenhouses located in the \u0027urban\u0027 area?", + "sql_context": "CREATE TABLE greenhouses (id INT, name VARCHAR(20), location VARCHAR(10), temperature DECIMAL(5,2));", + "sql": "SELECT AVG(temperature) FROM greenhouses WHERE location \u003d \u0027urban\u0027;", + "sql_explanation": "This query calculates the average temperature from the \u0027temperature\u0027 column in the \u0027greenhouses\u0027 table for all greenhouses located in the \u0027urban\u0027 area." 
+}, { + "id": "5125", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of farms that practice sustainable agriculture in the sustainable_farms dataset?", + "sql_context": "CREATE TABLE sustainable_farms (id INT, farm_name VARCHAR(255), sustainable BOOLEAN);", + "sql": "SELECT COUNT(*) FROM sustainable_farms WHERE sustainable \u003d TRUE;", + "sql_explanation": "This query calculates the total number of farms that practice sustainable agriculture in the sustainable_farms table by applying the COUNT function on all records (*) and filtering the records where the sustainable column is TRUE." +}, { + "id": "5364", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average \u0027yield\u0027 for \u0027potatoes\u0027 in the \u0027crop_yields\u0027 table?", + "sql_context": "CREATE TABLE crop_yields (id INT, farm_id INT, crop VARCHAR(50), yield FLOAT);", + "sql": "SELECT AVG(yield) FROM crop_yields WHERE crop \u003d \u0027potatoes\u0027;", + "sql_explanation": "The query calculates the average yield for the \u0027potatoes\u0027 crop in the \u0027crop_yields\u0027 table." 
+}, { + "id": "5534", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of urban farms larger than 20 hectares?", + "sql_context": "CREATE TABLE urban_farms (farmer_id INT, farm_size FLOAT); INSERT INTO urban_farms (farmer_id, farm_size) VALUES (1, 15.3), (2, 22.8), (3, 19.4), (4, 35.1);", + "sql": "SELECT COUNT(*) FROM urban_farms WHERE farm_size \u003e 20;", + "sql_explanation": "This query counts the number of records in the \u0027urban_farms\u0027 table where the \u0027farm_size\u0027 is greater than 20 hectares." +}, { + "id": "5685", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum area (in hectares) of an indigenous food system?", + "sql_context": "CREATE TABLE indigenous_food_systems (system_id INT, system_name TEXT, area FLOAT); INSERT INTO indigenous_food_systems (system_id, system_name, area) VALUES (1, \u0027Acorn Farming\u0027, 12.5), (2, \u0027Maple Syrup Production\u0027, 18.7), (3, \u0027Bison Ranching\u0027, 25.0);", + "sql": "SELECT MAX(area) FROM indigenous_food_systems;", + "sql_explanation": "The SQL query identifies the highest \u0027area\u0027 value in the \u0027indigenous_food_systems\u0027 table, which represents the maximum area of an indigenous food system." 
+}, { + "id": "5748", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many farms are there in each country?", + "sql_context": "CREATE TABLE FarmCount (country VARCHAR(50), num_farms INT); INSERT INTO FarmCount (country, num_farms) VALUES (\u0027USA\u0027, 5000), (\u0027Canada\u0027, 4000), (\u0027Mexico\u0027, 3000);", + "sql": "SELECT country, num_farms FROM FarmCount;", + "sql_explanation": "The SQL query retrieves the number of farms in each country by selecting the country and num_farms columns from the FarmCount table." +}, { + "id": "5749", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027crops\u0027 table where the temperature is below 15 degrees.", + "sql_context": "CREATE TABLE crops (id INT, crop_type VARCHAR(255), temperature FLOAT); INSERT INTO crops (id, crop_type, temperature) VALUES (1, \u0027corn\u0027, 20.5), (2, \u0027soybean\u0027, 18.3), (3, \u0027wheat\u0027, 16.7), (4, \u0027rice\u0027, 14.2), (5, \u0027barley\u0027, 12.9);", + "sql": "DELETE FROM crops WHERE temperature \u003c 15;", + "sql_explanation": "The SQL query deletes all records from the \u0027crops\u0027 table where the temperature is below 15 degrees. It uses the WHERE clause to filter the records based on the \u0027temperature\u0027 column." 
+}, { + "id": "5764", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the row with the animal id 2 from the \"animals\" table", + "sql_context": "CREATE TABLE animals (animal_id SERIAL PRIMARY KEY, name VARCHAR(255), species VARCHAR(255)); INSERT INTO animals (animal_id, name, species) VALUES (1, \u0027Lion\u0027, \u0027Feline\u0027), (2, \u0027Tiger\u0027, \u0027Feline\u0027), (3, \u0027Bear\u0027, \u0027Ursidae\u0027);", + "sql": "DELETE FROM animals WHERE animal_id \u003d 2;", + "sql_explanation": "1. Deletes the row with the animal id 2 from the \"animals\" table." +}, { + "id": "5800", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of urban gardens in the \u0027urban_gardens\u0027 table?", + "sql_context": "CREATE TABLE urban_gardens (id INT, name VARCHAR(20), location VARCHAR(30), area DECIMAL(5,2));", + "sql": "SELECT SUM(area) FROM urban_gardens;", + "sql_explanation": "This query calculates the total area of urban gardens by selecting the \u0027area\u0027 column and applying the SUM() function to it. It does not apply any filtering conditions." 
+}, { + "id": "1727", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tons of seafood were imported by Japan from Australia in 2019?", + "sql_context": "CREATE TABLE seafood_exports (id INT, export_date DATE, export_country VARCHAR(50), import_country VARCHAR(50), quantity INT, unit_type VARCHAR(10)); INSERT INTO seafood_exports (id, export_date, export_country, import_country, quantity, unit_type) VALUES (1, \u00272019-01-01\u0027, \u0027Japan\u0027, \u0027Australia\u0027, 500, \u0027ton\u0027), (2, \u00272019-01-02\u0027, \u0027Canada\u0027, \u0027US\u0027, 300, \u0027ton\u0027), (3, \u00272021-01-01\u0027, \u0027Canada\u0027, \u0027Japan\u0027, 600, \u0027ton\u0027);", + "sql": "SELECT SUM(quantity) FROM seafood_exports WHERE export_country \u003d \u0027Australia\u0027 AND import_country \u003d \u0027Japan\u0027 AND EXTRACT(YEAR FROM export_date) \u003d 2019;", + "sql_explanation": "This query calculates the total quantity (SUM(quantity)) from the seafood_exports table where the exporting country is Australia (export_country \u003d \u0027Australia\u0027), the importing country is Japan (import_country \u003d \u0027Japan\u0027), and the export date is in 2019 (EXTRACT(YEAR FROM export_date) \u003d 2019)." 
+}, { + "id": "2232", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the minimum water temperature in the Atlantic Ocean Monitoring Station in June 2020?", + "sql_context": "CREATE TABLE atlantic_ocean_monitoring_station (date DATE, temperature FLOAT);", + "sql": "SELECT MIN(temperature) AS min_temperature FROM atlantic_ocean_monitoring_station WHERE date BETWEEN \u00272020-06-01\u0027 AND \u00272020-06-30\u0027;", + "sql_explanation": "This SQL query calculates the minimum water temperature in the Atlantic Ocean Monitoring Station in June 2020. It uses the MIN function to find the minimum temperature in the specified date range." 
+}, { + "id": "2346", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature in the Pacific Ocean for January, for the past 5 years, from the temperature_data table?", + "sql_context": "CREATE TABLE temperature_data (date DATE, ocean TEXT, temperature FLOAT); INSERT INTO temperature_data (date, ocean, temperature) VALUES (\u00272018-01-01\u0027, \u0027Pacific\u0027, 12.5); INSERT INTO temperature_data (date, ocean, temperature) VALUES (\u00272019-01-01\u0027, \u0027Pacific\u0027, 13.0); INSERT INTO temperature_data (date, ocean, temperature) VALUES (\u00272020-01-01\u0027, \u0027Pacific\u0027, 11.8); INSERT INTO temperature_data (date, ocean, temperature) VALUES (\u00272021-01-01\u0027, \u0027Pacific\u0027, 12.3); INSERT INTO temperature_data (date, ocean, temperature) VALUES (\u00272022-01-01\u0027, \u0027Pacific\u0027, 12.9);", + "sql": "SELECT AVG(temperature) FROM temperature_data WHERE ocean \u003d \u0027Pacific\u0027 AND MONTH(date) \u003d 1 AND YEAR(date) BETWEEN 2018 AND 2022;", + "sql_explanation": "Calculates the average temperature for January in the Pacific Ocean from the temperature_data table, considering the past 5 years." 
+}, { + "id": "3177", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the dissolved oxygen levels for fish farms in the Atlantic ocean?", + "sql_context": "CREATE TABLE atlantic_fish_farms (id INT, name VARCHAR(50), country VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO atlantic_fish_farms (id, name, country, dissolved_oxygen) VALUES (1, \u0027Farm G\u0027, \u0027USA\u0027, 6.8), (2, \u0027Farm H\u0027, \u0027Canada\u0027, 7.2), (3, \u0027Farm I\u0027, \u0027USA\u0027, 7.0), (4, \u0027Farm J\u0027, \u0027Brazil\u0027, 6.5);", + "sql": "SELECT country, dissolved_oxygen FROM atlantic_fish_farms WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027Brazil\u0027);", + "sql_explanation": "This query displays the dissolved oxygen levels for fish farms in the Atlantic ocean by filtering the atlantic_fish_farms table to only show rows with country values in (\u0027USA\u0027, \u0027Canada\u0027, \u0027Brazil\u0027)." 
+}, { + "id": "3920", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027fish_species\u0027", + "sql_context": "CREATE TABLE fish_species (id INT PRIMARY KEY, name VARCHAR(255), family VARCHAR(255));", + "sql": "CREATE TABLE fish_species (id INT PRIMARY KEY, name VARCHAR(255), family VARCHAR(255));", + "sql_explanation": "A new table named \u0027fish_species\u0027 is created with columns \u0027id\u0027, \u0027name\u0027, and \u0027family\u0027. The \u0027id\u0027 column is the primary key." +}, { + "id": "4036", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum pH level in brackish water shrimp farms in India?", + "sql_context": "CREATE TABLE water_params (id INT, farm_id INT, country TEXT, water_type TEXT, pH DECIMAL(4,2)); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (1, 1, \u0027India\u0027, \u0027Brackish\u0027, 7.8); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (2, 2, \u0027India\u0027, \u0027Brackish\u0027, 7.6); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (3, 3, \u0027India\u0027, \u0027Freshwater\u0027, 7.9);", + "sql": "SELECT MIN(pH) FROM water_params WHERE country \u003d \u0027India\u0027 AND water_type \u003d 
\u0027Brackish\u0027;", + "sql_explanation": "This query finds the minimum pH level in all brackish water shrimp farms in India." +}, { + "id": "4074", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many species of fish are present in sustainable seafood trend reports from the last 5 years?", + "sql_context": "CREATE TABLE seafood_trends (year INT, species VARCHAR(50)); INSERT INTO seafood_trends (year, species) VALUES (2017, \u0027Salmon\u0027), (2017, \u0027Tuna\u0027), (2017, \u0027Shrimp\u0027), (2018, \u0027Cod\u0027), (2018, \u0027Salmon\u0027), (2018, \u0027Tuna\u0027), (2019, \u0027Shrimp\u0027), (2019, \u0027Cod\u0027), (2019, \u0027Salmon\u0027), (2020, \u0027Tuna\u0027), (2020, \u0027Shrimp\u0027), (2021, \u0027Cod\u0027), (2021, \u0027Salmon\u0027), (2021, \u0027Tuna\u0027), (2021, \u0027Pollock\u0027);", + "sql": "SELECT COUNT(DISTINCT species) FROM seafood_trends WHERE year BETWEEN 2016 AND 2021;", + "sql_explanation": "This query calculates the number of distinct fish species in sustainable seafood trend reports from the last 5 years by counting the unique species values for which the year is between 2016 and 2021." 
+}, { + "id": "4093", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fish in fish farms located in the North Atlantic Ocean?", + "sql_context": "CREATE TABLE fish_farms (id INT, name TEXT, location TEXT, number_of_fish INT); INSERT INTO fish_farms (id, name, location, number_of_fish) VALUES (1, \u0027Farm A\u0027, \u0027North Atlantic Ocean\u0027, 1000), (2, \u0027Farm B\u0027, \u0027South Atlantic Ocean\u0027, 1200), (3, \u0027Farm C\u0027, \u0027North Atlantic Ocean\u0027, 1500);", + "sql": "SELECT SUM(number_of_fish) FROM fish_farms WHERE location \u003d \u0027North Atlantic Ocean\u0027;", + "sql_explanation": "This query calculates the total number of fish in fish farms located in the North Atlantic Ocean by filtering the fish_farms table for rows with the location \u0027North Atlantic Ocean\u0027 and then using the SUM function to calculate the total number of fish." 
+}, { + "id": "4239", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine fish farms in Indonesia use recirculating aquaculture systems (RAS)?", + "sql_context": "CREATE TABLE marinefarms (country VARCHAR(20), uses_ras BOOLEAN); INSERT INTO marinefarms (country, uses_ras) VALUES (\u0027Indonesia\u0027, true), (\u0027Indonesia\u0027, false), (\u0027Philippines\u0027, true);", + "sql": "SELECT COUNT(*) FROM marinefarms WHERE country \u003d \u0027Indonesia\u0027 AND uses_ras \u003d true;", + "sql_explanation": "This query counts the number of marine fish farms in Indonesia (country) that use recirculating aquaculture systems (RAS) (uses_ras). It does this by selecting the count of all records where country is \u0027Indonesia\u0027 and uses_ras is true." 
+}, { + "id": "4298", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average stocking density of Tilapia in Egyptian ponds?", + "sql_context": "CREATE TABLE egyptian_farms (farmer_id INT, fish_species TEXT, stocking_density FLOAT); INSERT INTO egyptian_farms (farmer_id, fish_species, stocking_density) VALUES (1, \u0027Tilapia\u0027, 3.0), (2, \u0027Catfish\u0027, 2.5), (3, \u0027Tilapia\u0027, 2.8);", + "sql": "SELECT AVG(stocking_density) FROM egyptian_farms WHERE fish_species \u003d \u0027Tilapia\u0027;", + "sql_explanation": "This query calculates the average stocking density of Tilapia in Egyptian ponds by filtering the egyptian_farms table for records where the fish_species is Tilapia, and then computing the average stocking_density for those records." 
+}, { + "id": "4443", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature in the Baltic Sea region?", + "sql_context": "CREATE TABLE ocean_temperature (id INT, location TEXT, temperature FLOAT); INSERT INTO ocean_temperature (id, location, temperature) VALUES (1, \u0027Baltic Sea\u0027, 10.5); INSERT INTO ocean_temperature (id, location, temperature) VALUES (2, \u0027Atlantic Ocean\u0027, 15.8); INSERT INTO ocean_temperature (id, location, temperature) VALUES (3, \u0027Mediterranean Sea\u0027, 20.2); INSERT INTO ocean_temperature (id, location, temperature) VALUES (4, \u0027Pacific Ocean\u0027, 18.9);", + "sql": "SELECT AVG(temperature) FROM ocean_temperature WHERE location \u003d \u0027Baltic Sea\u0027;", + "sql_explanation": "This query calculates the average water temperature in the Baltic Sea region by using the AVG function on the temperature column, and filtering for rows where the location is \u0027Baltic Sea\u0027." 
+}, { + "id": "4529", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new coldwater fish species \u0027Trout\u0027 to fish_species table.", + "sql_context": "CREATE TABLE fish_species (id INT, name VARCHAR(255), species_type VARCHAR(255)); INSERT INTO fish_species (id, name, species_type) VALUES (1, \u0027Salmon\u0027, \u0027Coldwater\u0027), (2, \u0027Tilapia\u0027, \u0027Tropical\u0027);", + "sql": "INSERT INTO fish_species (name, species_type) VALUES (\u0027Trout\u0027, \u0027Coldwater\u0027);", + "sql_explanation": "Insert a new record into the fish_species table for fish species \u0027Trout\u0027." +}, { + "id": "4590", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature for tropical fish habitats?", + "sql_context": "CREATE TABLE fish_habitats (id INT, fish_id INT, habitat_type VARCHAR(20), temperature DECIMAL(5,2)); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (1, 1, \u0027tropical\u0027, 28.3); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (2, 2, \u0027temperate\u0027, 15.5);", + "sql": "SELECT AVG(temperature) FROM fish_habitats WHERE habitat_type \u003d \u0027tropical\u0027;", + "sql_explanation": "The SQL query calculates the average temperature for the 
tropical fish habitats by filtering the fish_habitats table where habitat_type is \u0027tropical\u0027 and then using the AVG function to find the average temperature." +}, { + "id": "5013", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the fish stock data for farm A to 5000", + "sql_context": "CREATE TABLE fish_stock (id INT, farm_name VARCHAR(50), fish_count INT); INSERT INTO fish_stock (id, farm_name, fish_count) VALUES (1, \u0027Farm A\u0027, 3000); INSERT INTO fish_stock (id, farm_name, fish_count) VALUES (2, \u0027Farm B\u0027, 4000);", + "sql": "UPDATE fish_stock SET fish_count \u003d 5000 WHERE farm_name \u003d \u0027Farm A\u0027;", + "sql_explanation": "The SQL query updates the fish_count value for farm_name \u0027Farm A\u0027 in the fish_stock table to 5000 using the UPDATE statement and the SET clause. The WHERE clause is used to filter the results to only include rows where farm_name is \u0027Farm A\u0027." 
+}, { + "id": "5101", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average phosphate levels in the Baltic Sea for the month of May.", + "sql_context": "CREATE TABLE Baltic_Sea (phosphate FLOAT, month DATE); INSERT INTO Baltic_Sea (phosphate, month) VALUES (0.25, \u00272022-05-01\u0027); INSERT INTO Baltic_Sea (phosphate, month) VALUES (0.32, \u00272022-05-15\u0027);", + "sql": "SELECT AVG(phosphate) FROM Baltic_Sea WHERE month \u003d \u00272022-05-01\u0027;", + "sql_explanation": "Joining the Baltic_Sea table, we filter for the month of May and calculate the average phosphate levels." +}, { + "id": "5368", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity for fish farming in Colombia?", + "sql_context": "CREATE TABLE Farm (FarmID INT, FishSpecies VARCHAR(50), Capacity INT, Location VARCHAR(50)); INSERT INTO Farm (FarmID, FishSpecies, Capacity, Location) VALUES (1, \u0027Salmon\u0027, 5000, \u0027Norway\u0027), (2, \u0027Tilapia\u0027, 3000, \u0027Indonesia\u0027), (3, \u0027Carp\u0027, 4000, \u0027Canada\u0027), (4, \u0027Pangasius\u0027, 2000, \u0027Colombia\u0027);", + "sql": "SELECT SUM(Capacity) FROM Farm WHERE Location \u003d \u0027Colombia\u0027;", + "sql_explanation": "The SQL query selects the 
SUM of the Capacity column from the Farm table where the Location is Colombia. The query returns the total capacity for fish farming in Colombia." +}, { + "id": "5461", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum stocking density of fish in any farm type?", + "sql_context": "CREATE TABLE farm_stocking_density (farm_id INT, farm_type VARCHAR(255), stocking_density INT); INSERT INTO farm_stocking_density (farm_id, farm_type, stocking_density) VALUES (1, \u0027Pond\u0027, 1200), (2, \u0027Cage\u0027, 1500), (3, \u0027Recirculating\u0027, 2000), (4, \u0027Pond\u0027, 800), (5, \u0027Cage\u0027, 1200);", + "sql": "SELECT MIN(stocking_density) FROM farm_stocking_density;", + "sql_explanation": "This SQL query calculates the minimum stocking density of fish in any farm type. It uses the MIN function to find the lowest stocking density in the farm_stocking_density table." 
+}, { + "id": "5496", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new farm \u0027Farm C\u0027 into aquaculture_farms table.", + "sql_context": "CREATE TABLE aquaculture_farms (id INT, name VARCHAR(255)); INSERT INTO aquaculture_farms (id, name) VALUES (1, \u0027Farm A\u0027), (2, \u0027Farm B\u0027);", + "sql": "INSERT INTO aquaculture_farms (name) VALUES (\u0027Farm C\u0027);", + "sql_explanation": "Insert a new record into the aquaculture_farms table for farm \u0027Farm C\u0027." +}, { + "id": "5668", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the feed type \u0027flakes\u0027 from the \u0027feeding\u0027 table", + "sql_context": "CREATE TABLE fish_stock (fish_id INT PRIMARY KEY, species VARCHAR(50), location VARCHAR(50), biomass FLOAT); CREATE TABLE feeding (feed_id INT PRIMARY KEY, feed_type VARCHAR(50), nutrients FLOAT); INSERT INTO feeding (feed_id, feed_type, nutrients) VALUES (1, \u0027pellets\u0027, 350), (2, \u0027flakes\u0027, 280), (3, \u0027mash\u0027, 420);", + "sql": "DELETE FROM feeding WHERE feed_type \u003d \u0027flakes\u0027;", + "sql_explanation": "Delete rows from the \u0027feeding\u0027 table where the \u0027feed_type\u0027 is \u0027flakes\u0027." 
+}, { + "id": "5726", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature for all fish habitats?", + "sql_context": "CREATE TABLE fish_habitats (id INT, fish_id INT, habitat_type VARCHAR(20), temperature DECIMAL(5,2)); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (1, 1, \u0027tropical\u0027, 28.3); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (2, 2, \u0027temperate\u0027, 15.5);", + "sql": "SELECT AVG(temperature) FROM fish_habitats;", + "sql_explanation": "The SQL query calculates the average temperature for all fish habitats by using the AVG function to find the average temperature from the fish_habitats table. There is no filtering in this query." 
+}, { + "id": "5730", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records for Tank6 that have a growth rate of less than 0.02 kg/day.", + "sql_context": "CREATE TABLE Tank6 (species VARCHAR(20), individual_id INT, growth_rate FLOAT); INSERT INTO Tank6 (species, individual_id, growth_rate) VALUES (\u0027Tilapia\u0027, 1, 0.025), (\u0027Tilapia\u0027, 2, 0.015), (\u0027Cod\u0027, 1, 0.02), (\u0027Cod\u0027, 2, 0.03);", + "sql": "DELETE FROM Tank6 WHERE growth_rate \u003c 0.02;", + "sql_explanation": "The SQL query deletes all records for Tank6 that have a growth rate of less than 0.02 kg/day. It uses the DELETE statement to remove the records that meet the specified condition." 
+}, { + "id": "5812", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total nutrient value of feed in the \u0027feeding\u0027 table?", + "sql_context": "CREATE TABLE fish_stock (fish_id INT PRIMARY KEY, species VARCHAR(50), location VARCHAR(50), biomass FLOAT); CREATE TABLE feeding (feed_id INT PRIMARY KEY, feed_type VARCHAR(50), nutrients FLOAT); INSERT INTO feeding (feed_id, feed_type, nutrients) VALUES (1, \u0027pellets\u0027, 350), (2, \u0027flakes\u0027, 280), (3, \u0027mash\u0027, 420);", + "sql": "SELECT SUM(nutrients) FROM feeding;", + "sql_explanation": "Sum the \u0027nutrients\u0027 column values from the \u0027feeding\u0027 table." 
+}, { + "id": "1846", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the hotels with the highest energy consumption per square meter.", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, country TEXT, energy_consumption FLOAT, area FLOAT); INSERT INTO hotels (hotel_id, hotel_name, city, country, energy_consumption, area) VALUES (1, \u0027Hotel A\u0027, \u0027Rome\u0027, \u0027Italy\u0027, 12000.0, 5000.0), (2, \u0027Hotel B\u0027, \u0027Paris\u0027, \u0027France\u0027, 15000.0, 7000.0);", + "sql": "SELECT hotel_name, energy_consumption/area as energy_consumption_per_square_meter FROM hotels ORDER BY energy_consumption_per_square_meter DESC;", + "sql_explanation": "This query calculates the energy consumption per square meter for each hotel by dividing the energy consumption by the area and returns the hotels with the highest energy consumption per square meter by using the ORDER BY clause." 
+}, { + "id": "2196", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record to the \"sustainable_practices\" table with an ID of 6, a description of \u0027Recycling initiatives in housekeeping\u0027, and a category of \u0027Waste\u0027", + "sql_context": "CREATE TABLE sustainable_practices (practice_id INT, description TEXT, category VARCHAR(20));", + "sql": "INSERT INTO sustainable_practices (practice_id, description, category) VALUES (6, \u0027Recycling initiatives in housekeeping\u0027, \u0027Waste\u0027);", + "sql_explanation": "This query adds a new record to the \"sustainable_practices\" table with an ID of 6, a description of \u0027Recycling initiatives in housekeeping\u0027, and a category of \u0027Waste\u0027." 
+}, { + "id": "2323", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all museums in Italy with virtual tours and sustainable tourism certifications.", + "sql_context": "CREATE TABLE Museums (museum_id INT, museum_name VARCHAR(50), country VARCHAR(50), has_virtual_tour BOOLEAN, is_sustainable_tourism_certified BOOLEAN); INSERT INTO Museums (museum_id, museum_name, country, has_virtual_tour, is_sustainable_tourism_certified) VALUES (1, \u0027VirtualUffizi Florence\u0027, \u0027Italy\u0027, true, true), (2, \u0027LeonardoDaVinci Museum Milan\u0027, \u0027Italy\u0027, false, true), (3, \u0027Colosseum Rome\u0027, \u0027Italy\u0027, false, false);", + "sql": "SELECT museum_name FROM Museums WHERE country \u003d \u0027Italy\u0027 AND has_virtual_tour \u003d true AND is_sustainable_tourism_certified \u003d true;", + "sql_explanation": "This query lists all museums in Italy with virtual tours and sustainable tourism certifications by selecting all records with country Italy, has_virtual_tour set to true, and is_sustainable_tourism_certified set to true, then listing the museum_name column." 
+}, { + "id": "2563", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily revenue of local businesses in Barcelona that have not benefited from sustainable tourism?", + "sql_context": "CREATE TABLE local_businesses (business_id INT, name TEXT, city TEXT, daily_revenue FLOAT, benefited_from_sustainable_tourism BOOLEAN); INSERT INTO local_businesses (business_id, name, city, daily_revenue, benefited_from_sustainable_tourism) VALUES (1, \u0027La Boqueria Market Stall\u0027, \u0027Barcelona\u0027, 500, true), (2, \u0027Barcelona Gift Shop\u0027, \u0027Barcelona\u0027, 300, false);", + "sql": "SELECT AVG(daily_revenue) FROM local_businesses WHERE city \u003d \u0027Barcelona\u0027 AND benefited_from_sustainable_tourism \u003d false;", + "sql_explanation": "This query calculates the average daily revenue of local businesses in Barcelona that have not benefited from sustainable tourism. It uses the AVG function to find the mean value of the daily_revenue column for rows with \u0027Barcelona\u0027 in the city column and false in the benefited_from_sustainable_tourism column." 
+}, { + "id": "2600", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the carbon footprint ratings for eco-friendly hotels in Rio de Janeiro with a rating below 4.", + "sql_context": "CREATE TABLE eco_hotels (hotel_id INT, city VARCHAR(50), carbon_footprint DECIMAL(3,1)); INSERT INTO eco_hotels (hotel_id, city, carbon_footprint) VALUES (1, \u0027Rio de Janeiro\u0027, 3.5), (2, \u0027Rio de Janeiro\u0027, 4.2), (3, \u0027Rio de Janeiro\u0027, 3.8), (4, \u0027SÃŖo Paulo\u0027, 4.5);", + "sql": "UPDATE eco_hotels SET carbon_footprint \u003d carbon_footprint + 0.5 WHERE city \u003d \u0027Rio de Janeiro\u0027 AND carbon_footprint \u003c 4;", + "sql_explanation": "The query updates the carbon footprint ratings for eco-friendly hotels in Rio de Janeiro with a rating below 4 by increasing the rating by 0.5." 
+}, { + "id": "2616", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 most popular sustainable hotels in Japan and South Korea.", + "sql_context": "CREATE TABLE hotels (id INT, country VARCHAR(50), sustainable BOOLEAN, popularity INT); INSERT INTO hotels (id, country, sustainable, popularity) VALUES (1, \u0027Japan\u0027, TRUE, 50), (2, \u0027Japan\u0027, FALSE, 40), (3, \u0027South Korea\u0027, TRUE, 30), (4, \u0027South Korea\u0027, TRUE, 45);", + "sql": "SELECT * FROM hotels WHERE country IN (\u0027Japan\u0027, \u0027South Korea\u0027) AND sustainable \u003d TRUE ORDER BY popularity DESC LIMIT 3;", + "sql_explanation": "We find the top 3 most popular sustainable hotels in Japan and South Korea by selecting all columns from the hotels table where country is in the specified list and sustainable is TRUE, then ordering the results by popularity in descending order and limiting the output to 3 rows." 
+}, { + "id": "2626", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the revenue of eco-friendly hotels in France in Q2.", + "sql_context": "CREATE TABLE hotel_revenue (hotel_name TEXT, is_eco_friendly BOOLEAN, quarter TEXT, revenue INT); INSERT INTO hotel_revenue (hotel_name, is_eco_friendly, quarter, revenue) VALUES (\u0027Paris Eco Hotel\u0027, TRUE, \u0027Q2\u0027, 12000), (\u0027Nice Green Hotel\u0027, TRUE, \u0027Q2\u0027, 15000);", + "sql": "SELECT SUM(revenue) FROM hotel_revenue WHERE is_eco_friendly \u003d TRUE AND quarter \u003d \u0027Q2\u0027 AND hotel_name LIKE \u0027%France%\u0027;", + "sql_explanation": "The SQL query filters the hotel_revenue table for eco-friendly hotels in France in Q2 and calculates the total revenue for these hotels using the SUM function." 
+}, { + "id": "2692", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were conducted in Canada in Q2 2022?", + "sql_context": "CREATE TABLE can_virtual_tours (tour_id INT, location VARCHAR(255), country VARCHAR(255), tour_date DATE); INSERT INTO can_virtual_tours (tour_id, location, country, tour_date) VALUES (1, \u0027CN Tower\u0027, \u0027Canada\u0027, \u00272022-04-01\u0027), (2, \u0027Niagara Falls\u0027, \u0027Canada\u0027, \u00272022-05-15\u0027);", + "sql": "SELECT COUNT(*) FROM can_virtual_tours WHERE country \u003d \u0027Canada\u0027 AND tour_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "The SQL query calculates the number of virtual tours conducted in Canada in Q2 2022 by using the COUNT function on the star column, filtering the data for Canada in the WHERE clause, and specifying the date range with the BETWEEN operator." 
+}, { + "id": "2708", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record of a virtual tourism center in Tokyo with 15 rooms.", + "sql_context": "CREATE TABLE tourism_centers (id INT, name TEXT, city TEXT, type TEXT, num_rooms INT);", + "sql": "INSERT INTO tourism_centers (name, city, type, num_rooms) VALUES (\u0027Virtual Tourism Center\u0027, \u0027Tokyo\u0027, \u0027virtual\u0027, 15);", + "sql_explanation": "The SQL query inserts a new record of a virtual tourism center in Tokyo with 15 rooms. It uses the INSERT INTO statement to add a new row to the \u0027tourism_centers\u0027 table, specifying the column values for the name, city, type, and num_rooms columns." 
+}, { + "id": "3095", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cultural heritage sites in Italy and Spain?", + "sql_context": "CREATE TABLE sites (id INT, country VARCHAR(50), type VARCHAR(50)); INSERT INTO sites (id, country, type) VALUES (1, \u0027Italy\u0027, \u0027Cultural\u0027), (2, \u0027Spain\u0027, \u0027Cultural\u0027), (3, \u0027France\u0027, \u0027Natural\u0027);", + "sql": "SELECT SUM(CASE WHEN type \u003d \u0027Cultural\u0027 THEN 1 ELSE 0 END) FROM sites WHERE country IN (\u0027Italy\u0027, \u0027Spain\u0027);", + "sql_explanation": "We calculate the total number of cultural heritage sites in Italy and Spain by using a conditional statement to sum the rows with type \u0027Cultural\u0027 for the specified countries." 
+}, { + "id": "3166", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of heritage sites in India and Japan that have a UNESCO World Heritage designation.", + "sql_context": "CREATE TABLE Heritage_Sites (site_id INT, site_name VARCHAR(50), country VARCHAR(50), unesco_designation VARCHAR(50)); INSERT INTO Heritage_Sites (site_id, site_name, country, unesco_designation) VALUES (1, \u0027Taj Mahal\u0027, \u0027India\u0027, \u0027Yes\u0027), (2, \u0027Forbidden City\u0027, \u0027China\u0027, \u0027Yes\u0027), (3, \u0027Mount Fuji\u0027, \u0027Japan\u0027, \u0027Yes\u0027), (4, \u0027Hawa Mahal\u0027, \u0027India\u0027, \u0027No\u0027), (5, \u0027Fushimi Inari Shrine\u0027, \u0027Japan\u0027, \u0027Yes\u0027);", + "sql": "SELECT site_name FROM Heritage_Sites WHERE country IN (\u0027India\u0027, \u0027Japan\u0027) AND unesco_designation \u003d \u0027Yes\u0027;", + "sql_explanation": "The SQL query lists the names of heritage sites in India and Japan that have a UNESCO World Heritage designation by filtering the Heritage_Sites table using the IN and \u003d keywords, and then selecting the site_name column." 
+}, { + "id": "3440", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all local artisan workshops in New York City and Los Angeles.", + "sql_context": "CREATE TABLE artisan_workshops (workshop_id INT, workshop_name TEXT, city TEXT); INSERT INTO artisan_workshops (workshop_id, workshop_name, city) VALUES (1, \u0027Brooklyn Ceramics\u0027, \u0027New York City\u0027), (2, \u0027Echo Park Guitars\u0027, \u0027Los Angeles\u0027);", + "sql": "SELECT workshop_name, city FROM artisan_workshops WHERE city IN (\u0027New York City\u0027, \u0027Los Angeles\u0027);", + "sql_explanation": "This query lists all local artisan workshops in New York City and Los Angeles. It selects the workshop name and city from the artisan_workshops table, filtering for rows where the city is either New York City or Los Angeles." 
+}, { + "id": "3446", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and annual revenues of all cultural heritage sites located in Tokyo, Japan?", + "sql_context": "CREATE TABLE Cultural_Heritage_Sites (id INT, name VARCHAR(255), location VARCHAR(255), year_established INT, PRIMARY KEY(id)); INSERT INTO Cultural_Heritage_Sites (id, name, location, year_established) VALUES (1, \u0027Todai-ji Temple\u0027, \u0027Nara, Japan\u0027, 745);", + "sql": "SELECT c.name, c.annual_revenue FROM Cultural_Heritage_Sites c WHERE c.location \u003d \u0027Tokyo, Japan\u0027;", + "sql_explanation": "This query selects the name and annual_revenue columns from the \u0027Cultural_Heritage_Sites\u0027 table where the location is \u0027Tokyo, Japan\u0027. It returns the names and annual revenues of the cultural heritage sites located in Tokyo, Japan." 
+}, { + "id": "3515", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \"sustainable_practices\" with columns \"practice_id\", \"practice_name\", \"description\" of types integer, text, text respectively", + "sql_context": "CREATE TABLE sustainable_practices (practice_id integer, practice_name text, description text);", + "sql": "CREATE TABLE sustainable_practices (practice_id integer, practice_name text, description text);", + "sql_explanation": "Creates a new table called \"sustainable_practices\" with columns \"practice_id\", \"practice_name\", and \"description\" of types integer, text, and text respectively." 
+}, { + "id": "3608", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of sustainable tourism activities in Australia?", + "sql_context": "CREATE TABLE activities(id INT, name TEXT, country TEXT, rating INT, sustainability_rating INT); INSERT INTO activities (id, name, country, rating, sustainability_rating) VALUES (1, \u0027Hiking Trail A\u0027, \u0027Australia\u0027, 4, 5), (2, \u0027Bike Tour B\u0027, \u0027Australia\u0027, 5, 5), (3, \u0027Kayak Tour C\u0027, \u0027Australia\u0027, 4, 4);", + "sql": "SELECT AVG(rating) FROM activities WHERE country \u003d \u0027Australia\u0027 AND sustainability_rating \u003d 5;", + "sql_explanation": "This SQL query calculates the average rating of sustainable tourism activities in Australia by using the AVG() function on the rating column, filtering the data by country and sustainability rating." 
+}, { + "id": "3617", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum revenue per night for eco-friendly hotels in Australia?", + "sql_context": "CREATE TABLE hotels (id INT, name TEXT, country TEXT, is_eco_friendly BOOLEAN, daily_revenue INT); INSERT INTO hotels (id, name, country, is_eco_friendly, daily_revenue) VALUES (1, \u0027Sydney Green Hotel\u0027, \u0027Australia\u0027, true, 200), (2, \u0027Melbourne Eco Hotel\u0027, \u0027Australia\u0027, true, 250), (3, \u0027Brisbane Sustainable Hotel\u0027, \u0027Australia\u0027, true, 150);", + "sql": "SELECT MIN(daily_revenue) FROM hotels WHERE country \u003d \u0027Australia\u0027 AND is_eco_friendly \u003d true;", + "sql_explanation": "This SQL query calculates the minimum revenue per night for eco-friendly hotels in Australia. It does this by filtering for eco-friendly hotels in Australia and then calculating the minimum daily revenue for those hotels." 
+}, { + "id": "3743", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hotels in New York have achieved a sustainability rating?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, sustainability_rating INT); INSERT INTO hotels (hotel_id, hotel_name, city, sustainability_rating) VALUES (1, \u0027Hotel A\u0027, \u0027New York\u0027, 3), (2, \u0027Hotel B\u0027, \u0027New York\u0027, NULL), (3, \u0027Hotel C\u0027, \u0027New York\u0027, 5);", + "sql": "SELECT COUNT(*) FROM hotels WHERE city \u003d \u0027New York\u0027 AND sustainability_rating IS NOT NULL;", + "sql_explanation": "This query calculates the number of hotels in New York with a sustainability rating. It does so by filtering the hotels table by the city column, limiting the results to only those with \u0027New York\u0027 as the value. The results are then filtered further by the sustainability_rating column, limiting the results to only those with a non-null value. The number of rows that meet these conditions is then counted." 
+}, { + "id": "3769", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural heritage sites are in Japan and Spain?", + "sql_context": "CREATE TABLE Countries (country_id INT, name TEXT, region TEXT); CREATE TABLE Cultural_Heritage_Sites (site_id INT, country_id INT, name TEXT); INSERT INTO Countries (country_id, name, region) VALUES (1, \u0027Japan\u0027, \u0027Asia\u0027), (2, \u0027Spain\u0027, \u0027Europe\u0027); INSERT INTO Cultural_Heritage_Sites (site_id, country_id, name) VALUES (1, 1, \u0027Mount Fuji\u0027), (2, 1, \u0027Himeji Castle\u0027), (3, 2, \u0027Alhambra\u0027), (4, 2, \u0027Sagrada Familia\u0027);", + "sql": "SELECT COUNT(DISTINCT country_id) FROM Cultural_Heritage_Sites WHERE country_id IN (1, 2);", + "sql_explanation": "This query counts the number of distinct cultural heritage sites in Japan and Spain by checking the country_id field in the Cultural_Heritage_Sites table." 
+}, { + "id": "4004", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of tourists who visited Germany in 2021?", + "sql_context": "CREATE TABLE tourists_visits (id INT, country TEXT, visit_year INT); INSERT INTO tourists_visits (id, country, visit_year) VALUES (1, \u0027Germany\u0027, 2021), (2, \u0027Germany\u0027, 2022), (3, \u0027Germany\u0027, 2021);", + "sql": "SELECT COUNT(*) FROM tourists_visits WHERE country \u003d \u0027Germany\u0027 AND visit_year \u003d 2021;", + "sql_explanation": "The SQL query counts the number of rows in the tourists_visits table where the country is \u0027Germany\u0027 and the visit year is 2021." 
+}, { + "id": "4091", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of eco-friendly accommodations in Thailand and Indonesia?", + "sql_context": "CREATE TABLE eco_accommodations (accom_id INT, accom_name TEXT, location TEXT); INSERT INTO eco_accommodations (accom_id, accom_name, location) VALUES (1, \u0027Eco Lodge\u0027, \u0027Thailand\u0027), (2, \u0027Green Villa\u0027, \u0027Indonesia\u0027);", + "sql": "SELECT COUNT(*) FROM eco_accommodations WHERE location IN (\u0027Thailand\u0027, \u0027Indonesia\u0027);", + "sql_explanation": "The SQL query first filters the eco_accommodations table to only include rows where the location is Thailand or Indonesia. It then counts the total number of rows." 
+}, { + "id": "4178", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete virtual tours that have not been updated in the last year.", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, tour_name TEXT, location TEXT, price DECIMAL(5,2), updated_at DATETIME); INSERT INTO virtual_tours (tour_id, tour_name, location, price, updated_at) VALUES (1, \u0027Louvre VR Experience\u0027, \u0027Paris\u0027, 24.99, NOW()), (2, \u0027Gondola Tour in Venice\u0027, \u0027Venice\u0027, 19.99, NOW()), (3, \u0027Great Wall of China Virtual Walk\u0027, \u0027China\u0027, 29.99, NOW());", + "sql": "DELETE FROM virtual_tours WHERE updated_at \u003c DATE_SUB(curdate(), INTERVAL 1 YEAR);", + "sql_explanation": "This query deletes records from the virtual_tours table where the updated_at timestamp is older than one year. As a result, outdated virtual tour records are removed from the table." 
+}, { + "id": "4192", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many eco-friendly hotels are there in Canada with a rating above 4.5?", + "sql_context": "CREATE TABLE eco_hotels_canada (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT); INSERT INTO eco_hotels_canada (hotel_id, hotel_name, country, rating) VALUES (1, \u0027Eco-Hotel Vancouver\u0027, \u0027Canada\u0027, 4.6), (2, \u0027Green Living Toronto\u0027, \u0027Canada\u0027, 4.7);", + "sql": "SELECT COUNT(*) FROM eco_hotels_canada WHERE country \u003d \u0027Canada\u0027 AND rating \u003e 4.5;", + "sql_explanation": "This query counts the number of eco-friendly hotels in Canada with a rating above 4.5 by selecting COUNT(*) on the hotel_id column, filtering the data by the country column equal to \u0027Canada\u0027 and rating greater than 4.5." 
+}, { + "id": "4197", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of virtual tours in Germany and Switzerland?", + "sql_context": "CREATE TABLE virtual_tours (id INT, country VARCHAR(20), tours INT); INSERT INTO virtual_tours (id, country, tours) VALUES (1, \u0027Germany\u0027, 300), (2, \u0027Switzerland\u0027, 200), (3, \u0027Austria\u0027, 150);", + "sql": "SELECT SUM(tours) FROM virtual_tours WHERE country IN (\u0027Germany\u0027, \u0027Switzerland\u0027);", + "sql_explanation": "This query calculates the total number of virtual tours in Germany and Switzerland by summing the tours of all records where the country is either \u0027Germany\u0027 or \u0027Switzerland\u0027." 
+}, { + "id": "4286", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many eco-friendly hotels are there in Amsterdam?", + "sql_context": "CREATE TABLE hotels (id INT, name TEXT, city TEXT, is_eco_friendly BOOLEAN); INSERT INTO hotels (id, name, city, is_eco_friendly) VALUES (1, \u0027Eco Hotel Amsterdam\u0027, \u0027Amsterdam\u0027, TRUE), (2, \u0027Green Hotel Amsterdam\u0027, \u0027Amsterdam\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM hotels WHERE city \u003d \u0027Amsterdam\u0027 AND is_eco_friendly \u003d TRUE;", + "sql_explanation": "The SQL query counts the number of eco-friendly hotels in Amsterdam by using the COUNT function with an asterisk (*) to count all rows, filtering rows with the WHERE clause to only consider eco-friendly hotels in Amsterdam." 
+}, { + "id": "4395", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sustainable tourism experiences are available in Sydney?", + "sql_context": "CREATE TABLE experiences (experience_id INT, experience_name TEXT, city TEXT, sustainable BOOLEAN, type TEXT); INSERT INTO experiences (experience_id, experience_name, city, sustainable, type) VALUES (1, \u0027Eco-Hiking Sydney\u0027, \u0027Sydney\u0027, TRUE, \u0027Outdoor\u0027), (2, \u0027Sustainable Food Tour Sydney\u0027, \u0027Sydney\u0027, TRUE, \u0027Culinary\u0027), (3, \u0027City Sightseeing Sydney\u0027, \u0027Sydney\u0027, FALSE, \u0027Sightseeing\u0027);", + "sql": "SELECT COUNT(*) FROM experiences WHERE city \u003d \u0027Sydney\u0027 AND sustainable \u003d TRUE;", + "sql_explanation": "This query counts the number of sustainable tourism experiences available in Sydney. It filters the results based on the city and sustainability, using the WHERE clause." 
+}, { + "id": "4513", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the hotel in Spain with ID 5 to \u0027Hotel Renamed\u0027.", + "sql_context": "CREATE TABLE hotels(id INT, name TEXT, country TEXT); INSERT INTO hotels(id, name, country) VALUES (5, \u0027Hotel Original\u0027, \u0027Spain\u0027), (6, \u0027Hotel Another\u0027, \u0027Portugal\u0027);", + "sql": "UPDATE hotels SET name \u003d \u0027Hotel Renamed\u0027 WHERE id \u003d 5 AND country \u003d \u0027Spain\u0027;", + "sql_explanation": "Update the name of the hotel with ID 5 in Spain to \u0027Hotel Renamed\u0027 by filtering rows based on ID and country." +}, { + "id": "4514", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the number of local employees for a cultural heritage site", + "sql_context": "CREATE TABLE heritage_site_employment (id INT PRIMARY KEY, site_id INT, local_employment INT);", + "sql": "UPDATE heritage_site_employment SET local_employment \u003d 25 WHERE site_id \u003d 1;", + "sql_explanation": "The number of local employees for a cultural heritage site is updated in the \u0027heritage_site_employment\u0027 table." 
+}, { + "id": "4540", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of visitors for a cultural heritage site in Germany?", + "sql_context": "CREATE TABLE cultural_heritage_sites (id INT, name TEXT, country TEXT, visitors INT); INSERT INTO cultural_heritage_sites (id, name, country, visitors) VALUES (1, \u0027Brandenburg Gate\u0027, \u0027Germany\u0027, 15000), (2, \u0027Neuschwanstein Castle\u0027, \u0027Germany\u0027, 6000), (3, \u0027Berlin Wall Memorial\u0027, \u0027Germany\u0027, 10000);", + "sql": "SELECT MAX(visitors) FROM cultural_heritage_sites WHERE country \u003d \u0027Germany\u0027;", + "sql_explanation": "This SQL query finds the maximum number of visitors for a cultural heritage site in Germany. It does this by filtering for sites in Germany and then finding the maximum number of visitors for those sites." 
+}, { + "id": "4568", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue per sustainable hotel in Paris?", + "sql_context": "CREATE TABLE paris_sustainable_hotels(id INT, name TEXT, sustainable BOOLEAN, revenue FLOAT); INSERT INTO paris_sustainable_hotels(id, name, sustainable, revenue) VALUES (1, \u0027EcoHotel Paris\u0027, true, 12000.0), (2, \u0027Paris Green Suites\u0027, true, 15000.0), (3, \u0027Paris Urban Hotel\u0027, false, 10000.0);", + "sql": "SELECT AVG(revenue) FROM paris_sustainable_hotels WHERE sustainable \u003d true;", + "sql_explanation": "This query calculates the average revenue per sustainable hotel in Paris by averaging the revenue of hotels in Paris that have sustainable set to true." 
+}, { + "id": "4629", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by eco-friendly accommodations in Australia?", + "sql_context": "CREATE TABLE AccommodationRevenue (AccommodationID INT, Country VARCHAR(50), Revenue FLOAT); INSERT INTO AccommodationRevenue (AccommodationID, Country, Revenue) VALUES (1, \u0027Australia\u0027, 3000), (2, \u0027Australia\u0027, 3500);", + "sql": "SELECT SUM(Revenue) FROM AccommodationRevenue WHERE Country \u003d \u0027Australia\u0027;", + "sql_explanation": "Calculate the total revenue generated by eco-friendly accommodations in Australia by summing the Revenue column for the two records with Country \u003d \u0027Australia\u0027." 
+}, { + "id": "4657", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all cultural heritage sites in Spain and Italy.", + "sql_context": "CREATE TABLE Cultural_Sites (site_id INT, site_name VARCHAR(50), country VARCHAR(50)); INSERT INTO Cultural_Sites (site_id, site_name, country) VALUES (1, \u0027Alhambra\u0027, \u0027Spain\u0027), (2, \u0027Colosseum\u0027, \u0027Italy\u0027);", + "sql": "SELECT site_name FROM Cultural_Sites WHERE country IN (\u0027Spain\u0027, \u0027Italy\u0027);", + "sql_explanation": "The SQL query lists all cultural heritage sites in Spain and Italy by filtering the Cultural_Sites table using the IN keyword." 
+}, { + "id": "4672", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of eco-friendly hotels in Australia?", + "sql_context": "CREATE TABLE eco_hotels_australia (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT); INSERT INTO eco_hotels_australia (hotel_id, hotel_name, country, rating) VALUES (1, \u0027Eco-Retreat Australia\u0027, \u0027Australia\u0027, 4.2), (2, \u0027Green Hotel Sydney\u0027, \u0027Australia\u0027, 4.5);", + "sql": "SELECT MIN(rating) FROM eco_hotels_australia WHERE country \u003d \u0027Australia\u0027;", + "sql_explanation": "This query calculates the minimum rating of eco-friendly hotels in Australia by selecting the MIN function on the rating column, filtering the data by the country column equal to \u0027Australia\u0027." 
+}, { + "id": "4793", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum revenue generated by virtual tours in France?", + "sql_context": "CREATE TABLE virtual_tours_france (tour_id INT, tour_name TEXT, country TEXT, revenue FLOAT); INSERT INTO virtual_tours_france (tour_id, tour_name, country, revenue) VALUES (1, \u0027Eiffel Tower Virtual Tour\u0027, \u0027France\u0027, 30000), (2, \u0027Louvre Virtual Tour\u0027, \u0027France\u0027, 35000);", + "sql": "SELECT MAX(revenue) FROM virtual_tours_france WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the maximum revenue generated by virtual tours in France by selecting the MAX function on the revenue column, filtering the data by the country column equal to \u0027France\u0027." 
+}, { + "id": "4878", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon footprint of virtual tours in Paris?", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, name TEXT, city TEXT, carbon_footprint FLOAT); INSERT INTO virtual_tours (tour_id, name, city, carbon_footprint) VALUES (1, \u0027Tour A\u0027, \u0027Paris\u0027, 5.6), (2, \u0027Tour B\u0027, \u0027Paris\u0027, 4.9), (3, \u0027Tour C\u0027, \u0027Paris\u0027, 6.3);", + "sql": "SELECT AVG(carbon_footprint) FROM virtual_tours WHERE city \u003d \u0027Paris\u0027;", + "sql_explanation": "This query calculates the average carbon footprint of virtual tours in Paris. It does so by selecting the carbon_footprint column and filtering the results by the city column, limiting the results to only those with \u0027Paris\u0027 as the value. The average of the carbon_footprint column is then calculated." 
+}, { + "id": "4884", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average local economic impact of sustainable tourism in Paris?", + "sql_context": "CREATE TABLE paris_impact (site_id INT, name VARCHAR(255), type VARCHAR(255), local_impact DECIMAL(10,2)); INSERT INTO paris_impact (site_id, name, type, local_impact) VALUES (1, \u0027Notre-Dame\u0027, \u0027historical\u0027, 5000.00), (2, \u0027Louvre Museum\u0027, \u0027art\u0027, 7000.00);", + "sql": "SELECT AVG(local_impact) FROM paris_impact WHERE type \u003d \u0027historical\u0027;", + "sql_explanation": "Calculates the average local economic impact of sustainable tourism in Paris by summing the local_impact column and dividing by the count of rows in the paris_impact table with type \u0027historical\u0027." 
+}, { + "id": "5017", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all virtual tours in Canada with a price over 20 CAD.", + "sql_context": "CREATE TABLE VirtualTours (id INT, name TEXT, country TEXT, price FLOAT); INSERT INTO VirtualTours (id, name, country, price) VALUES (1, \u0027Virtual Niagara Falls Tour\u0027, \u0027Canada\u0027, 25.0), (2, \u0027Canada Virtual Wildlife Tour\u0027, \u0027Canada\u0027, 18.5);", + "sql": "SELECT * FROM VirtualTours WHERE country \u003d \u0027Canada\u0027 AND price \u003e 20;", + "sql_explanation": "This query retrieves all virtual tours in Canada with a price higher than 20 CAD by filtering the VirtualTours table based on the country and price columns." 
+}, { + "id": "5031", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum revenue generated from any eco-friendly tour in Mexico?", + "sql_context": "CREATE TABLE mexico_tours (id INT, type VARCHAR(255), revenue FLOAT); INSERT INTO mexico_tours (id, type, revenue) VALUES (1, \u0027Eco-friendly\u0027, 600.00), (2, \u0027Eco-friendly\u0027, 700.00);", + "sql": "SELECT MIN(revenue) FROM mexico_tours WHERE type \u003d \u0027Eco-friendly\u0027;", + "sql_explanation": "This query calculates the minimum revenue generated from any eco-friendly tour in Mexico by finding the minimum revenue value in the \u0027mexico_tours\u0027 table where the type is \u0027Eco-friendly\u0027." 
+}, { + "id": "5067", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable tourism activities are available in Costa Rica?", + "sql_context": "CREATE TABLE sustain_activities (activity_id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO sustain_activities (activity_id, name, location) VALUES (3, \u0027Ziplining in Rainforests\u0027, \u0027Costa Rica\u0027), (4, \u0027Volcano Hiking Tours\u0027, \u0027Costa Rica\u0027);", + "sql": "SELECT name FROM sustain_activities WHERE location \u003d \u0027Costa Rica\u0027;", + "sql_explanation": "The SQL query lists sustainable tourism activities available in Costa Rica by selecting the name column, filtering the data for Costa Rica in the WHERE clause." 
+}, { + "id": "5092", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of daily visitors to national parks in Kenya?", + "sql_context": "CREATE TABLE visitors (site_id INT, name VARCHAR(255), country VARCHAR(255), daily_visitors INT); INSERT INTO visitors (site_id, name, country, daily_visitors) VALUES (1, \u0027Masai Mara National Reserve\u0027, \u0027Kenya\u0027, 2000), (2, \u0027Amboseli National Park\u0027, \u0027Kenya\u0027, 1500), (3, \u0027Tsavo National Park\u0027, \u0027Kenya\u0027, 1200);", + "sql": "SELECT AVG(daily_visitors) FROM visitors WHERE country \u003d \u0027Kenya\u0027;", + "sql_explanation": "This query calculates the average number of daily visitors to national parks in Kenya by selecting AVG function on the daily_visitors column, while filtering the results by the country column with the value \u0027Kenya\u0027." 
+}, { + "id": "5197", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural heritage sites in Egypt have virtual tours?", + "sql_context": "CREATE TABLE egypt_sites (site_id INT, site_name TEXT, has_virtual_tour BOOLEAN); INSERT INTO egypt_sites (site_id, site_name, has_virtual_tour) VALUES (1, \u0027Pyramids of Giza\u0027, true), (2, \u0027Temple of Karnak\u0027, false);", + "sql": "SELECT COUNT(*) FROM egypt_sites WHERE has_virtual_tour \u003d true;", + "sql_explanation": "Count the number of cultural heritage sites in Egypt with virtual tours by using the COUNT() function and filtering by has_virtual_tour." 
+}, { + "id": "5206", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all heritage sites in France?", + "sql_context": "CREATE TABLE heritage_sites (site_id INT, site_name TEXT, country TEXT); INSERT INTO heritage_sites (site_id, site_name, country) VALUES (1, \u0027Eiffel Tower\u0027, \u0027France\u0027), (2, \u0027Statue of Liberty\u0027, \u0027USA\u0027), (3, \u0027Sydney Opera House\u0027, \u0027Australia\u0027);", + "sql": "SELECT site_name FROM heritage_sites WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "This query filters the heritage_sites table to only include rows where the country is France, then selects the site_name column to display the names of all heritage sites in France." 
+}, { + "id": "5333", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of reviews for eco-friendly hotels in India?", + "sql_context": "CREATE TABLE eco_hotels (hotel_id INT, hotel_name TEXT, reviews INT, country TEXT); INSERT INTO eco_hotels (hotel_id, hotel_name, reviews, country) VALUES (1, \u0027Eco Lodge Jaipur\u0027, 100, \u0027India\u0027), (2, \u0027Green Hotel New Delhi\u0027, 150, \u0027India\u0027);", + "sql": "SELECT MAX(reviews) FROM eco_hotels WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the maximum number of reviews for eco-friendly hotels in India by using the MAX() function on the \u0027reviews\u0027 column and filtering for hotels in India." 
+}, { + "id": "5525", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of hotels in Peru?", + "sql_context": "CREATE TABLE hotels(id INT, name TEXT, country TEXT, rating FLOAT); INSERT INTO hotels (id, name, country, rating) VALUES (1, \u0027Hotel Cusco\u0027, \u0027Peru\u0027, 4.2), (2, \u0027Hotel Lima\u0027, \u0027Peru\u0027, 4.5);", + "sql": "SELECT MIN(rating) FROM hotels WHERE country \u003d \u0027Peru\u0027;", + "sql_explanation": "This query retrieves the minimum rating of hotels in Peru. It calculates the minimum rating of the hotels table where the country is \u0027Peru\u0027." +}, { + "id": "5562", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the local economic impact of tourism in Mexico.", + "sql_context": "CREATE TABLE impact (id INT, country TEXT, tourism INT, economic INT); INSERT INTO impact (id, country, tourism, economic) VALUES (1, \u0027Mexico\u0027, 30000, 500000);", + "sql": "SELECT economic FROM impact WHERE country \u003d \u0027Mexico\u0027;", + "sql_explanation": "This query retrieves the local economic impact of tourism in Mexico by finding the record with country \u0027Mexico\u0027 in the \u0027impact\u0027 table." 
+}, { + "id": "5828", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many eco-friendly hotels are there in total in the eco_hotels table?", + "sql_context": "CREATE TABLE eco_hotels (hotel_id INT, name TEXT, city TEXT); INSERT INTO eco_hotels (hotel_id, name, city) VALUES (1, \u0027Green Hotel\u0027, \u0027Paris\u0027), (2, \u0027Eco Lodge\u0027, \u0027Paris\u0027), (3, \u0027Eco Inn\u0027, \u0027London\u0027), (4, \u0027Sustainable Hotel\u0027, \u0027Rome\u0027);", + "sql": "SELECT COUNT(*) FROM eco_hotels;", + "sql_explanation": "Count the total number of eco-friendly hotels in the eco_hotels table by counting the rows in the table." 
+}, { + "id": "1161", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert data into \u0027oil_reservoirs\u0027", + "sql_context": "CREATE TABLE oil_reservoirs (reservoir_id INT, reservoir_name VARCHAR(100), location VARCHAR(100), oil_capacity FLOAT); INSERT INTO oil_reservoirs (reservoir_id, reservoir_name, location, oil_capacity) VALUES (1, \u0027Girassol\u0027, \u0027Angola\u0027, 800), (2, \u0027Jazireh-e-Jafar\u0027, \u0027Iran\u0027, 1500);", + "sql": "INSERT INTO oil_reservoirs (reservoir_id, reservoir_name, location, oil_capacity) VALUES (3, \u0027Thunder Horse\u0027, \u0027Gulf of Mexico\u0027, 1200), (4, \u0027Kashagan\u0027, \u0027Caspian Sea\u0027, 1100);", + "sql_explanation": "This SQL statement inserts 2 new rows into the oil_reservoirs table, adding the Thunder Horse and Kashagan oil reservoirs with their respective capacities." 
+}, { + "id": "1479", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total oil production in Q1 2020 for wells in the North Sea?", + "sql_context": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), oil_production FLOAT, gas_production FLOAT, location VARCHAR(50)); INSERT INTO wells (well_id, well_name, oil_production, gas_production, location) VALUES (1, \u0027Well A\u0027, 1500, 2000, \u0027North Sea\u0027), (2, \u0027Well B\u0027, 1200, 1800, \u0027North Sea\u0027);", + "sql": "SELECT SUM(oil_production) FROM wells WHERE location \u003d \u0027North Sea\u0027 AND EXTRACT(MONTH FROM timestamp) BETWEEN 1 AND 3 AND EXTRACT(YEAR FROM timestamp) \u003d 2020;", + "sql_explanation": "The SQL query calculates the total oil production for wells located in the North Sea in Q1 2020 by summing the oil_production column, filtering the data by the location and extraction timestamps." 
+}, { + "id": "1850", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of oil and gas in the Gulf of Mexico from 2015 to 2020", + "sql_context": "CREATE TABLE gov_production (field VARCHAR(50), year INT, oil_production FLOAT, gas_production FLOAT); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC123\u0027, 2015, 1234.5, 678.9); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC456\u0027, 2016, 2345.6, 789.0); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC789\u0027, 2017, 3456.7, 890.1); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC111\u0027, 2018, 4567.8, 901.2); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC222\u0027, 2019, 5678.9, 1001.3); INSERT INTO gov_production (field, year, oil_production, gas_production) VALUES (\u0027MC333\u0027, 2020, 6789.0, 1101.4);", + "sql": "SELECT SUM(oil_production) + SUM(gas_production) as total_production FROM gov_production WHERE year BETWEEN 2015 AND 2020 AND field LIKE \u0027MC%\u0027;", + "sql_explanation": "This query calculates the total production by adding oil_production and gas_production, filters for records in the Gulf of Mexico (fields starting with \u0027MC\u0027) and the years 2015 to 2020, and finally returns the total production for that period." 
+}, { + "id": "2138", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily gas production, in cubic feet, for all wells in the Marcellus Shale, for the last 6 months?", + "sql_context": "CREATE TABLE GasProduction (ProductionID INT, Location VARCHAR(20), ProductionMonth DATE, GasProduction INT); INSERT INTO GasProduction (ProductionID, Location, ProductionMonth, GasProduction) VALUES (1, \u0027Marcellus Shale\u0027, \u00272022-06-01\u0027, 1200000), (2, \u0027Marcellus Shale\u0027, \u00272022-05-01\u0027, 1100000), (3, \u0027Barnett Shale\u0027, \u00272022-04-01\u0027, 1000000);", + "sql": "SELECT AVG(GasProduction) FROM GasProduction WHERE Location \u003d \u0027Marcellus Shale\u0027 AND ProductionMonth \u003e\u003d DATEADD(month, -6, GETDATE());", + "sql_explanation": "This SQL query calculates the average daily gas production, in cubic feet, for all wells in the Marcellus Shale for the last 6 months. It uses the AVG aggregate function to calculate the average gas production, and the WHERE clause to filter the rows to only those that are from the Marcellus Shale and have a production month within the last 6 months." 
+}, { + "id": "2409", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total oil production for OPEC members in 2019?", + "sql_context": "CREATE TABLE opec_data (country text, year integer, oil_production real); INSERT INTO opec_data (country, year, oil_production) VALUES (\u0027Saudi Arabia\u0027, 2019, 500000), (\u0027Iran\u0027, 2019, 350000), (\u0027Iraq\u0027, 2019, 400000), (\u0027UAE\u0027, 2019, 250000), (\u0027Kuwait\u0027, 2019, 200000);", + "sql": "SELECT SUM(oil_production) FROM opec_data WHERE year \u003d 2019 AND country IN (\u0027Saudi Arabia\u0027, \u0027Iran\u0027, \u0027Iraq\u0027, \u0027UAE\u0027, \u0027Kuwait\u0027);", + "sql_explanation": "This query sums the oil_production column in the opec_data table for the year 2019 for the countries \u0027Saudi Arabia\u0027, \u0027Iran\u0027, \u0027Iraq\u0027, \u0027UAE\u0027, and \u0027Kuwait\u0027 to find the total oil production for OPEC members in 2019." 
+}, { + "id": "2723", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wells were drilled in \u0027FieldH\u0027 between 2017 and 2019?", + "sql_context": "CREATE TABLE wells (well_id varchar(10), field varchar(10), datetime date); INSERT INTO wells (well_id, field, datetime) VALUES (\u0027W009\u0027, \u0027FieldH\u0027, \u00272018-01-01\u0027), (\u0027W010\u0027, \u0027FieldH\u0027, \u00272019-01-01\u0027);", + "sql": "SELECT COUNT(DISTINCT well_id) FROM wells WHERE field \u003d \u0027FieldH\u0027 AND datetime BETWEEN \u00272017-01-01\u0027 AND \u00272019-12-31\u0027;", + "sql_explanation": "This query counts the number of wells in \u0027FieldH\u0027 that were drilled between 2017 and 2019 by counting the distinct well_ids where the field is \u0027FieldH\u0027 and the datetime is between \u00272017-01-01\u0027 and \u00272019-12-31\u0027." 
+}, { + "id": "2735", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the producing wells in the North Sea and Gulf of Mexico", + "sql_context": "CREATE TABLE wells (id INT, well_name VARCHAR(100), location VARCHAR(50), status VARCHAR(20)); INSERT INTO wells VALUES (1, \u0027Well A\u0027, \u0027North Sea\u0027, \u0027Producing\u0027); INSERT INTO wells VALUES (2, \u0027Well B\u0027, \u0027Gulf of Mexico\u0027, \u0027Abandoned\u0027); INSERT INTO wells VALUES (3, \u0027Well C\u0027, \u0027Gulf of Mexico\u0027, \u0027Producing\u0027); INSERT INTO wells VALUES (4, \u0027Well D\u0027, \u0027North Sea\u0027, \u0027Producing\u0027);", + "sql": "SELECT well_name FROM wells WHERE (location \u003d \u0027North Sea\u0027 OR location \u003d \u0027Gulf of Mexico\u0027) AND status \u003d \u0027Producing\u0027;", + "sql_explanation": "This query selects the \u0027well_name\u0027 from the \u0027wells\u0027 table where the \u0027location\u0027 is either \u0027North Sea\u0027 or \u0027Gulf of Mexico\u0027 and the \u0027status\u0027 is \u0027Producing\u0027." 
+}, { + "id": "2863", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum daily production rate of oil wells in the Arctic Ocean that were drilled after 2016?", + "sql_context": "CREATE TABLE arctic_ocean (id INT, well_name VARCHAR(255), drill_date DATE, daily_production_oil FLOAT);", + "sql": "SELECT MAX(daily_production_oil) as max_daily_production_oil FROM arctic_ocean WHERE drill_date \u003e \u00272016-12-31\u0027;", + "sql_explanation": "The query calculates the maximum daily production rate of oil wells in the Arctic Ocean that were drilled after 2016 by using the MAX function." +}, { + "id": "2888", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total number of pipelines in the United States and Canada", + "sql_context": "CREATE TABLE pipelines_us_canada (pipeline_name VARCHAR(50), country VARCHAR(50), length INT); INSERT INTO pipelines_us_canada (pipeline_name, country, length) VALUES (\u0027Keystone XL\u0027, \u0027Canada\u0027, 1900), (\u0027Energy East\u0027, \u0027Canada\u0027, 4600), (\u0027Dakota Access\u0027, \u0027United States\u0027, 1172), (\u0027Gulf Coast\u0027, \u0027United States\u0027, 979);", + "sql": "SELECT SUM(IIF(country \u003d \u0027Canada\u0027, 1, 0)) + SUM(IIF(country \u003d \u0027United States\u0027, 1, 0)) 
FROM pipelines_us_canada;", + "sql_explanation": "This query determines the total number of pipelines in the United States and Canada by summing 1 for each row where the country is either \u0027Canada\u0027 or \u0027United States\u0027 using the IIF function." +}, { + "id": "2990", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all production data for the \u0027Gas\u0027 production type, for wells located in \u0027Norway\u0027, ordered by production date in descending order.", + "sql_context": "CREATE TABLE production (production_id INT, well_id INT, production_date DATE, production_type TEXT, country TEXT); INSERT INTO production (production_id, well_id, production_date, production_type, country) VALUES (1, 1, \u00272018-01-01\u0027, \u0027Oil\u0027, \u0027USA\u0027), (2, 1, \u00272018-01-02\u0027, \u0027Gas\u0027, \u0027Norway\u0027), (3, 2, \u00272019-05-03\u0027, \u0027Oil\u0027, \u0027Canada\u0027), (4, 3, \u00272020-02-04\u0027, \u0027Gas\u0027, \u0027Norway\u0027), (5, 4, \u00272021-03-09\u0027, \u0027Oil\u0027, \u0027Brazil\u0027), (6, 5, \u00272021-04-15\u0027, \u0027Gas\u0027, \u0027India\u0027);", + "sql": "SELECT * FROM production WHERE production_type \u003d \u0027Gas\u0027 AND country \u003d \u0027Norway\u0027 ORDER BY production_date DESC;", + "sql_explanation": "This SQL query filters the \u0027production\u0027 table to only include records with a \u0027production_type\u0027 of \u0027Gas\u0027 and a \u0027country\u0027 of \u0027Norway\u0027. It then orders the results by the \u0027production_date\u0027 column in descending order." 
+}, { + "id": "3165", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new well with well_id 4, well_type \u0027onshore\u0027, location \u0027Alberta\u0027, and production_rate 900.", + "sql_context": "CREATE TABLE wells (well_id INT, well_type VARCHAR(10), location VARCHAR(20), production_rate FLOAT);", + "sql": "INSERT INTO wells (well_id, well_type, location, production_rate) VALUES (4, \u0027onshore\u0027, \u0027Alberta\u0027, 900);", + "sql_explanation": "Insert a new well with well_id 4, well_type \u0027onshore\u0027, location \u0027Alberta\u0027, and production_rate 900 by using the INSERT INTO statement, specifying the wells table and the values for the well_id, well_type, location, and production_rate columns." 
+}, { + "id": "3249", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of offshore drilling platforms in the South China Sea as of 2018.", + "sql_context": "CREATE TABLE south_china_sea_platforms (year INT, region VARCHAR(20), num_platforms INT); INSERT INTO south_china_sea_platforms (year, region, num_platforms) VALUES (2015, \u0027South China Sea\u0027, 1500), (2016, \u0027South China Sea\u0027, 1550), (2017, \u0027South China Sea\u0027, 1600), (2018, \u0027South China Sea\u0027, 1650), (2019, \u0027South China Sea\u0027, 1700), (2020, \u0027South China Sea\u0027, 1750);", + "sql": "SELECT num_platforms FROM south_china_sea_platforms WHERE year \u003d 2018 AND region \u003d \u0027South China Sea\u0027;", + "sql_explanation": "This query filters the \u0027south_china_sea_platforms\u0027 table for the year 2018 and the South China Sea region, then returns the number of platforms for the specified criteria." 
+}, { + "id": "3277", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total production volume for wells in the Middle East in January 2022", + "sql_context": "CREATE TABLE wells (id INT, region VARCHAR(20), volume INT, date DATE); INSERT INTO wells (id, region, volume, date) VALUES (1, \u0027Middle East\u0027, 1000, \u00272022-01-01\u0027); INSERT INTO wells (id, region, volume, date) VALUES (2, \u0027Middle East\u0027, 2000, \u00272022-01-02\u0027); INSERT INTO wells (id, region, volume, date) VALUES (3, \u0027Middle East\u0027, 3000, \u00272022-01-03\u0027);", + "sql": "SELECT SUM(volume) FROM wells WHERE region \u003d \u0027Middle East\u0027 AND MONTH(date) \u003d 1 AND YEAR(date) \u003d 2022;", + "sql_explanation": "This SQL query finds the total production volume for wells in the Middle East in January 2022 by selecting all rows with a region of \u0027Middle East\u0027 and a date in January 2022, and then summing the volume of those rows." 
+}, { + "id": "3727", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 wells with the highest daily gas production", + "sql_context": "CREATE TABLE wells (well_id INT, daily_gas_production FLOAT); INSERT INTO wells (well_id, daily_gas_production) VALUES (1, 1000000), (2, 2000000), (3, 1500000), (4, 2500000), (5, 3000000);", + "sql": "SELECT well_id, daily_gas_production FROM wells ORDER BY daily_gas_production DESC LIMIT 3;", + "sql_explanation": "This query orders the wells table by daily gas production in descending order and returns the top 3 rows with the highest daily gas production." +}, { + "id": "4122", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of well \u0027W001\u0027 in the year 2020?", + "sql_context": "CREATE TABLE wells (well_id varchar(10), production int); INSERT INTO wells (well_id, production) VALUES (\u0027W001\u0027, 1500), (\u0027W002\u0027, 1200);", + "sql": "SELECT SUM(production) FROM wells WHERE well_id \u003d \u0027W001\u0027 AND YEAR(datetime) \u003d 2020;", + "sql_explanation": "This query calculates the total production of well \u0027W001\u0027 in 2020 by summing the production values where the well_id is \u0027W001\u0027 and the year of the datetime is 2020." 
+}, { + "id": "4420", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027wells\u0027 table where the \u0027well_type\u0027 is \u0027deviated\u0027 and the \u0027region\u0027 is \u0027Gulf of Mexico\u0027", + "sql_context": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), well_type VARCHAR(20), region VARCHAR(30)); INSERT INTO wells (well_id, well_name, well_type, region) VALUES (1, \u0027Well A\u0027, \u0027deviated\u0027, \u0027Gulf of Mexico\u0027); INSERT INTO wells (well_id, well_name, well_type, region) VALUES (2, \u0027Well B\u0027, \u0027horizontal\u0027, \u0027Gulf of Mexico\u0027);", + "sql": "DELETE FROM wells WHERE well_type \u003d \u0027deviated\u0027 AND region \u003d \u0027Gulf of Mexico\u0027;", + "sql_explanation": "This query deletes all records from the \u0027wells\u0027 table where the \u0027well_type\u0027 is \u0027deviated\u0027 and the \u0027region\u0027 is \u0027Gulf of Mexico\u0027." 
+}, { + "id": "4428", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many inactive rigs are there in the \u0027North Sea\u0027 region?", + "sql_context": "CREATE TABLE rigs (rig_id INT, rig_name VARCHAR(255), status VARCHAR(255), region VARCHAR(255)); INSERT INTO rigs (rig_id, rig_name, status, region) VALUES (1, \u0027Rig1\u0027, \u0027active\u0027, \u0027Alaska\u0027), (2, \u0027Rig2\u0027, \u0027inactive\u0027, \u0027Alaska\u0027), (3, \u0027Rig3\u0027, \u0027active\u0027, \u0027North Sea\u0027);", + "sql": "SELECT COUNT(*) FROM rigs WHERE status \u003d \u0027inactive\u0027 AND region \u003d \u0027North Sea\u0027;", + "sql_explanation": "This query counts the number of inactive rigs (COUNT(*)) in the \u0027North Sea\u0027 (WHERE status \u003d \u0027inactive\u0027 AND region \u003d \u0027North Sea\u0027) from the rigs table." 
+}, { + "id": "4489", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production for wells in the \u0027offshore\u0027 region in 2020?", + "sql_context": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), region VARCHAR(20), production FLOAT, year INT); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (1, \u0027Well A\u0027, \u0027onshore\u0027, 100.0, 2019); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (2, \u0027Well B\u0027, \u0027offshore\u0027, 200.0, 2020); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (3, \u0027Well C\u0027, \u0027onshore\u0027, 150.0, 2021);", + "sql": "SELECT SUM(production) FROM wells WHERE region \u003d \u0027offshore\u0027 AND year \u003d 2020;", + "sql_explanation": "The SQL query calculates the total production for wells located in the \u0027offshore\u0027 region in the year 2020 by using the SUM function on the \u0027production\u0027 column, and filtering the rows by the \u0027region\u0027 and \u0027year\u0027 columns with the values \u0027offshore\u0027 and 2020, respectively." 
+}, { + "id": "4704", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the fields for wells drilled in California in 2019", + "sql_context": "CREATE TABLE wells (id INT, well_name VARCHAR(255), location VARCHAR(255), drill_year INT, company VARCHAR(255)); INSERT INTO wells (id, well_name, location, drill_year, company) VALUES (1, \u0027Well001\u0027, \u0027Texas\u0027, 2020, \u0027CompanyA\u0027); INSERT INTO wells (id, well_name, location, drill_year, company) VALUES (2, \u0027Well002\u0027, \u0027Colorado\u0027, 2019, \u0027CompanyB\u0027); INSERT INTO wells (id, well_name, location, drill_year, company) VALUES (3, \u0027Well003\u0027, \u0027California\u0027, 2019, \u0027CompanyC\u0027);", + "sql": "SELECT * FROM wells WHERE drill_year \u003d 2019 AND location \u003d \u0027California\u0027;", + "sql_explanation": "This query retrieves all fields from the \u0027wells\u0027 table where the \u0027drill_year\u0027 is 2019 and the \u0027location\u0027 is \u0027California\u0027." 
+}, { + "id": "4758", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production figure for all wells located in the Caspian Sea?", + "sql_context": "CREATE TABLE wells (well_id varchar(10), region varchar(20), production_figures int); INSERT INTO wells (well_id, region, production_figures) VALUES (\u0027W005\u0027, \u0027Caspian Sea\u0027, 2500), (\u0027W006\u0027, \u0027Caspian Sea\u0027, 1500);", + "sql": "SELECT SUM(production_figures) FROM wells WHERE region \u003d \u0027Caspian Sea\u0027;", + "sql_explanation": "This SQL query calculates the total production figure for all wells located in the Caspian Sea by summing up the production_figures of those wells." 
+}, { + "id": "4847", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the drilling rigs with their respective operators located in the North Sea region.", + "sql_context": "CREATE TABLE drilling_rigs (id INT, name VARCHAR(50), operator VARCHAR(50), location VARCHAR(50)); INSERT INTO drilling_rigs VALUES (1, \u0027Rig A\u0027, \u0027Company X\u0027, \u0027North Sea\u0027); INSERT INTO drilling_rigs VALUES (2, \u0027Rig B\u0027, \u0027Company Y\u0027, \u0027North Sea\u0027); INSERT INTO drilling_rigs VALUES (3, \u0027Rig C\u0027, \u0027Company Z\u0027, \u0027Baltic Sea\u0027);", + "sql": "SELECT name, operator FROM drilling_rigs WHERE location \u003d \u0027North Sea\u0027;", + "sql_explanation": "This query lists all the drilling rigs located in the North Sea region along with their respective operators. It does so by filtering the records based on the location column and selecting the name and operator columns for the final result." 
+}, { + "id": "5040", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many exploration projects have been initiated in the \u0027Africa\u0027 region?", + "sql_context": "CREATE TABLE exploration_projects (project_id INT, project_name VARCHAR(50), region VARCHAR(50)); INSERT INTO exploration_projects (project_id, project_name, region) VALUES (1, \u0027Project X\u0027, \u0027Africa\u0027), (2, \u0027Project Y\u0027, \u0027Europe\u0027);", + "sql": "SELECT COUNT(*) FROM exploration_projects WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "This query calculates the number of exploration projects in the \u0027Africa\u0027 region by counting the number of rows in the \u0027exploration_projects\u0027 table with a \u0027region\u0027 value of \u0027Africa\u0027." 
+}, { + "id": "5060", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production volume in the Middle East?", + "sql_context": "CREATE TABLE production (id INT, location VARCHAR(20), volume INT); INSERT INTO production (id, location, volume) VALUES (1, \u0027Middle East\u0027, 100000); INSERT INTO production (id, location, volume) VALUES (2, \u0027Middle East\u0027, 110000); INSERT INTO production (id, location, volume) VALUES (3, \u0027Europe\u0027, 70000);", + "sql": "SELECT AVG(volume) FROM production WHERE location \u003d \u0027Middle East\u0027;", + "sql_explanation": "This query calculates the average production volume in the Middle East. It selects the average value of the \u0027volume\u0027 column for rows with \u0027location\u0027 equal to \u0027Middle East\u0027." 
+}, { + "id": "5176", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average production rate (bpd) of wells in Brazil.", + "sql_context": "CREATE TABLE well_production (id INT, country VARCHAR(50), rate FLOAT);", + "sql": "SELECT AVG(rate) FROM well_production WHERE country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query calculates the average well production rate (bpd) for wells located in Brazil by averaging the \u0027rate\u0027 column in the well_production table." +}, { + "id": "5279", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of well \u0027W001\u0027 in the Oil_Production table?", + "sql_context": "CREATE TABLE Oil_Production (well text, production_date date, quantity real); INSERT INTO Oil_Production (well, production_date, quantity) VALUES (\u0027W001\u0027, \u00272021-01-01\u0027, 150.5), (\u0027W001\u0027, \u00272021-01-02\u0027, 160.3);", + "sql": "SELECT SUM(quantity) FROM Oil_Production WHERE well \u003d \u0027W001\u0027;", + "sql_explanation": "The SQL query calculates the total production of well \u0027W001\u0027 by summing up the quantity for all rows with well \u0027W001\u0027." 
+}, { + "id": "5282", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum gas production volume for the year 2021 from the \u0027gas_production\u0027 table", + "sql_context": "CREATE TABLE gas_production (well_id INT, year INT, gas_volume FLOAT);", + "sql": "SELECT MAX(gas_volume) FROM gas_production WHERE year \u003d 2021;", + "sql_explanation": "This SQL query finds the maximum gas production volume for the year 2021 by selecting the maximum value from the \u0027gas_volume\u0027 column in the \u0027gas_production\u0027 table where the \u0027year\u0027 column is equal to 2021." +}, { + "id": "5324", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which wells are located in the \u0027south\u0027 region?", + "sql_context": "CREATE TABLE well_locations (well_name TEXT, region TEXT); INSERT INTO well_locations (well_name, region) VALUES (\u0027Well A\u0027, \u0027north\u0027), (\u0027Well B\u0027, \u0027south\u0027), (\u0027Well C\u0027, \u0027north\u0027);", + "sql": "SELECT well_name FROM well_locations WHERE region \u003d \u0027south\u0027;", + "sql_explanation": "This SQL query selects the \u0027well_name\u0027 column from the \u0027well_locations\u0027 table where the \u0027region\u0027 is equal to \u0027south\u0027. 
It returns the names of all wells located in the south region." +}, { + "id": "5326", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027fields\u0027 table where the location is not in the Gulf of Mexico.", + "sql_context": "CREATE TABLE fields (field_id INT, field_name TEXT, location TEXT); INSERT INTO fields (field_id, field_name, location) VALUES (1, \u0027Field A\u0027, \u0027Gulf of Mexico\u0027), (2, \u0027Field B\u0027, \u0027North Sea\u0027), (3, \u0027Field C\u0027, \u0027Brazil\u0027);", + "sql": "DELETE FROM fields WHERE location NOT IN (\u0027Gulf of Mexico\u0027);", + "sql_explanation": "This query deletes records from the fields table where the location is not \u0027Gulf of Mexico\u0027." 
+}, { + "id": "5344", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the wells in the \u0027Permian\u0027 basin that were completed after 2018.", + "sql_context": "CREATE TABLE permian_wells (well text, completion_year integer); INSERT INTO permian_wells VALUES (\u0027Well1\u0027, 2016), (\u0027Well2\u0027, 2017), (\u0027Well3\u0027, 2019), (\u0027Well4\u0027, 2018), (\u0027Well5\u0027, 2020);", + "sql": "SELECT well FROM permian_wells WHERE completion_year \u003e 2018;", + "sql_explanation": "We selected all the wells in the \u0027Permian\u0027 basin that were completed after 2018 by filtering the completion year in the permian_wells table." 
+}, { + "id": "5347", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List well IDs and their depths in the \u0027ArcticOcean\u0027 schema, ordered by depth.", + "sql_context": "CREATE TABLE ArcticOcean.wells (well_id INT, depth FLOAT); INSERT INTO ArcticOcean.wells (well_id, depth) VALUES (1, 1200.5), (2, 1500.3), (3, 1750.2);", + "sql": "SELECT well_id, depth FROM ArcticOcean.wells ORDER BY depth;", + "sql_explanation": "This query retrieves all records from the \u0027wells\u0027 table in the \u0027ArcticOcean\u0027 schema, returning the \u0027well_id\u0027 and corresponding \u0027depth\u0027 values, and sorts the result set by the \u0027depth\u0027 column in ascending order." 
+}, { + "id": "5450", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027exploration_data\u0027 table where the \u0027discovery_year\u0027 is less than 2000", + "sql_context": "CREATE TABLE exploration_data (id INT, field VARCHAR(50), country VARCHAR(50), discovery_year INT, discovery_number FLOAT); INSERT INTO exploration_data (id, field, country, discovery_year, discovery_number) VALUES (1, \u0027Sahara\u0027, \u0027Algeria\u0027, 1999, 12345.67); INSERT INTO exploration_data (id, field, country, discovery_year, discovery_number) VALUES (2, \u0027Gulf of Mexico\u0027, \u0027USA\u0027, 2000, 9876.54);", + "sql": "DELETE FROM exploration_data WHERE discovery_year \u003c 2000;", + "sql_explanation": "This query deletes all records from the exploration_data table where the discovery_year is less than 2000." 
+}, { + "id": "5608", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Maximum production rate (bpd) for wells in the Beaufort Sea.", + "sql_context": "CREATE TABLE well_production_beaufort_sea (id INT, rate FLOAT); INSERT INTO well_production_beaufort_sea (id, rate) VALUES (1, 4000), (2, 8000), (3, 9000);", + "sql": "SELECT MAX(rate) FROM well_production_beaufort_sea;", + "sql_explanation": "This query calculates the maximum production rate (bpd) for wells in the Beaufort Sea by finding the maximum \u0027rate\u0027 in the well_production_beaufort_sea table." +}, { + "id": "5674", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \u0027oil_rig\u0027 table where the \u0027company\u0027 is \u0027XYZ Inc.\u0027", + "sql_context": "CREATE TABLE oil_rig (id INT, company VARCHAR(255), location VARCHAR(255), status VARCHAR(255));", + "sql": "DELETE FROM oil_rig WHERE company \u003d \u0027XYZ Inc.\u0027;", + "sql_explanation": "This SQL query deletes all records in the \u0027oil_rig\u0027 table where the \u0027company\u0027 is \u0027XYZ Inc.\u0027" +}, { + "id": "123", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + 
"sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of co-owned properties in New York City by gender.", + "sql_context": "CREATE TABLE NYC_Properties (PropertyID INT, Owner1 VARCHAR(255), Owner2 VARCHAR(255), Owner1Gender VARCHAR(10), Owner2Gender VARCHAR(10)); INSERT INTO NYC_Properties (PropertyID, Owner1, Owner2, Owner1Gender, Owner2Gender) VALUES (1, \u0027John\u0027, \u0027Alex\u0027, \u0027Male\u0027, \u0027Female\u0027), (2, \u0027Lisa\u0027, \u0027Ben\u0027, \u0027Female\u0027, \u0027Male\u0027), (3, \u0027Mike\u0027, \u0027Steve\u0027, \u0027Male\u0027, \u0027Male\u0027), (4, \u0027Sarah\u0027, \u0027Dan\u0027, \u0027Female\u0027, \u0027Male\u0027);", + "sql": "SELECT SUM(CASE WHEN Owner1Gender \u003d \u0027Male\u0027 THEN 1 ELSE 0 END) + SUM(CASE WHEN Owner2Gender \u003d \u0027Male\u0027 THEN 1 ELSE 0 END) AS MaleProperties, SUM(CASE WHEN Owner1Gender \u003d \u0027Female\u0027 THEN 1 ELSE 0 END) + SUM(CASE WHEN Owner2Gender \u003d \u0027Female\u0027 THEN 1 ELSE 0 END) AS FemaleProperties FROM NYC_Properties;", + "sql_explanation": "The SQL query calculates the number of co-owned properties in New York City by gender in the NYC_Properties table using the CASE statement, SUM() function, and filters the rows based on the Owner1Gender and Owner2Gender columns with the SELECT and FROM clauses." 
+}, { + "id": "1102", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of properties and the average property price for properties with a housing affordability score greater than 80 in the \"GreenCity\" schema?", + "sql_context": "CREATE TABLE Property (id INT, affordability_score INT, price FLOAT, city VARCHAR(20)); INSERT INTO Property (id, affordability_score, price, city) VALUES (1, 85, 500000, \u0027GreenCity\u0027), (2, 70, 700000, \u0027GreenCity\u0027), (3, 90, 300000, \u0027GreenCity\u0027);", + "sql": "SELECT COUNT(Property.id) AS total_properties, AVG(Property.price) AS avg_property_price FROM Property WHERE Property.city \u003d \u0027GreenCity\u0027 AND Property.affordability_score \u003e 80;", + "sql_explanation": "This query filters the Property table to only include rows where the city is \"GreenCity\" and the housing affordability score is greater than 80. It then calculates the total number of properties and the average property price for the filtered results." 
+}, { + "id": "2673", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average square footage of co-living units in the city of Austin?", + "sql_context": "CREATE TABLE properties (id INT, city VARCHAR(50), square_footage FLOAT); INSERT INTO properties (id, city, square_footage) VALUES (1, \u0027Austin\u0027, 1200.0), (2, \u0027Austin\u0027, 1500.0), (3, \u0027Seattle\u0027, 1800.0);", + "sql": "SELECT AVG(square_footage) FROM properties WHERE city \u003d \u0027Austin\u0027 AND square_footage IS NOT NULL AND city IS NOT NULL;", + "sql_explanation": "The SQL query calculates the average square footage of co-living units in the city of Austin. It does this by using the AVG function on the square_footage column, filtering the data by city using the WHERE clause." 
+}, { + "id": "2906", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a new table named \u0027sustainable_buildings\u0027 to store information about green-certified commercial properties, including the building_id, certification_body, and certification_date.", + "sql_context": "CREATE TABLE sustainable_buildings (building_id INT, certification_body VARCHAR(255), certification_date DATE);", + "sql": "CREATE TABLE sustainable_buildings (building_id INT, certification_body VARCHAR(255), certification_date DATE);", + "sql_explanation": "The SQL query creates a new table called \u0027sustainable_buildings\u0027 with three columns: building_id, certification_body, and certification_date." 
+}, { + "id": "3057", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of properties in the city of San Antonio that are both green-certified and co-owned?", + "sql_context": "CREATE TABLE properties (id INT, city VARCHAR(20), green_certified BOOLEAN, co_owned BOOLEAN); INSERT INTO properties (id, city, green_certified, co_owned) VALUES (1, \u0027San Antonio\u0027, true, true), (2, \u0027San Antonio\u0027, false, false), (3, \u0027San Antonio\u0027, true, false);", + "sql": "SELECT COUNT(*) FROM properties WHERE city \u003d \u0027San Antonio\u0027 AND green_certified \u003d true AND co_owned \u003d true;", + "sql_explanation": "This SQL query counts the total number of properties in the city of San Antonio that are both green-certified and co-owned. It does so by filtering the properties table for records where the city is San Antonio, the property is green-certified, and the property is co-owned, then counting the number of records." 
+}, { + "id": "3146", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \"property_coowners\" with columns \"property_id\", \"coowner_name\", and \"coowner_share\"", + "sql_context": "CREATE TABLE property_coowners (property_id INT, coowner_name VARCHAR(255), coowner_share DECIMAL(5,2));", + "sql": "CREATE TABLE property_coowners (property_id INT, coowner_name VARCHAR(255), coowner_share DECIMAL(5,2));", + "sql_explanation": "Creating a new table called \"property_coowners\" with three columns: \"property_id\" (integer), \"coowner_name\" (string of maximum length 255), and \"coowner_share\" (decimal number with two digits after the decimal point)." 
+}, { + "id": "3149", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum price of a property in a sustainable urbanism project in San Francisco?", + "sql_context": "CREATE TABLE sustainable_urbanism (property_id INT, city VARCHAR(50), price INT, sustainable_project BOOLEAN); INSERT INTO sustainable_urbanism (property_id, city, price, sustainable_project) VALUES (1, \u0027San Francisco\u0027, 900000, TRUE), (2, \u0027Portland\u0027, 400000, FALSE), (3, \u0027San Francisco\u0027, 1000000, TRUE), (4, \u0027Seattle\u0027, 700000, FALSE);", + "sql": "SELECT MIN(price) FROM sustainable_urbanism WHERE city \u003d \u0027San Francisco\u0027 AND sustainable_project \u003d TRUE;", + "sql_explanation": "The SQL query calculates the minimum price of a property in a sustainable urbanism project in San Francisco. It does this by filtering the sustainable_urbanism table for rows where the city is \u0027San Francisco\u0027 and the sustainable_project is TRUE, and then calculating the minimum price of those properties." 
+}, { + "id": "3281", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of properties in areas with green building certifications and sustainability ratings above 70?", + "sql_context": "CREATE TABLE properties (id INT, green_building_certified BOOLEAN, sustainability_rating FLOAT); INSERT INTO properties (id, green_building_certified, sustainability_rating) VALUES (1, true, 80.5), (2, false, 60.0);", + "sql": "SELECT COUNT(*) FROM properties WHERE green_building_certified \u003d true AND sustainability_rating \u003e 70;", + "sql_explanation": "This query counts the number of rows in the properties table where the property has a green building certification (green_building_certified \u003d true) and a sustainability rating above 70 (sustainability_rating \u003e 70)." 
+}, { + "id": "3729", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size in square feet of properties in urban areas, owned by women?", + "sql_context": "CREATE TABLE property (id INT, size_sqft INT, co_owner VARCHAR(255), area VARCHAR(255)); INSERT INTO property (id, size_sqft, co_owner, area) VALUES (1, 1200, \u0027Maria\u0027, \u0027urban\u0027), (2, 800, \u0027John\u0027, \u0027rural\u0027), (3, 1500, \u0027Lisa\u0027, \u0027urban\u0027), (4, 900, \u0027James\u0027, \u0027urban\u0027);", + "sql": "SELECT AVG(size_sqft) FROM property WHERE co_owner IN (\u0027Maria\u0027, \u0027Lisa\u0027) AND area \u003d \u0027urban\u0027;", + "sql_explanation": "The SQL query calculates the average size (in square feet) of properties in urban areas, owned by women (Maria and Lisa) by filtering the property table where the co_owner column matches the names and area is urban, then applying the AVG function to the size_sqft column." 
+}, { + "id": "3835", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many co-owned properties are there in the state of Texas with a size greater than 2000 square feet?", + "sql_context": "CREATE TABLE property (id INT, size INT, state VARCHAR(20), co_owned BOOLEAN);", + "sql": "SELECT COUNT(*) FROM property WHERE state \u003d \u0027Texas\u0027 AND co_owned \u003d TRUE AND size \u003e 2000;", + "sql_explanation": "1. Filter rows from the property table for those located in the state of Texas. 2. Filter further for rows where co_owned is TRUE. 3. Filter further for rows where size is greater than 2000 square feet. 4. Count the number of rows from the filtered dataset." 
+}, { + "id": "3839", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of the total square footage of properties in the \u0027sustainable_urbanism\u0027 view that are located in the city of \u0027San Francisco\u0027?", + "sql_context": "CREATE VIEW sustainable_urbanism AS SELECT properties.id, properties.city, SUM(properties.square_footage) as total_square_footage FROM properties JOIN sustainable_developments ON properties.id \u003d sustainable_developments.id GROUP BY properties.id, properties.city; INSERT INTO properties (id, city, square_footage) VALUES (1, \u0027Austin\u0027, 1800.0), (2, \u0027San Francisco\u0027, 2200.0), (3, \u0027Seattle\u0027, 1500.0); INSERT INTO sustainable_developments (id, property_name, low_income_area) VALUES (1, \u0027Green Heights\u0027, true), (2, \u0027Eco Estates\u0027, false), (3, \u0027Solar Vista\u0027, true);", + "sql": "SELECT SUM(total_square_footage) FROM sustainable_urbanism WHERE city \u003d \u0027San Francisco\u0027;", + "sql_explanation": "This query calculates the sum of the total square footage of properties in the \u0027sustainable_urbanism\u0027 view that are located in the city of \u0027San Francisco\u0027 by using the SUM function on the total_square_footage column, filtering the data by using the WHERE clause." 
+}, { + "id": "3956", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the green space percentage for the GreenVille area in the SustainableUrbanism table.", + "sql_context": "CREATE TABLE SustainableUrbanism (area TEXT, green_space_percentage FLOAT, public_transportation_score INT, walkability_score INT); INSERT INTO SustainableUrbanism (area, green_space_percentage, public_transportation_score, walkability_score) VALUES (\u0027Eastside\u0027, 0.3, 8, 9), (\u0027Westside\u0027, 0.5, 7, 8), (\u0027GreenVille\u0027, 0.4, 8, 8);", + "sql": "UPDATE SustainableUrbanism SET green_space_percentage \u003d 0.5 WHERE area \u003d \u0027GreenVille\u0027;", + "sql_explanation": "This query updates the green_space_percentage column in the SustainableUrbanism table to have a value of 0.5 for rows where the area is \u0027GreenVille\u0027." 
+}, { + "id": "4120", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the \u0027housing_subsidies\u0027 table where the policy has expired or is not yet in effect.", + "sql_context": "CREATE TABLE housing_subsidies (id INT, policy_name TEXT, start_date DATE, end_date DATE);", + "sql": "DELETE FROM housing_subsidies WHERE end_date \u003c CURDATE() OR start_date \u003e CURDATE();", + "sql_explanation": "This query deletes records from the \u0027housing_subsidies\u0027 table where the \u0027end_date\u0027 is before the current date or the \u0027start_date\u0027 is after the current date." +}, { + "id": "4496", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of properties in the city of Seattle with inclusive housing policies?", + "sql_context": "CREATE TABLE properties (id INT, property_id INT, city TEXT, inclusive BOOLEAN); INSERT INTO properties (id, property_id, city, inclusive) VALUES (1, 101, \u0027Austin\u0027, FALSE), (2, 102, \u0027Seattle\u0027, TRUE), (3, 103, \u0027Seattle\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM properties WHERE city \u003d \u0027Seattle\u0027 AND inclusive \u003d TRUE;", + "sql_explanation": "This query calculates the total number of properties in 
Seattle with inclusive housing policies by using the COUNT function on all rows (*), filtering for rows where the city is \u0027Seattle\u0027 and the inclusive column is TRUE." +}, { + "id": "4565", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of units in green-certified buildings?", + "sql_context": "CREATE TABLE green_buildings (building_id INT, num_units INT, is_green_certified BOOLEAN); INSERT INTO green_buildings (building_id, num_units, is_green_certified) VALUES (1, 20, true), (2, 30, false), (3, 40, true), (4, 50, true), (5, 60, false);", + "sql": "SELECT SUM(num_units) FROM green_buildings WHERE is_green_certified \u003d true;", + "sql_explanation": "This query calculates the total number of units in green-certified buildings. It uses the SUM function to find the sum of the num_units column for rows where the is_green_certified column is true." 
+}, { + "id": "4779", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rent for properties built after 2015 in Tokyo?", + "sql_context": "CREATE TABLE units (id INT, city VARCHAR, build_year INT, rent DECIMAL);", + "sql": "SELECT AVG(rent) FROM units WHERE city \u003d \u0027Tokyo\u0027 AND build_year \u003e 2015;", + "sql_explanation": "This SQL query calculates the average rent (AVG(rent)) for properties (id) in Tokyo (city \u003d \u0027Tokyo\u0027) that were built after 2015 (build_year \u003e 2015)." +}, { + "id": "4813", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum size of green buildings in the \u0027green_buildings\u0027 table?", + "sql_context": "CREATE TABLE green_buildings (id INT, size FLOAT, certification VARCHAR(255), PRIMARY KEY (id)); INSERT INTO green_buildings (id, size, certification) VALUES (1, 1200.0, \u0027LEED\u0027), (2, 1800.0, \u0027BREEAM\u0027), (3, 1500.0, \u0027WELL\u0027);", + "sql": "SELECT MAX(size) FROM green_buildings WHERE certification IS NOT NULL;", + "sql_explanation": "This SQL query retrieves the maximum size of green buildings by first filtering rows with a certification (certification IS NOT NULL) and then applying the MAX 
function to the \u0027size\u0027 column." +}, { + "id": "4858", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average co-ownership price per square foot in the Bay Area?", + "sql_context": "CREATE TABLE bay_area_prop (id INT, address TEXT, price FLOAT, size FLOAT, co_ownership BOOLEAN); INSERT INTO bay_area_prop (id, address, price, size, co_ownership) VALUES (1, \u0027123 Main St\u0027, 800000, 1500, TRUE), (2, \u0027456 Oak St\u0027, 1000000, 2000, FALSE);", + "sql": "SELECT AVG(price / size) FROM bay_area_prop WHERE co_ownership \u003d TRUE;", + "sql_explanation": "Calculate the average co-ownership price per square foot in the Bay Area by dividing the price by the size for each co-ownership property, then finding the average of those values." 
+}, { + "id": "4895", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum size of a sustainable property in Vancouver, BC?", + "sql_context": "CREATE TABLE SustainableProperties (Id INT, City VARCHAR(50), Size FLOAT); INSERT INTO SustainableProperties (Id, City, Size) VALUES (1, \u0027Vancouver\u0027, 1800.0), (2, \u0027Seattle\u0027, 1500.2), (3, \u0027Vancouver\u0027, 2000.5), (4, \u0027Austin\u0027, 1000.0);", + "sql": "SELECT MAX(Size) FROM SustainableProperties WHERE City \u003d \u0027Vancouver\u0027;", + "sql_explanation": "This query calculates the maximum size of a sustainable property in Vancouver, BC by selecting the SIZE column from the SustainableProperties table, filtering the data where the CITY column is equal to \u0027Vancouver\u0027, and then applying the MAX function to the SIZE column." 
+}, { + "id": "4907", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of inclusive housing units in Oakland?", + "sql_context": "CREATE TABLE inclusive_housing (id INT, city VARCHAR(20), total_area FLOAT); INSERT INTO inclusive_housing (id, city, total_area) VALUES (1, \u0027Oakland\u0027, 5000), (2, \u0027Berkeley\u0027, 4000), (3, \u0027San Francisco\u0027, 7000), (4, \u0027San Jose\u0027, 8000), (5, \u0027Los Angeles\u0027, 10000);", + "sql": "SELECT SUM(total_area) FROM inclusive_housing WHERE city \u003d \u0027Oakland\u0027;", + "sql_explanation": "The SQL query calculates the total area of inclusive housing units in Oakland by using the SUM function on the total_area column, and filtering the data with a WHERE clause to only include rows with the city value of \u0027Oakland\u0027." 
+}, { + "id": "4909", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total property value owned by a specific individual in San Francisco.", + "sql_context": "CREATE TABLE SanFrancisco_Properties (PropertyID INT, Owner VARCHAR(255), Price INT); INSERT INTO SanFrancisco_Properties (PropertyID, Owner, Price) VALUES (1, \u0027David\u0027, 900000), (2, \u0027Danielle\u0027, 800000), (3, \u0027David\u0027, 700000), (4, \u0027Danielle\u0027, 600000);", + "sql": "SELECT SUM(Price) FROM SanFrancisco_Properties WHERE Owner \u003d \u0027David\u0027;", + "sql_explanation": "The SQL query calculates the total property value owned by a specific individual, \u0027David\u0027, in the SanFrancisco_Properties table using the SUM() function and filters the rows based on the Owner column with the WHERE clause." 
+}, { + "id": "5047", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum property price for sustainable urbanism properties in Texas?", + "sql_context": "CREATE TABLE sustainable_urbanism (id INT, price FLOAT, state VARCHAR(20)); INSERT INTO sustainable_urbanism (id, price, state) VALUES (1, 700000, \u0027Texas\u0027), (2, 800000, \u0027Texas\u0027), (3, 900000, \u0027Texas\u0027);", + "sql": "SELECT MAX(price) FROM sustainable_urbanism WHERE state \u003d \u0027Texas\u0027;", + "sql_explanation": "This query calculates the maximum property price for sustainable urbanism properties in Texas using the MAX function on the price column and filtering rows using the WHERE clause to only consider rows with the state \u0027Texas\u0027." 
+}, { + "id": "5049", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average property price for co-ownership properties in California?", + "sql_context": "CREATE TABLE co_ownership (id INT, price FLOAT, location VARCHAR(20)); INSERT INTO co_ownership (id, price, location) VALUES (1, 500000, \u0027California\u0027), (2, 600000, \u0027California\u0027);", + "sql": "SELECT AVG(price) FROM co_ownership WHERE location \u003d \u0027California\u0027;", + "sql_explanation": "This query calculates the average price of co-ownership properties in California by using the AVG function on the price column, and filtering rows using the WHERE clause to only consider rows with the location \u0027California\u0027." 
+}, { + "id": "5213", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have affordable housing units with occupancy rates below 80%?", + "sql_context": "CREATE TABLE AffordableHousing (UnitID INT, City VARCHAR(50), OccupancyRate DECIMAL(4,2)); INSERT INTO AffordableHousing (UnitID, City, OccupancyRate) VALUES (1, \u0027San Francisco\u0027, 0.85), (2, \u0027New York\u0027, 0.92), (3, \u0027Los Angeles\u0027, 0.76);", + "sql": "SELECT City FROM AffordableHousing WHERE OccupancyRate \u003c 0.80;", + "sql_explanation": "The SQL query selects City from the AffordableHousing table where the OccupancyRate is below 80%." 
+}, { + "id": "5222", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average property price for green-certified buildings in the city of Seattle?", + "sql_context": "CREATE TABLE green_buildings (id INT, price FLOAT, city VARCHAR(20)); INSERT INTO green_buildings (id, price, city) VALUES (1, 750000, \u0027Seattle\u0027), (2, 850000, \u0027Seattle\u0027), (3, 650000, \u0027Portland\u0027);", + "sql": "SELECT AVG(price) FROM green_buildings WHERE city \u003d \u0027Seattle\u0027;", + "sql_explanation": "The SQL query calculates the average price of green-certified buildings in Seattle by using the AVG function on the price column, and filtering the data with a WHERE clause to only include rows with the city value of \u0027Seattle\u0027." 
+}, { + "id": "5225", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum property price for buildings in the UrbanSustainability schema with a green roof?", + "sql_context": "CREATE TABLE UrbanSustainability.GreenRoofBuildings (id INT, price FLOAT); INSERT INTO UrbanSustainability.GreenRoofBuildings (id, price) VALUES (3, 500000.0), (4, 700000.0);", + "sql": "SELECT MAX(price) FROM UrbanSustainability.GreenRoofBuildings;", + "sql_explanation": "This query calculates the maximum property price for buildings with green roofs in the GreenRoofBuildings table of the UrbanSustainability schema." +}, { + "id": "5375", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Alter the SustainableUrbanismByYear table to include a date column", + "sql_context": "CREATE TABLE SustainableUrbanismByYear (id INT PRIMARY KEY, city VARCHAR(50), state VARCHAR(50), initiative VARCHAR(100), year INT, date DATE);", + "sql": "ALTER TABLE SustainableUrbanismByYear ADD COLUMN date DATE;", + "sql_explanation": "This alters the SustainableUrbanismByYear table by adding a new column called \u0027date\u0027 with the data type DATE." 
+}, { + "id": "5445", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average property size for co-owned properties in the city of Austin?", + "sql_context": "CREATE TABLE co_ownership (id INT, property_id INT, owner TEXT, city TEXT, size INT); INSERT INTO co_ownership (id, property_id, owner, city, size) VALUES (1, 101, \u0027Alice\u0027, \u0027Austin\u0027, 1200), (2, 101, \u0027Bob\u0027, \u0027Austin\u0027, 1200), (3, 102, \u0027Carol\u0027, \u0027Seattle\u0027, 900);", + "sql": "SELECT AVG(size) FROM co_ownership WHERE city \u003d \u0027Austin\u0027;", + "sql_explanation": "This query calculates the average size of co-owned properties in Austin by using the AVG function on the size column, filtering for rows where the city is \u0027Austin\u0027." 
+}, { + "id": "5616", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Alter the InclusiveHousing table to include a date column", + "sql_context": "CREATE TABLE InclusiveHousing (id INT PRIMARY KEY, city VARCHAR(50), state VARCHAR(50), policy VARCHAR(100), date DATE);", + "sql": "ALTER TABLE InclusiveHousing ADD COLUMN date DATE;", + "sql_explanation": "This alters the InclusiveHousing table by adding a new column called \u0027date\u0027 with the data type DATE." +}, { + "id": "5747", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the maximum property price in the inclusive housing program?", + "sql_context": "CREATE TABLE inclusive_housing (program_id INT, property_id INT, price DECIMAL(10,2)); INSERT INTO inclusive_housing (program_id, property_id, price) VALUES (1, 1001, 300000.00), (1, 1002, 350000.00), (2, 2001, 400000.00);", + "sql": "SELECT MAX(price) FROM inclusive_housing;", + "sql_explanation": "Find the maximum price by ordering all prices and picking the last one." 
+}, { + "id": "1958", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors to the Museum of Modern Art in New York from January to June 2022?", + "sql_context": "CREATE TABLE MoMAVisitors (VisitorID int, VisitorName varchar(100), VisitDate date, MuseumName varchar(100)); INSERT INTO MoMAVisitors (VisitorID, VisitorName, VisitDate, MuseumName) VALUES (1, \u0027Visitor A\u0027, \u00272022-01-01\u0027, \u0027Museum of Modern Art\u0027), (2, \u0027Visitor B\u0027, \u00272022-03-01\u0027, \u0027Museum of Modern Art\u0027), (3, \u0027Visitor C\u0027, \u00272022-07-01\u0027, \u0027Museum of Modern Art\u0027);", + "sql": "SELECT COUNT(*) FROM MoMAVisitors WHERE MuseumName \u003d \u0027Museum of Modern Art\u0027 AND MONTH(VisitDate) BETWEEN 1 AND 6 AND YEAR(VisitDate) \u003d 2022;", + "sql_explanation": "This query filters the MoMAVisitors table by MuseumName, the month of VisitDate, and the year of VisitDate, counting the number of records that meet the criteria and represent the total number of visitors to the Museum of Modern Art in New York from January to June 2022." 
+}, { + "id": "2669", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of museum visitors in the last 6 months from Tokyo, Japan.", + "sql_context": "CREATE TABLE museum_visitors (id INT, visitor_name VARCHAR(255), visit_date DATE); INSERT INTO museum_visitors (id, visitor_name, visit_date) VALUES (\u0027John Smith\u0027, \u00272022-01-01\u0027), (\u0027Jane Doe\u0027, \u00272022-01-02\u0027), (\u0027Mike Johnson\u0027, \u00272022-07-01\u0027);", + "sql": "SELECT COUNT(*) FROM museum_visitors WHERE visit_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND city \u003d \u0027Tokyo\u0027;", + "sql_explanation": "This query selects the count of rows from the museum_visitors table, and filters the visit date to be greater than or equal to the current date minus 6 months and filters the city to Tokyo." 
+}, { + "id": "3137", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many museums are there in Tokyo, Japan?", + "sql_context": "CREATE TABLE Museums (id INT PRIMARY KEY, name VARCHAR(100), city VARCHAR(100), country VARCHAR(50)); INSERT INTO Museums (id, name, city, country) VALUES (1, \u0027Tokyo National Museum\u0027, \u0027Tokyo\u0027, \u0027Japan\u0027); INSERT INTO Museums (id, name, city, country) VALUES (2, \u0027Mori Art Museum\u0027, \u0027Tokyo\u0027, \u0027Japan\u0027);", + "sql": "SELECT COUNT(*) as museum_count FROM Museums WHERE Museums.city \u003d \u0027Tokyo\u0027 AND Museums.country \u003d \u0027Japan\u0027;", + "sql_explanation": "This SQL query filters for rows in the Museums table where the city is Tokyo and the country is Japan. Then, it counts the number of rows that meet this criteria." 
+}, { + "id": "3382", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of female artists in the \u0027ArtistDemographics\u0027 table?", + "sql_context": "CREATE TABLE ArtistDemographics (ArtistID INT, ArtistAge INT, ArtistGender VARCHAR(10)); INSERT INTO ArtistDemographics (ArtistID, ArtistAge, ArtistGender) VALUES (1, 37, \u0027Female\u0027), (2, 56, \u0027Male\u0027), (3, 81, \u0027Non-binary\u0027), (4, 62, \u0027Female\u0027), (5, 28, \u0027Female\u0027);", + "sql": "SELECT AVG(ArtistAge) AS AvgFemaleArtistAge FROM ArtistDemographics WHERE ArtistGender \u003d \u0027Female\u0027;", + "sql_explanation": "This SQL query calculates the average age of female artists by using the AVG function on the ArtistAge column, filtering the rows using the WHERE clause to only consider artists who identify as female." 
+}, { + "id": "3477", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of artworks in the \u0027sculpture\u0027 category created by artists from Europe?", + "sql_context": "CREATE TABLE artworks (id INT, name VARCHAR(255), year INT, artist_name VARCHAR(255), artist_birthplace VARCHAR(255), category VARCHAR(255)); INSERT INTO artworks (id, name, year, artist_name, artist_birthplace, category) VALUES (1, \u0027Painting\u0027, 1920, \u0027John\u0027, \u0027England\u0027, \u0027painting\u0027), (2, \u0027Sculpture\u0027, 1930, \u0027Sara\u0027, \u0027France\u0027, \u0027sculpture\u0027), (3, \u0027Print\u0027, 1940, \u0027Alex\u0027, \u0027Germany\u0027, \u0027print\u0027);", + "sql": "SELECT COUNT(*) FROM artworks WHERE category \u003d \u0027sculpture\u0027 AND artist_birthplace LIKE \u0027Europe%\u0027;", + "sql_explanation": "This SQL query counts the total number of artworks in the \u0027sculpture\u0027 category created by artists from Europe. It does this by selecting the count of all rows where the \u0027category\u0027 column is equal to \u0027sculpture\u0027 and the \u0027artist_birthplace\u0027 starts with \u0027Europe\u0027 (using the LIKE operator with a wildcard \u0027%\u0027)." 
+}, { + "id": "3511", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ticket price for dance events in the cities of New York and Chicago?", + "sql_context": "CREATE TABLE events (name VARCHAR(255), location VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2)); INSERT INTO events (name, location, category, price) VALUES (\u0027Swan Lake\u0027, \u0027Chicago\u0027, \u0027Dance\u0027, 95.00), (\u0027The Nutcracker\u0027, \u0027New York\u0027, \u0027Dance\u0027, 125.00), (\u0027Hamilton\u0027, \u0027Chicago\u0027, \u0027Theatre\u0027, 225.00);", + "sql": "SELECT AVG(price) FROM events WHERE location IN (\u0027New York\u0027, \u0027Chicago\u0027) AND category \u003d \u0027Dance\u0027;", + "sql_explanation": "This query calculates the average price from the events table, filters for the cities New York and Chicago and Dance category, and returns the average ticket price for dance events in these cities." 
+}, { + "id": "3549", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many art pieces were created by female artists from the \u0027Impressionism\u0027 movement?", + "sql_context": "CREATE TABLE art_pieces (piece_id INT, artist_name VARCHAR(50), artist_gender VARCHAR(10), artist_ethnicity VARCHAR(20), movement VARCHAR(20)); INSERT INTO art_pieces (piece_id, artist_name, artist_gender, artist_ethnicity, movement) VALUES (1, \u0027Claude Monet\u0027, \u0027Male\u0027, \u0027French\u0027, \u0027Impressionism\u0027); INSERT INTO art_pieces (piece_id, artist_name, artist_gender, artist_ethnicity, movement) VALUES (2, \u0027Mary Cassatt\u0027, \u0027Female\u0027, \u0027American\u0027, \u0027Impressionism\u0027);", + "sql": "SELECT COUNT(*) FROM art_pieces WHERE artist_gender \u003d \u0027Female\u0027 AND movement \u003d \u0027Impressionism\u0027;", + "sql_explanation": "This query counts the number of art pieces created by female artists in the \u0027Impressionism\u0027 movement by selecting all records with an artist_gender of \u0027Female\u0027 and movement of \u0027Impressionism\u0027, and then returning the count of these records." 
+}, { + "id": "3558", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 most attended events in the \u0027dance\u0027 category.", + "sql_context": "CREATE TABLE events (id INT, name VARCHAR(255), date DATE, category VARCHAR(255), attendance INT); INSERT INTO events (id, name, date, category, attendance) VALUES (1, \u0027Ballet\u0027, \u00272022-06-01\u0027, \u0027dance\u0027, 500), (2, \u0027Flamenco\u0027, \u00272022-06-02\u0027, \u0027dance\u0027, 400);", + "sql": "SELECT name, attendance FROM events WHERE category \u003d \u0027dance\u0027 ORDER BY attendance DESC LIMIT 5;", + "sql_explanation": "List the top 5 most attended events in the \u0027dance\u0027 category by using the ORDER BY clause and filtering the data based on the category." 
+}, { + "id": "3669", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total attendance at cultural events in \u0027New York\u0027 in 2021?", + "sql_context": "CREATE TABLE CulturalEvents (event_name TEXT, location TEXT, date DATE, attendance INTEGER); INSERT INTO CulturalEvents (event_name, location, date, attendance) VALUES (\u0027Event 1\u0027, \u0027New York\u0027, \u00272021-01-01\u0027, 500), (\u0027Event 2\u0027, \u0027New York\u0027, \u00272021-02-01\u0027, 700);", + "sql": "SELECT SUM(attendance) FROM CulturalEvents WHERE location \u003d \u0027New York\u0027 AND YEAR(date) \u003d 2021", + "sql_explanation": "This query calculates the total attendance at cultural events in New York in 2021. It uses the SUM() function to add up the attendance values for rows that meet the conditions and filters the CulturalEvents table for rows with \u0027New York\u0027 as the location and year of the date equal to 2021." 
+}, { + "id": "3802", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which museums in Paris and Rome have more than 5000 artworks in their collections?", + "sql_context": "CREATE TABLE museums (id INT, name VARCHAR(50), city VARCHAR(50), artworks_count INT); INSERT INTO museums (id, name, city, artworks_count) VALUES (1, \u0027Louvre Museum\u0027, \u0027Paris\u0027, 55000); INSERT INTO museums (id, name, city, artworks_count) VALUES (2, \u0027Vatican Museums\u0027, \u0027Rome\u0027, 70000);", + "sql": "SELECT name, city FROM museums WHERE city IN (\u0027Paris\u0027, \u0027Rome\u0027) AND artworks_count \u003e 5000;", + "sql_explanation": "This query selects the name and city columns from the museums table for rows where the city is either \u0027Paris\u0027 or \u0027Rome\u0027 and the artworks_count is greater than 5000." 
+}, { + "id": "3889", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027price\u0027 of all artworks in the \u0027Impressionist\u0027 style that were created before 1900 by increasing it by 10%.", + "sql_context": "CREATE TABLE artworks (artwork_id INT, title VARCHAR(255), style VARCHAR(64), year INT, price DECIMAL(10, 2));", + "sql": "UPDATE artworks SET price \u003d price * 1.10 WHERE style \u003d \u0027Impressionist\u0027 AND year \u003c 1900;", + "sql_explanation": "The SQL query first selects all artworks from the \u0027artworks\u0027 table where the \u0027style\u0027 is \u0027Impressionist\u0027 and the \u0027year\u0027 is before 1900. It then updates the \u0027price\u0027 of these artworks by increasing it by 10%." 
+}, { + "id": "4136", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artists in the database are from Asia?", + "sql_context": "CREATE TABLE artists (id INT, name TEXT, country TEXT); INSERT INTO artists (id, name, country) VALUES (1, \u0027Artist A\u0027, \u0027China\u0027), (2, \u0027Artist B\u0027, \u0027Japan\u0027), (3, \u0027Artist C\u0027, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM artists WHERE country IN (\u0027China\u0027, \u0027Japan\u0027, \u0027India\u0027, \u0027Korea\u0027);", + "sql_explanation": "This SQL query counts the number of artists in the database who are from Asia. It does this by using the COUNT function to count the number of rows in the \u0027artists\u0027 table that have a \u0027country\u0027 value of \u0027China\u0027, \u0027Japan\u0027, \u0027India\u0027, or \u0027Korea\u0027. The IN operator is used to specify the list of countries to consider." 
+}, { + "id": "4313", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of art pieces in the Museum of Modern Art in NY?", + "sql_context": "CREATE TABLE art_pieces (museum VARCHAR(255), quantity INT); INSERT INTO art_pieces (museum, quantity) VALUES (\u0027Museum of Modern Art, NY\u0027, 3025), (\u0027Guggenheim Museum, NY\u0027, 1500);", + "sql": "SELECT SUM(quantity) FROM art_pieces WHERE museum \u003d \u0027Museum of Modern Art, NY\u0027;", + "sql_explanation": "This query calculates the total number of art pieces in the Museum of Modern Art in NY by selecting the sum of the quantity column from the art_pieces table where the museum column is \u0027Museum of Modern Art, NY\u0027." 
+}, { + "id": "4820", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from all events in the year 2020?", + "sql_context": "CREATE SCHEMA culture; CREATE TABLE events (event_id INT, event_name VARCHAR(255), event_date DATE, revenue DECIMAL(10,2)); INSERT INTO events (event_id, event_name, event_date, revenue) VALUES (1, \u0027Concert\u0027, \u00272020-02-01\u0027, 5000.00), (2, \u0027Theater Play\u0027, \u00272020-05-15\u0027, 8000.00), (3, \u0027Art Exhibit\u0027, \u00272019-12-31\u0027, 3000.00);", + "sql": "SELECT SUM(revenue) FROM culture.events WHERE YEAR(event_date) \u003d 2020;", + "sql_explanation": "The SQL query calculates the total revenue for all events in the year 2020 by summing the revenue column where the event_date\u0027s year is equal to 2020." 
+}, { + "id": "5039", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average attendance at musical events in the \u0027culture_center\u0027 table?", + "sql_context": "CREATE TABLE culture_center (id INT, name VARCHAR(50), location VARCHAR(50), type VARCHAR(50), capacity INT, attendance INT);", + "sql": "SELECT AVG(attendance) FROM culture_center WHERE type \u003d \u0027Musical\u0027;", + "sql_explanation": "The SQL query calculates the average attendance at musical events by getting the mean value of the \u0027attendance\u0027 column in the \u0027culture_center\u0027 table where the \u0027type\u0027 is \u0027Musical\u0027." 
+}, { + "id": "5183", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total value of art collections in New York, NY.", + "sql_context": "CREATE TABLE art_collections (id INT, collection_name VARCHAR(255), city VARCHAR(255), value DECIMAL(10,2)); INSERT INTO art_collections (id, collection_name, city, value) VALUES (1, \u0027The Met Collection\u0027, \u0027New York\u0027, 50000000.00), (2, \u0027The Guggenheim Collection\u0027, \u0027New York\u0027, 20000000.00), (3, \u0027The MoMA Collection\u0027, \u0027New York\u0027, 30000000.00);", + "sql": "SELECT SUM(value) FROM art_collections WHERE city \u003d \u0027New York\u0027;", + "sql_explanation": "This query selects the sum of value from the art_collections table, and filters the city to New York." 
+}, { + "id": "5320", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of art pieces in European museums.", + "sql_context": "CREATE TABLE museums (id INT, name VARCHAR(50), region VARCHAR(50), art_pieces INT); INSERT INTO museums (id, name, region, art_pieces) VALUES (1, \u0027Louvre Museum\u0027, \u0027Europe\u0027, 5000), (2, \u0027British Museum\u0027, \u0027Europe\u0027, 7000), (3, \u0027Tokyo National Museum\u0027, \u0027Asia\u0027, 8000);", + "sql": "SELECT SUM(art_pieces) FROM museums WHERE region \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the total number of art pieces in European museums by selecting the \u0027art_pieces\u0027 column for all records where the \u0027region\u0027 column is equal to \u0027Europe\u0027 and applying the SUM() function to the result." 
+}, { + "id": "5371", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total weight of pieces by artist \u0027Picasso\u0027 in the \u0027Sculpture\u0027 table?", + "sql_context": "CREATE TABLE Sculpture (id INT PRIMARY KEY, name VARCHAR(50), artist VARCHAR(50), height DECIMAL(5,2), weight DECIMAL(5,2));", + "sql": "SELECT SUM(weight) FROM Sculpture WHERE artist \u003d \u0027Picasso\u0027;", + "sql_explanation": "This query uses the SUM function to add up the \u0027weight\u0027 column for all rows where the \u0027artist\u0027 column is equal to \u0027Picasso\u0027." +}, { + "id": "5434", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ticket price for theater events?", + "sql_context": "CREATE TABLE events (id INT, name TEXT, category TEXT, price DECIMAL(5,2)); INSERT INTO events (id, name, category, price) VALUES (1, \u0027Concert\u0027, \u0027music\u0027, 50.00), (2, \u0027Theater Play\u0027, \u0027theater\u0027, 75.00), (3, \u0027Jazz Festival\u0027, \u0027music\u0027, 35.00);", + "sql": "SELECT MAX(price) FROM events WHERE category \u003d \u0027theater\u0027;", + "sql_explanation": "This SQL query calculates the maximum ticket price for theater 
events. It does this by using the MAX function on the \u0027price\u0027 column, while filtering the rows with the WHERE clause to only consider the events in the \u0027theater\u0027 category." +}, { + "id": "5493", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ticket price for events in the \"Music\" category?", + "sql_context": "CREATE TABLE events (id INT, name VARCHAR(255), date DATE, category VARCHAR(255), price FLOAT); INSERT INTO events (id, name, date, category, price) VALUES (1, \u0027Concert\u0027, \u00272022-06-01\u0027, \u0027Music\u0027, 50.00), (2, \u0027Play\u0027, \u00272022-07-01\u0027, \u0027Theater\u0027, 30.00), (3, \u0027Festival\u0027, \u00272022-08-01\u0027, \u0027Music\u0027, 75.00);", + "sql": "SELECT AVG(price) FROM events WHERE category \u003d \u0027Music\u0027;", + "sql_explanation": "This query calculates the average ticket price for events in the \"Music\" category by summing all the \"price\" values for rows with \"Music\" in the \"category\" column and then dividing by the count of those rows." 
+}, { + "id": "5595", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with \u0027Unknown\u0027 artists from the \u0027ArtCollection\u0027 table.", + "sql_context": "CREATE TABLE ArtCollection (id INT, artist VARCHAR(50), title VARCHAR(100), year INT, type VARCHAR(50), PRIMARY KEY (id));", + "sql": "DELETE FROM ArtCollection WHERE artist \u003d \u0027Unknown\u0027;", + "sql_explanation": "This query deletes all records from the \u0027ArtCollection\u0027 table where the artist is named \u0027Unknown\u0027." +}, { + "id": "5669", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all artists who have created more than 50 pieces of art.", + "sql_context": "CREATE TABLE artists (id INT, name VARCHAR(50), art_pieces INT); INSERT INTO artists (id, name, art_pieces) VALUES (1, \u0027John Doe\u0027, 50), (2, \u0027Jane Smith\u0027, 75), (3, \u0027Alice Johnson\u0027, 30), (4, \u0027Bob Brown\u0027, 100);", + "sql": "SELECT name FROM artists WHERE art_pieces \u003e 50;", + "sql_explanation": "This query retrieves the names of all artists who have created more than 50 pieces of art by selecting the \u0027name\u0027 column for all records where the \u0027art_pieces\u0027 column is greater than 50." 
+}, { + "id": "5688", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum age and corresponding artist name from the \u0027artists\u0027 table?", + "sql_context": "CREATE TABLE artists (artist_id INT, name VARCHAR(50), age INT, country VARCHAR(50)); INSERT INTO artists (artist_id, name, age, country) VALUES (1, \u0027Pablo Picasso\u0027, 91, \u0027Spain\u0027); INSERT INTO artists (artist_id, name, age, country) VALUES (2, \u0027Francis Bacon\u0027, 82, \u0027Ireland\u0027); INSERT INTO artists (artist_id, name, age, country) VALUES (3, \u0027Piet Mondrian\u0027, 71, \u0027Netherlands\u0027);", + "sql": "SELECT MAX(age) as max_age, name FROM artists;", + "sql_explanation": "The SQL query uses the MAX function to find the maximum age from the \u0027age\u0027 column in the \u0027artists\u0027 table. The query also selects the \u0027name\u0027 column to display the corresponding artist name." 
+}, { + "id": "5786", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete artworks from 2000 in the \u0027Artworks\u0027 table", + "sql_context": "CREATE TABLE Artworks (Artist VARCHAR(50), Artwork VARCHAR(50), Year INT); INSERT INTO Artworks (Artist, Artwork, Year)", + "sql": "DELETE FROM Artworks WHERE Year \u003d 2000", + "sql_explanation": "This query deletes records from the Artworks table where the Year column is equal to 2000." +}, { + "id": "1448", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new sustainable practice to the SustainablePractices table for a specific project.", + "sql_context": "CREATE TABLE SustainablePractices (PracticeID INT, PracticeName VARCHAR(50), Description VARCHAR(255), ProjectID INT, FOREIGN KEY (ProjectID) REFERENCES Projects(ProjectID));", + "sql": "INSERT INTO SustainablePractices (PracticeID, PracticeName, Description, ProjectID) VALUES (3, \u0027Rainwater Harvesting\u0027, \u0027Collection and usage of rainwater\u0027, 3);", + "sql_explanation": "This SQL query inserts a new record into the SustainablePractices table for the practice of \u0027Rainwater Harvesting\u0027 with a description and project ID." 
+}, { + "id": "1514", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of green building materials used in Los Angeles in 2020?", + "sql_context": "CREATE TABLE Green_Building_Materials (Material_ID INT, Material_Type VARCHAR(50), Cost FLOAT, City VARCHAR(50), Year INT); INSERT INTO Green_Building_Materials (Material_ID, Material_Type, Cost, City, Year) VALUES (1, \u0027Solar Panels\u0027, 2000, \u0027Los Angeles\u0027, 2020), (2, \u0027Energy-efficient Windows\u0027, 1000, \u0027Los Angeles\u0027, 2020);", + "sql": "SELECT SUM(Cost) FROM Green_Building_Materials WHERE City \u003d \u0027Los Angeles\u0027 AND Year \u003d 2020 AND Material_Type IN (\u0027Solar Panels\u0027, \u0027Energy-efficient Windows\u0027);", + "sql_explanation": "This SQL query calculates the total cost of green building materials (Solar Panels and Energy-efficient Windows) used in Los Angeles in 2020. It does so by using a SUM() aggregate function to add up the Cost column, filtered using a WHERE clause for City \u003d \u0027Los Angeles\u0027, Year \u003d 2020, and Material_Type IN (\u0027Solar Panels\u0027, \u0027Energy-efficient Windows\u0027)." 
+}, { + "id": "1683", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of sustainable projects in California out of all projects in the state?", + "sql_context": "CREATE TABLE Projects (ProjectID INT, State CHAR(2), IsSustainable BOOLEAN); INSERT INTO Projects (ProjectID, State, IsSustainable) VALUES (1, \u0027CA\u0027, true), (2, \u0027NY\u0027, false), (3, \u0027TX\u0027, true), (4, \u0027CA\u0027, false), (5, \u0027CA\u0027, true);", + "sql": "SELECT (COUNT(*) FILTER (WHERE Projects.IsSustainable \u003d true) * 100.0 / COUNT(*)) AS SustainablePercentage FROM Projects WHERE Projects.State \u003d \u0027CA\u0027;", + "sql_explanation": "This query calculates the percentage of sustainable projects in California out of all projects in the state. It does this by first filtering the Projects table for rows where the State column is equal to \u0027CA\u0027. Then, it calculates the number of sustainable projects by filtering for rows where the IsSustainable column is true. Finally, it calculates the percentage by dividing the count of sustainable projects by the total count of projects and multiplying by 100.0. The FILTER clause is used to perform the filtering in the COUNT aggregate functions." 
+}, { + "id": "1791", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable building projects in Australia started between 2019 and 2021?", + "sql_context": "CREATE TABLE project_australia (project_id INT, country VARCHAR(50), project_type VARCHAR(50), start_date DATE); INSERT INTO project_australia (project_id, country, project_type, start_date) VALUES (1, \u0027Australia\u0027, \u0027Sustainable\u0027, \u00272019-05-01\u0027);", + "sql": "SELECT * FROM project_australia WHERE country \u003d \u0027Australia\u0027 AND project_type \u003d \u0027Sustainable\u0027 AND start_date BETWEEN \u00272019-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This query retrieves sustainable building projects in Australia that started between 2019 and 2021 by filtering the \u0027project_australia\u0027 table for the relevant country, project type, and date range." 
+}, { + "id": "1824", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new row into the construction_labor table: \u0027Solar Panel Installation\u0027, \u0027John Doe\u0027, 40, 35.0", + "sql_context": "CREATE TABLE construction_labor (project_name TEXT, worker_name TEXT, hours_worked INTEGER, hourly_rate FLOAT); INSERT INTO construction_labor (project_name, worker_name, hours_worked, hourly_rate) VALUES (\u0027Solar Panel Installation\u0027, \u0027John Doe\u0027, 40, 35.0);", + "sql": "INSERT INTO construction_labor (project_name, worker_name, hours_worked, hourly_rate) VALUES (\u0027Solar Panel Installation\u0027, \u0027John Doe\u0027, 40, 35.0);", + "sql_explanation": "A new row is inserted into the \"construction_labor\" table with the given values for each column." 
+}, { + "id": "1934", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of construction laborers in Arizona, New Mexico, and Nevada combined?", + "sql_context": "CREATE TABLE labor_stats (state VARCHAR(20), occupation VARCHAR(20), number_of_employees INT); INSERT INTO labor_stats (state, occupation, number_of_employees) VALUES (\u0027Arizona\u0027, \u0027Construction laborer\u0027, 8000); INSERT INTO labor_stats (state, occupation, number_of_employees) VALUES (\u0027New Mexico\u0027, \u0027Construction laborer\u0027, 6000); INSERT INTO labor_stats (state, occupation, number_of_employees) VALUES (\u0027Nevada\u0027, \u0027Construction laborer\u0027, 7000);", + "sql": "SELECT SUM(number_of_employees) FROM labor_stats WHERE state IN (\u0027Arizona\u0027, \u0027New Mexico\u0027, \u0027Nevada\u0027) AND occupation \u003d \u0027Construction laborer\u0027;", + "sql_explanation": "This SQL query calculates the total number of construction laborers in Arizona, New Mexico, and Nevada by selecting the SUM (total) of the \u0027number_of_employees\u0027 column, where the \u0027state\u0027 column is either \u0027Arizona\u0027, \u0027New Mexico\u0027, or \u0027Nevada\u0027, and the \u0027occupation\u0027 column equals \u0027Construction laborer\u0027." 
+}, { + "id": "2161", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which construction laborers in California earned more than $50,000 in 2019?", + "sql_context": "CREATE TABLE labor_statistics (id INT PRIMARY KEY, worker_name VARCHAR(255), state VARCHAR(255), annual_salary FLOAT);", + "sql": "SELECT worker_name FROM labor_statistics WHERE state \u003d \u0027California\u0027 AND annual_salary \u003e 50000 AND YEAR(employment_start_date) \u003d 2019;", + "sql_explanation": "This query selects the names of all construction laborers in California who earned more than $50,000 in 2019 by selecting the worker_name field from the labor_statistics table where the state is California, the annual salary is greater than $50,000, and the employment start date is in 2019." 
+}, { + "id": "2288", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable building practices were implemented in projects located in \u0027West Coast\u0027 region between 2018 and 2020?", + "sql_context": "CREATE TABLE Sustainable_Practices (project_id INT, practice VARCHAR(255), region VARCHAR(255), completion_date DATE); INSERT INTO Sustainable_Practices (project_id, practice, region, completion_date) VALUES (1, \u0027Solar Panels\u0027, \u0027West Coast\u0027, \u00272019-04-15\u0027); INSERT INTO Sustainable_Practices (project_id, practice, region, completion_date) VALUES (2, \u0027Green Roof\u0027, \u0027West Coast\u0027, \u00272020-06-27\u0027);", + "sql": "SELECT practice FROM Sustainable_Practices WHERE region \u003d \u0027West Coast\u0027 AND completion_date BETWEEN \u00272018-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "The SQL query selects the \u0027practice\u0027 column from the \u0027Sustainable_Practices\u0027 table, filtering the results by the \u0027West Coast\u0027 region and a date range of 2018 to 2020." 
+}, { + "id": "2388", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum total labor cost for construction projects in New York City between 2018 and 2020?", + "sql_context": "CREATE TABLE construction_labor_costs (cost_id INT, project_name VARCHAR(100), city VARCHAR(50), start_year INT, end_year INT, total_cost DECIMAL(10,2)); INSERT INTO construction_labor_costs (cost_id, project_name, city, start_year, end_year, total_cost) VALUES (1, \u0027CentralParkRevamp\u0027, \u0027New York City\u0027, 2019, 2020, 1500000), (2, \u0027BrooklynBridgeUpgrade\u0027, \u0027New York City\u0027, 2018, 2019, 1200000), (3, \u0027NYCLibraryUpgrade\u0027, \u0027New York City\u0027, 2018, 2020, 1800000);", + "sql": "SELECT MAX(total_cost) FROM construction_labor_costs WHERE city \u003d \u0027New York City\u0027 AND start_year \u003e\u003d 2018 AND end_year \u003c\u003d 2020;", + "sql_explanation": "This query calculates the maximum total labor cost for construction projects in New York City between 2018 and 2020 by filtering the table based on the city and year range and then finding the maximum total cost using the MAX function." 
+}, { + "id": "2516", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest construction innovation implemented in project 3?", + "sql_context": "CREATE TABLE project (id INT PRIMARY KEY); INSERT INTO project (id) VALUES (1), (2), (3); CREATE TABLE innovation (id INT PRIMARY KEY, project_id INT, innovation_type VARCHAR(50), implementation_date DATE, foreign key (project_id) references project(id)); INSERT INTO innovation (id, project_id, innovation_type, implementation_date) VALUES (1, 1, \u00273D Printing\u0027, \u00272020-09-01\u0027), (2, 2, \u0027Modular Construction\u0027, \u00272021-04-01\u0027), (3, 3, \u0027Solar Roof Integration\u0027, \u00272022-01-15\u0027);", + "sql": "SELECT innovation_type, implementation_date FROM innovation WHERE project_id \u003d 3 ORDER BY implementation_date DESC LIMIT 1", + "sql_explanation": "This query selects the innovation_type and implementation_date from the innovation table where the project_id is 3, ordered by implementation_date in descending order and limited to 1 record. This would return the latest construction innovation implemented in project 3, which is Solar Roof Integration implemented on 2022-01-15." 
+}, { + "id": "2546", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of sustainable building projects in the city of Chicago from 2018 to 2020?", + "sql_context": "CREATE TABLE sustainable_projects (project_id INT, city VARCHAR(20), year INT, project_type VARCHAR(20)); INSERT INTO sustainable_projects (project_id, city, year, project_type) VALUES (1, \u0027Chicago\u0027, 2020, \u0027Sustainable\u0027), (2, \u0027Chicago\u0027, 2019, \u0027Sustainable\u0027), (3, \u0027New York\u0027, 2020, \u0027Sustainable\u0027), (4, \u0027Los Angeles\u0027, 2020, \u0027Sustainable\u0027), (5, \u0027Chicago\u0027, 2018, \u0027Sustainable\u0027);", + "sql": "SELECT SUM(project_type \u003d \u0027Sustainable\u0027) FROM sustainable_projects WHERE city \u003d \u0027Chicago\u0027 AND year BETWEEN 2018 AND 2020;", + "sql_explanation": "This query calculates the total number of sustainable building projects in the city of Chicago from 2018 to 2020 by summing up the boolean expression (project_type \u003d \u0027Sustainable\u0027) for rows where city is Chicago and year is between 2018 and 2020. The boolean expression evaluates to 1 for true and 0 for false, so the sum gives the total number of sustainable projects." 
+}, { + "id": "2980", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum labor cost per square foot for a sustainable building project in New York?", + "sql_context": "CREATE TABLE building_data (project_id INT, square_footage DECIMAL(10, 2), labor_cost DECIMAL(10, 2), city VARCHAR(255), country VARCHAR(255), is_sustainable BOOLEAN);", + "sql": "SELECT MAX(labor_cost / square_footage) FROM building_data WHERE city \u003d \u0027New York\u0027 AND is_sustainable \u003d TRUE;", + "sql_explanation": "The SQL query calculates the maximum labor cost per square foot for a sustainable building project in New York. It does this by using the MAX function to find the maximum value of the labor_cost / square_footage calculation where the city is New York and the project is sustainable." 
+}, { + "id": "3018", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of labor hours worked per week in the construction industry in Mexico?", + "sql_context": "CREATE TABLE Labor_Statistics (id INT, worker_id TEXT, company TEXT, job_title TEXT, hours_worked FLOAT, country TEXT);", + "sql": "SELECT MAX(hours_worked) FROM Labor_Statistics WHERE job_title LIKE \u0027%construction%\u0027 AND country \u003d \u0027Mexico\u0027;", + "sql_explanation": "The SQL query finds the maximum number of labor hours worked per week in the construction industry in Mexico by filtering the Labor_Statistics table for job titles containing the word \u0027construction\u0027 and country equal to \u0027Mexico\u0027, then applying the MAX function to the hours_worked column." 
+}, { + "id": "3065", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum labor cost for sustainable building projects in Colorado?", + "sql_context": "CREATE TABLE labor_costs (id INT, project_state TEXT, project_type TEXT, labor_cost DECIMAL(10,2)); INSERT INTO labor_costs (id, project_state, project_type, labor_cost) VALUES (1, \u0027Colorado\u0027, \u0027Sustainable\u0027, 18000.00), (2, \u0027Colorado\u0027, \u0027Sustainable\u0027, 20000.00), (3, \u0027California\u0027, \u0027Conventional\u0027, 15000.00);", + "sql": "SELECT MIN(labor_cost) FROM labor_costs WHERE project_state \u003d \u0027Colorado\u0027 AND project_type \u003d \u0027Sustainable\u0027;", + "sql_explanation": "This query calculates the minimum labor cost for sustainable building projects in Colorado by filtering the labor_costs table based on the project_state and project_type columns and then computing the minimum of the labor_cost values." 
+}, { + "id": "3144", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of building permits issued for projects with a labor cost greater than $1,000,000 in the state of Texas in 2019?", + "sql_context": "CREATE TABLE permits (permit_id INT, project_id INT, labor_cost DECIMAL(10, 2), state VARCHAR(255), issue_date DATE);", + "sql": "SELECT COUNT(*) FROM permits WHERE state \u003d \u0027Texas\u0027 AND labor_cost \u003e 1000000 AND YEAR(issue_date) \u003d 2019;", + "sql_explanation": "The SQL query calculates the total number of building permits issued for projects with a labor cost greater than $1,000,000 in the state of Texas in 2019. It does this by using the COUNT function to find the number of rows in the permits table where the state is Texas, the labor cost is greater than 1,000,000, and the issue_date is in 2019." 
+}, { + "id": "3251", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of building permits issued in Q2 2021 for New York?", + "sql_context": "CREATE TABLE BuildingPermits (id INT, permit_date DATE, state VARCHAR(20)); INSERT INTO BuildingPermits (id, permit_date, state) VALUES (3, \u00272021-04-01\u0027, \u0027New York\u0027), (4, \u00272021-06-15\u0027, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM BuildingPermits WHERE state \u003d \u0027New York\u0027 AND MONTH(permit_date) BETWEEN 4 AND 6;", + "sql_explanation": "The SQL query calculates the total number of building permits issued in Q2 2021 for New York. It filters the BuildingPermits table for permits issued in New York in April, May, or June 2021, then counts the number of rows that meet this criteria." 
+}, { + "id": "3279", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest start date for residential construction projects in Denver, Colorado?", + "sql_context": "CREATE TABLE project_timelines (project_id INT, project_type VARCHAR(20), city VARCHAR(20), start_date DATE); INSERT INTO project_timelines (project_id, project_type, city, start_date) VALUES (10, \u0027Residential\u0027, \u0027Denver\u0027, \u00272021-02-01\u0027), (11, \u0027Commercial\u0027, \u0027Denver\u0027, \u00272020-06-15\u0027), (12, \u0027Residential\u0027, \u0027Boulder\u0027, \u00272019-12-01\u0027);", + "sql": "SELECT MIN(start_date) FROM project_timelines WHERE project_type \u003d \u0027Residential\u0027 AND city \u003d \u0027Denver\u0027;", + "sql_explanation": "Find the minimum start_date for rows with project_type \u0027Residential\u0027 and city \u0027Denver\u0027." 
+}, { + "id": "3461", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from construction_projects table where city is \u0027GreenValley\u0027 and completion_date is before \u00272020-01-01\u0027", + "sql_context": "CREATE TABLE construction_projects (id INT, city VARCHAR(20), completion_date DATE);", + "sql": "DELETE FROM construction_projects WHERE city \u003d \u0027GreenValley\u0027 AND completion_date \u003c \u00272020-01-01\u0027;", + "sql_explanation": "*This SQL query deletes records from the construction_projects table* *that meet specific conditions: city is \u0027GreenValley\u0027 and completion_date is before \u00272020-01-01\u0027*" +}, { + "id": "3641", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average number of construction labor hours worked per day for the month of February 2022", + "sql_context": "CREATE TABLE construction_labor (worker_id INT, hours_worked INT, work_date DATE);", + "sql": "SELECT AVG(hours_worked / 8) FROM construction_labor WHERE EXTRACT(MONTH FROM work_date) \u003d 2", + "sql_explanation": "This SQL query calculates the average number of construction labor hours worked per day for the month of February 2022." 
+}, { + "id": "3700", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all building permits issued for sustainable projects in the state of Washington.", + "sql_context": "CREATE TABLE building_permits (id INT, project_name VARCHAR(50), project_type VARCHAR(20), state VARCHAR(20)); INSERT INTO building_permits (id, project_name, project_type, state) VALUES (1, \u0027Green Building\u0027, \u0027Sustainable\u0027, \u0027Washington\u0027); INSERT INTO building_permits (id, project_name, project_type, state) VALUES (2, \u0027Solar Farm\u0027, \u0027Sustainable\u0027, \u0027Washington\u0027);", + "sql": "SELECT * FROM building_permits WHERE project_type \u003d \u0027Sustainable\u0027 AND state \u003d \u0027Washington\u0027;", + "sql_explanation": "This query lists all building permits issued for sustainable projects in the state of Washington by selecting all columns from the \u0027building_permits\u0027 table where \u0027project_type\u0027 is \u0027Sustainable\u0027 and \u0027state\u0027 is \u0027Washington\u0027." 
+}, { + "id": "3702", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of construction workers in \u0027Solar Suburb\u0027?", + "sql_context": "CREATE TABLE Construction_Workers (worker_id INT, name VARCHAR(30), hours_worked FLOAT, location VARCHAR(20)); INSERT INTO Construction_Workers VALUES (1, \u0027John Doe\u0027, 150.25, \u0027Solar Suburb\u0027), (2, \u0027Jane Smith\u0027, 200.50, \u0027Rural County\u0027), (3, \u0027Mike Johnson\u0027, 300.75, \u0027Solar Suburb\u0027), (4, \u0027Sara Doe\u0027, 250.50, \u0027Solar Suburb\u0027);", + "sql": "SELECT COUNT(DISTINCT worker_id) FROM Construction_Workers WHERE location \u003d \u0027Solar Suburb\u0027;", + "sql_explanation": "The SQL query calculates the total number of construction workers in \u0027Solar Suburb\u0027 by counting the distinct worker_id values from the Construction_Workers table where the location is \u0027Solar Suburb\u0027." 
+}, { + "id": "3760", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \"Projects\" with columns \"project_id\", \"contractor_id\", \"start_date\", and \"end_date\".", + "sql_context": "CREATE TABLE Projects (project_id INT, contractor_id INT, start_date DATE, end_date DATE);", + "sql": "CREATE TABLE Projects (project_id INT, contractor_id INT, start_date DATE, end_date DATE);", + "sql_explanation": "1. A CREATE TABLE statement is used to create a new table named \"Projects\". 2. The table has four columns: \"project_id\" (integer), \"contractor_id\" (integer), \"start_date\" (date), and \"end_date\" (date)." 
+}, { + "id": "3813", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average labor cost for construction projects in \u0027British Columbia\u0027 in the \u0027construction_labor_stats\u0027 table?", + "sql_context": "CREATE TABLE construction_labor_stats (province TEXT, project_id INT, labor_cost FLOAT); INSERT INTO construction_labor_stats (province, project_id, labor_cost) VALUES (\u0027British Columbia\u0027, 1, 18000), (\u0027British Columbia\u0027, 2, 20000), (\u0027British Columbia\u0027, 3, 22000);", + "sql": "SELECT AVG(labor_cost) FROM construction_labor_stats WHERE province \u003d \u0027British Columbia\u0027;", + "sql_explanation": "The SQL query calculates the average labor cost for construction projects in \u0027British Columbia\u0027 in the \u0027construction_labor_stats\u0027 table using the AVG() function and the WHERE clause." 
+}, { + "id": "3817", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total construction labor cost for electricians in Florida?", + "sql_context": "CREATE TABLE construction_labor (state VARCHAR(20), job VARCHAR(50), cost FLOAT); INSERT INTO construction_labor VALUES (\u0027Florida\u0027, \u0027Electrician\u0027, 52.0), (\u0027Florida\u0027, \u0027Electrician\u0027, 53.0), (\u0027Florida\u0027, \u0027Carpenter\u0027, 48.0);", + "sql": "SELECT SUM(cost) FROM construction_labor WHERE state \u003d \u0027Florida\u0027 AND job \u003d \u0027Electrician\u0027;", + "sql_explanation": "This query calculates the total construction labor cost for electricians in Florida by summing up the cost column values for the specified job and state." 
+}, { + "id": "3893", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from construction_projects table where city is \u0027SolarCity\u0027 and completion_date is NULL", + "sql_context": "CREATE TABLE construction_projects (id INT, city VARCHAR(20), completion_date DATE);", + "sql": "DELETE FROM construction_projects WHERE city \u003d \u0027SolarCity\u0027 AND completion_date IS NULL;", + "sql_explanation": "*This SQL query deletes records from the construction_projects table* *that meet specific conditions: city is \u0027SolarCity\u0027 and completion_date is NULL*" +}, { + "id": "3900", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and completion dates of all sustainable building projects in the city of Seattle?", + "sql_context": "CREATE TABLE Sustainable_Buildings (project_name TEXT, city TEXT, completion_date DATE); INSERT INTO Sustainable_Buildings (project_name, city, completion_date) VALUES (\u0027Solar Panel Installation\u0027, \u0027Seattle\u0027, \u00272022-06-01\u0027), (\u0027Green Roof Construction\u0027, \u0027New York\u0027, \u00272021-12-15\u0027);", + "sql": "SELECT project_name, completion_date FROM Sustainable_Buildings WHERE city \u003d \u0027Seattle\u0027;", + 
"sql_explanation": "This query filters the Sustainable_Buildings table to only include rows where the city is Seattle. It then selects the project_name and completion_date columns to display in the result." +}, { + "id": "3905", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of sustainable building materials in the \u0027West\u0027 region?", + "sql_context": "CREATE TABLE Building_Materials (id INT, region VARCHAR(20), material VARCHAR(30), cost FLOAT); INSERT INTO Building_Materials (id, region, material, cost) VALUES (1, \u0027North\u0027, \u0027Concrete\u0027, 150.50), (2, \u0027West\u0027, \u0027Bamboo\u0027, 300.00), (3, \u0027South\u0027, \u0027Steel\u0027, 450.75), (4, \u0027East\u0027, \u0027Wood\u0027, 200.25);", + "sql": "SELECT AVG(cost) FROM Building_Materials WHERE region \u003d \u0027West\u0027 AND material \u003d \u0027Bamboo\u0027;", + "sql_explanation": "This query calculates the average cost of bamboo material in the \u0027West\u0027 region." 
+}, { + "id": "4007", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all building permit data for Georgia in Q3 of 2021", + "sql_context": "CREATE TABLE Building_Permits_GA (permit_id INT, permit_number VARCHAR(20), state VARCHAR(2), quarter INT, year INT); INSERT INTO Building_Permits_GA VALUES (1, \u0027GA-12345\u0027, \u0027GA\u0027, 3, 2021);", + "sql": "SELECT * FROM Building_Permits_GA WHERE state \u003d \u0027GA\u0027 AND quarter \u003d 3 AND year \u003d 2021;", + "sql_explanation": "This query lists all building permit data for Georgia in Q3 of 2021 by selecting all columns from the \u0027Building_Permits_GA\u0027 table where the \u0027state\u0027 column is equal to \u0027GA\u0027, the \u0027quarter\u0027 column is equal to 3, and the \u0027year\u0027 column is equal to 2021." 
+}, { + "id": "4139", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of construction laborers in Michigan in 2019?", + "sql_context": "CREATE TABLE ConstructionLaborers (id INT, name TEXT, state TEXT, year INT, hourlyWage FLOAT);", + "sql": "SELECT COUNT(*) FROM ConstructionLaborers WHERE state \u003d \u0027Michigan\u0027 AND year \u003d 2019;", + "sql_explanation": "This query calculates the total number of construction laborers in Michigan in 2019 by filtering the ConstructionLaborers table based on the state and year columns, then counting the number of records that meet the filter criteria." 
+}, { + "id": "4191", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average construction labor cost for masons in Texas?", + "sql_context": "CREATE TABLE construction_labor (state VARCHAR(20), job VARCHAR(50), cost FLOAT); INSERT INTO construction_labor VALUES (\u0027Texas\u0027, \u0027Mason\u0027, 55.0), (\u0027Texas\u0027, \u0027Mason\u0027, 56.0), (\u0027Texas\u0027, \u0027Electrician\u0027, 50.0);", + "sql": "SELECT AVG(cost) FROM construction_labor WHERE state \u003d \u0027Texas\u0027 AND job \u003d \u0027Mason\u0027;", + "sql_explanation": "This query calculates the average construction labor cost for masons in Texas by summing up the cost column values and dividing by the count of rows for Texas and mason." 
+}, { + "id": "4473", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum project duration for residential permits?", + "sql_context": "CREATE TABLE ProjectTimeline (permit_id INT, project_type VARCHAR(255), duration INT); INSERT INTO ProjectTimeline (permit_id, project_type, duration) VALUES (1, \u0027residential\u0027, 120), (2, \u0027commercial\u0027, 180), (3, \u0027residential\u0027, 150);", + "sql": "SELECT MAX(duration) FROM ProjectTimeline WHERE project_type \u003d \u0027residential\u0027;", + "sql_explanation": "This query calculates the maximum value of the duration column from the ProjectTimeline table where the project_type is \u0027residential\u0027. It returns the maximum project duration for residential permits." 
+}, { + "id": "4649", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many construction workers were employed in each state for non-green building projects in 2020?", + "sql_context": "CREATE TABLE employment_non_green_data (state VARCHAR(255), employees INT, year INT); INSERT INTO employment_non_green_data (state, employees, year) VALUES (\u0027California\u0027, 40000, 2020), (\u0027Texas\u0027, 35000, 2020), (\u0027New York\u0027, 45000, 2020);", + "sql": "SELECT state, employees FROM employment_non_green_data WHERE year \u003d 2020;", + "sql_explanation": "The SQL query returns the number of construction workers employed in each state for non-green building projects in 2020 by selecting the employees column and filtering by the year column." 
+}, { + "id": "4989", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of stories in sustainable buildings in the West?", + "sql_context": "CREATE TABLE West_Sustainable (building_id INT, location VARCHAR(20), stories INT, is_sustainable INT); INSERT INTO West_Sustainable VALUES (5001, \u0027CA\u0027, 5, 1), (5002, \u0027WA\u0027, 7, 1), (5003, \u0027OR\u0027, 3, 0);", + "sql": "SELECT MAX(stories) FROM West_Sustainable WHERE is_sustainable \u003d 1;", + "sql_explanation": "Calculates the maximum number of stories in sustainable buildings in the West by finding the maximum value of the \u0027stories\u0027 column in the West_Sustainable table for records with an \u0027is_sustainable\u0027 value of 1." 
+}, { + "id": "5055", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of sustainable building materials in the \u0027west\u0027 region?", + "sql_context": "CREATE TABLE sustainable_materials (id INT, material_name TEXT, cost FLOAT, region TEXT); INSERT INTO sustainable_materials (id, material_name, cost, region) VALUES (1, \u0027Bamboo Flooring\u0027, 12.50, \u0027west\u0027), (2, \u0027Recycled Steel\u0027, 35.00, \u0027east\u0027);", + "sql": "SELECT AVG(cost) FROM sustainable_materials WHERE region \u003d \u0027west\u0027;", + "sql_explanation": "The SQL query calculates the average cost of sustainable building materials in the \u0027west\u0027 region by using the AVG function on the \u0027cost\u0027 column, where the \u0027region\u0027 is \u0027west\u0027." 
+}, { + "id": "5302", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average permit cost per square foot for all permit types in the state of California?", + "sql_context": "CREATE TABLE permit (permit_id INT, permit_type TEXT, state TEXT, cost INT, sqft INT); INSERT INTO permit (permit_id, permit_type, state, cost, sqft) VALUES (1, \u0027Residential\u0027, \u0027California\u0027, 50000, 2000), (2, \u0027Commercial\u0027, \u0027California\u0027, 200000, 5000);", + "sql": "SELECT AVG(cost/sqft) FROM permit WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "Calculates the average permit cost per square foot by dividing the cost by sqft for all permits in California." 
+}, { + "id": "5328", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum cost of sustainable construction materials in the \u0027materials\u0027 table?", + "sql_context": "CREATE TABLE materials (material_name VARCHAR(30), is_sustainable BOOLEAN, cost FLOAT); INSERT INTO materials (material_name, is_sustainable, cost) VALUES (\u0027Recycled Steel\u0027, TRUE, 120); INSERT INTO materials (material_name, is_sustainable, cost) VALUES (\u0027Reclaimed Wood\u0027, TRUE, 150);", + "sql": "SELECT MIN(cost) FROM materials WHERE is_sustainable \u003d TRUE;", + "sql_explanation": "This SQL query calculates the minimum cost of sustainable construction materials by selecting the MIN (minimum) of the \u0027cost\u0027 column where the \u0027is_sustainable\u0027 column equals TRUE." 
+}, { + "id": "462", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for \u0027Fusion Foods\u0027 in Q2 2019?", + "sql_context": "CREATE TABLE Revenue (restaurant_id INT, quarter INT, year INT, revenue INT); INSERT INTO Revenue (restaurant_id, quarter, year, revenue) VALUES (11, 2, 2019, 8000);", + "sql": "SELECT SUM(revenue) FROM Revenue WHERE restaurant_id \u003d 11 AND EXTRACT(QUARTER FROM DATE \u00272019-01-01\u0027 + INTERVAL (quarter - 1) * 3 MONTH) \u003d 2 AND EXTRACT(YEAR FROM DATE \u00272019-01-01\u0027 + INTERVAL (quarter - 1) * 3 MONTH) \u003d 2019;", + "sql_explanation": "The SQL query calculates the total revenue for \u0027Fusion Foods\u0027 in Q2 2019 by summing the revenue values in the Revenue table where the restaurant_id is 11 and the quarter and year of the calculated timestamp column are Q2 2019." 
+}, { + "id": "1496", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by restaurants with a \u00274-star\u0027 or \u00275-star\u0027 food safety rating in the city of \u0027New York\u0027 for the month of June 2022?", + "sql_context": "CREATE TABLE restaurant_revenue(restaurant_id INT, food_safety_rating INT, revenue DECIMAL(10,2), revenue_date DATE);", + "sql": "SELECT SUM(revenue) FROM restaurant_revenue WHERE food_safety_rating IN (4, 5) AND city \u003d \u0027New York\u0027 AND revenue_date BETWEEN \u00272022-06-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "The SQL query calculates the total revenue for restaurants with a \u00274-star\u0027 or \u00275-star\u0027 food safety rating in \u0027New York\u0027 for June 2022 by summing the revenue for each record with a food_safety_rating of 4 or 5, city of \u0027New York\u0027, and revenue_date within the range \u00272022-06-01\u0027 and \u00272022-06-30\u0027." 
+}, { + "id": "1684", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many restaurants have adopted \u0027Fair Trade\u0027 sourcing practices in the \u0027Africa\u0027 region as of \u00272022-06-15\u0027?", + "sql_context": "CREATE TABLE restaurant_sourcing(restaurant_id INT, sourcing_practices VARCHAR(255), region VARCHAR(255), sourcing_date DATE); INSERT INTO restaurant_sourcing(restaurant_id, sourcing_practices, region, sourcing_date) VALUES (1, \u0027Fair Trade\u0027, \u0027Africa\u0027, \u00272022-06-15\u0027), (2, \u0027Local Sourcing\u0027, \u0027Africa\u0027, \u00272022-06-14\u0027), (3, \u0027Fair Trade\u0027, \u0027Europe\u0027, \u00272022-06-13\u0027);", + "sql": "SELECT COUNT(restaurant_id) FROM restaurant_sourcing WHERE sourcing_practices \u003d \u0027Fair Trade\u0027 AND region \u003d \u0027Africa\u0027 AND sourcing_date \u003c\u003d \u00272022-06-15\u0027;", + "sql_explanation": "The SQL query counts the number of restaurants with \u0027Fair Trade\u0027 sourcing practices in the \u0027Africa\u0027 region as of \u00272022-06-15\u0027 by selecting the count of restaurant_id for each record with a sourcing_practices of \u0027Fair Trade\u0027, region of \u0027Africa\u0027, and sourcing_date on or before \u00272022-06-15\u0027." 
+}, { + "id": "2211", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new sustainable supplier \u0027Fazenda Verde\u0027 from Brazil with sustainable practices", + "sql_context": "CREATE TABLE sustainable_sourcing (supplier_name TEXT, supplier_country TEXT, sustainable_practices BOOLEAN);", + "sql": "INSERT INTO sustainable_sourcing (supplier_name, supplier_country, sustainable_practices) VALUES (\u0027Fazenda Verde\u0027, \u0027Brazil\u0027, TRUE);", + "sql_explanation": "This query inserts a new record into the \u0027sustainable_sourcing\u0027 table for \u0027Fazenda Verde\u0027 from Brazil with sustainable practices." +}, { + "id": "2655", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for the \u0027Seafood\u0027 category in January 2021?", + "sql_context": "CREATE TABLE restaurant_revenue(restaurant_id INT, category VARCHAR(255), revenue DECIMAL(10,2), date DATE);", + "sql": "SELECT SUM(revenue) FROM restaurant_revenue WHERE category \u003d \u0027Seafood\u0027 AND date BETWEEN \u00272021-01-01\u0027 AND \u00272021-01-31\u0027;", + "sql_explanation": "Summarizes the revenue generated from the \u0027Seafood\u0027 category in January 2021." 
+}, { + "id": "2737", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for a specific restaurant in the past month?", + "sql_context": "CREATE TABLE revenue_by_date (date DATE, restaurant VARCHAR(50), revenue INT); INSERT INTO revenue_by_date (date, restaurant, revenue) VALUES (\u00272022-01-01\u0027, \u0027Restaurant A\u0027, 3000), (\u00272022-01-01\u0027, \u0027Restaurant B\u0027, 4000), (\u00272022-01-01\u0027, \u0027Restaurant C\u0027, 5000), (\u00272022-01-02\u0027, \u0027Restaurant A\u0027, 4000), (\u00272022-01-02\u0027, \u0027Restaurant B\u0027, 5000), (\u00272022-01-02\u0027, \u0027Restaurant C\u0027, 6000), (\u00272022-01-03\u0027, \u0027Restaurant A\u0027, 5000), (\u00272022-01-03\u0027, \u0027Restaurant B\u0027, 6000), (\u00272022-01-03\u0027, \u0027Restaurant C\u0027, 7000);", + "sql": "SELECT SUM(revenue) FROM revenue_by_date WHERE restaurant \u003d \u0027Restaurant A\u0027 AND date \u003e\u003d CURDATE() - INTERVAL 30 DAY;", + "sql_explanation": "The SQL query calculates the total revenue for a specific restaurant in the past month by using the SUM() function to add up the revenue for all records where the restaurant is the specified restaurant and the date is within the past 30 days. The query uses the CURDATE() function to get the current date and the INTERVAL keyword to subtract 30 days from the current date, which gives a date range of the past 30 days. This date range is used in the WHERE clause to filter the results to only include records from that date range." 
+}, { + "id": "2747", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum price of vegetarian menu items in the \u0027asian\u0027 cuisine category?", + "sql_context": "CREATE TABLE menu_items (id INT, name TEXT, cuisine TEXT, vegetarian BOOLEAN, price INT); INSERT INTO menu_items (id, name, cuisine, vegetarian, price) VALUES (1, \u0027Tofu Curry\u0027, \u0027asian\u0027, 1, 15), (2, \u0027Chicken Fried Rice\u0027, \u0027asian\u0027, 0, 12), (3, \u0027Vegetable Stir Fry\u0027, \u0027asian\u0027, 1, 10), (4, \u0027Beef and Broccoli\u0027, \u0027asian\u0027, 0, 18);", + "sql": "SELECT MAX(price) as max_price, MIN(price) as min_price FROM menu_items WHERE cuisine \u003d \u0027asian\u0027 AND vegetarian \u003d 1;", + "sql_explanation": "The SQL query filters for rows in the \u0027menu_items\u0027 table where the \u0027cuisine\u0027 column is equal to \u0027asian\u0027 and the \u0027vegetarian\u0027 column is equal to 1. It then calculates the maximum and minimum values of the \u0027price\u0027 column for these rows." 
+}, { + "id": "2955", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the sustainable_sourcing table with the following data: restaurant_id \u003d 103, ingredient \u003d \u0027Quinoa\u0027, sourcing_percentage \u003d 95", + "sql_context": "CREATE TABLE sustainable_sourcing (id INT PRIMARY KEY, restaurant_id INT, ingredient VARCHAR(50), sourcing_percentage DECIMAL(5, 2));", + "sql": "INSERT INTO sustainable_sourcing (restaurant_id, ingredient, sourcing_percentage) VALUES (103, \u0027Quinoa\u0027, 95);", + "sql_explanation": "The INSERT INTO statement is used to add a new record to the sustainable_sourcing table with the specified column values." 
+}, { + "id": "3186", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of the \u0027Tofu Stir Fry\u0027 dish to $12.50 in the \u0027Asian Fusion\u0027 restaurant.", + "sql_context": "CREATE TABLE menu_engineering(dish VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), restaurant VARCHAR(255));", + "sql": "UPDATE menu_engineering SET price \u003d 12.50 WHERE dish \u003d \u0027Tofu Stir Fry\u0027 AND restaurant \u003d \u0027Asian Fusion\u0027;", + "sql_explanation": "Updates the price of the \u0027Tofu Stir Fry\u0027 dish in the \u0027Asian Fusion\u0027 restaurant to $12.50." +}, { + "id": "3434", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many food safety violations were recorded in the past week?", + "sql_context": "CREATE TABLE Inspections (id INT, date DATE, violation BOOLEAN);", + "sql": "SELECT COUNT(*) FROM Inspections WHERE date \u003e\u003d DATEADD(week, -1, GETDATE()) AND violation \u003d TRUE;", + "sql_explanation": "The SQL query calculates the number of food safety violations recorded in the past week by counting the number of rows in the \u0027Inspections\u0027 table where the \u0027date\u0027 is within the last week and \u0027violation\u0027 is true." 
+}, { + "id": "3603", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which menu items have the lowest food cost for gluten-free dishes?", + "sql_context": "CREATE TABLE gluten_free_menu_items (menu_item_id INT, dish_type VARCHAR(255), food_cost DECIMAL(5,2)); INSERT INTO gluten_free_menu_items (menu_item_id, dish_type, food_cost) VALUES (1, \u0027Gluten-free\u0027, 3.50), (2, \u0027Vegetarian\u0027, 2.50), (3, \u0027Gluten-free\u0027, 1.50);", + "sql": "SELECT dish_type, MIN(food_cost) FROM gluten_free_menu_items WHERE dish_type \u003d \u0027Gluten-free\u0027;", + "sql_explanation": "This query identifies which menu items have the lowest food cost for gluten-free dishes by filtering the \u0027dish_type\u0027 column with the value \u0027Gluten-free\u0027 and finding the minimum \u0027food_cost\u0027." 
+}, { + "id": "3625", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily revenue for the first week of January 2022?", + "sql_context": "CREATE TABLE menu_sales_4 (sale_date DATE, revenue INT); INSERT INTO menu_sales_4 (sale_date, revenue) VALUES (\u00272022-01-01\u0027, 1200), (\u00272022-01-02\u0027, 1500), (\u00272022-01-03\u0027, 1000), (\u00272022-01-04\u0027, 1800), (\u00272022-01-05\u0027, 1300), (\u00272022-01-06\u0027, 2000), (\u00272022-01-07\u0027, 1500);", + "sql": "SELECT AVG(revenue) FROM menu_sales_4 WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-07\u0027;", + "sql_explanation": "This query calculates the average daily revenue for the first week of January 2022. It does this by averaging the revenue for each sale date where the sale date is within the first week of January 2022." 
+}, { + "id": "3654", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for the first week of January 2022?", + "sql_context": "CREATE TABLE menu_sales_3 (sale_date DATE, revenue INT); INSERT INTO menu_sales_3 (sale_date, revenue) VALUES (\u00272022-01-01\u0027, 1200), (\u00272022-01-02\u0027, 1500), (\u00272022-01-03\u0027, 1000), (\u00272022-01-04\u0027, 1800), (\u00272022-01-05\u0027, 1300), (\u00272022-01-06\u0027, 2000), (\u00272022-01-07\u0027, 1500);", + "sql": "SELECT SUM(revenue) FROM menu_sales_3 WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-07\u0027;", + "sql_explanation": "This query calculates the total revenue for the first week of January 2022. It does this by summing the revenue for each sale date where the sale date is within the first week of January 2022." 
+}, { + "id": "3707", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average food safety score for restaurants in the \u0027Fast Casual\u0027 category?", + "sql_context": "CREATE TABLE restaurants (id INT, name VARCHAR(255), category VARCHAR(255), food_safety_score INT); INSERT INTO restaurants (id, name, category, food_safety_score) VALUES (1, \u0027Tasty Thai Eats\u0027, \u0027Fast Casual\u0027, 92), (2, \u0027Gourmet Burger Bites\u0027, \u0027Fast Casual\u0027, 88), (3, \u0027Fresh Mex Cantina\u0027, \u0027Fast Casual\u0027, 95);", + "sql": "SELECT AVG(food_safety_score) as avg_score FROM restaurants WHERE category \u003d \u0027Fast Casual\u0027;", + "sql_explanation": "This SQL query filters the restaurants table to only include rows where the category column is \u0027Fast Casual\u0027, then calculates the average food_safety_score for those rows." 
+}, { + "id": "3732", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the FoodInspections table where the InspectionDate is older than 180 days from today.", + "sql_context": "CREATE TABLE FoodInspections (InspectionID int, RestaurantID varchar(50), InspectionDate date);", + "sql": "DELETE FI FROM FoodInspections FI WHERE DATEDIFF(day, FI.InspectionDate, GETDATE()) \u003e 180;", + "sql_explanation": "The SQL query deletes records from the FoodInspections table where the InspectionDate is older than 180 days from today. It does this by calculating the difference between the current date (GETDATE()) and the InspectionDate, and then deletes the records where the difference is greater than 180 days." 
+}, { + "id": "3915", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 menu categories with the highest revenue.", + "sql_context": "CREATE TABLE menu_category_revenue (menu_category VARCHAR(50), revenue INT); INSERT INTO menu_category_revenue (menu_category, revenue) VALUES (\u0027Appetizers\u0027, 5000), (\u0027Entrees\u0027, 10000), (\u0027Desserts\u0027, 7000), (\u0027Beverages\u0027, 8000);", + "sql": "SELECT menu_category, revenue FROM menu_category_revenue ORDER BY revenue DESC LIMIT 3;", + "sql_explanation": "This query orders the menu_category_revenue table by the revenue column in descending order and selects the top 3 rows using the LIMIT clause." 
+}, { + "id": "4043", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average food safety score for restaurants in the \u0027urban\u0027 region?", + "sql_context": "CREATE TABLE restaurants (id INT, name TEXT, region TEXT, safety_score INT); INSERT INTO restaurants (id, name, region, safety_score) VALUES (1, \u0027Restaurant A\u0027, \u0027urban\u0027, 95), (2, \u0027Restaurant B\u0027, \u0027rural\u0027, 85), (3, \u0027Restaurant C\u0027, \u0027urban\u0027, 90);", + "sql": "SELECT AVG(r.safety_score) as avg_score FROM restaurants r WHERE r.region \u003d \u0027urban\u0027;", + "sql_explanation": "The SQL query filters for rows in the \u0027restaurants\u0027 table where the \u0027region\u0027 column is equal to \u0027urban\u0027. It then calculates the average value of the \u0027safety_score\u0027 column for these rows." 
+}, { + "id": "4165", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for gluten-free menu items at location 101?", + "sql_context": "CREATE TABLE menus (menu_id INT, item_name TEXT, category TEXT, price DECIMAL(5,2), location_id INT); INSERT INTO menus (menu_id, item_name, category, price, location_id) VALUES (1, \u0027Quinoa Salad\u0027, \u0027Vegan\u0027, 9.99, 101), (2, \u0027Tofu Stir Fry\u0027, \u0027Vegan\u0027, 12.49, 102), (3, \u0027Chicken Caesar Salad\u0027, \u0027Gluten-free\u0027, 13.99, 101);", + "sql": "SELECT SUM(price) FROM menus WHERE category \u003d \u0027Gluten-free\u0027 AND location_id \u003d 101;", + "sql_explanation": "This query calculates the total revenue for gluten-free menu items at location 101. It does this by summing the \u0027price\u0027 column, where the \u0027category\u0027 is equal to \u0027Gluten-free\u0027 and the \u0027location_id\u0027 is equal to 101." 
+}, { + "id": "4211", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all menu items and their prices for \u0027Sustainable Seafood Restaurant\u0027.", + "sql_context": "CREATE TABLE Menu (Restaurant VARCHAR(255), Item VARCHAR(255), Price DECIMAL(5,2)); INSERT INTO Menu (Restaurant, Item, Price) VALUES (\u0027Vegan Bistro\u0027, \u0027Tofu Stir Fry\u0027, 12.99), (\u0027Vegan Bistro\u0027, \u0027Quinoa Salad\u0027, 10.99), (\u0027Sustainable Seafood Restaurant\u0027, \u0027Grilled Salmon\u0027, 19.99), (\u0027Sustainable Seafood Restaurant\u0027, \u0027Lobster Bisque\u0027, 7.99);", + "sql": "SELECT Item, Price FROM Menu WHERE Restaurant \u003d \u0027Sustainable Seafood Restaurant\u0027;", + "sql_explanation": "This SQL query lists all menu items and their prices for \u0027Sustainable Seafood Restaurant\u0027 by selecting the Item and Price columns where the Restaurant column equals \u0027Sustainable Seafood Restaurant\u0027." 
+}, { + "id": "4217", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sustainable sourcing records are there for \u0027Pizza Palace\u0027?", + "sql_context": "CREATE TABLE sustainable_sourcing (restaurant_name VARCHAR(255), sourcing_record VARCHAR(255)); INSERT INTO sustainable_sourcing (restaurant_name, sourcing_record) VALUES (\u0027Pizza Palace\u0027, \u0027Organic Tomatoes\u0027), (\u0027Pizza Palace\u0027, \u0027Local Cheese\u0027), (\u0027Pizza Palace\u0027, \u0027Fair Trade Pepperoni\u0027);", + "sql": "SELECT COUNT(*) FROM sustainable_sourcing WHERE restaurant_name \u003d \u0027Pizza Palace\u0027;", + "sql_explanation": "This query counts the number of sustainable sourcing records for \u0027Pizza Palace\u0027 by selecting the count of all records where the restaurant_name is \u0027Pizza Palace\u0027." 
+}, { + "id": "4364", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sustainable sourcing audits were conducted in \u0027California\u0027 and \u0027Texas\u0027?", + "sql_context": "CREATE TABLE sourcing_audits (restaurant_name TEXT, location TEXT, audit_date DATE); INSERT INTO sourcing_audits (restaurant_name, location, audit_date) VALUES (\u0027Restaurant A\u0027, \u0027California\u0027, \u00272021-06-01\u0027), (\u0027Restaurant B\u0027, \u0027California\u0027, \u00272021-07-15\u0027), (\u0027Restaurant C\u0027, \u0027Texas\u0027, \u00272021-08-05\u0027), (\u0027Restaurant D\u0027, \u0027California\u0027, \u00272021-09-01\u0027);", + "sql": "SELECT COUNT(*) FROM sourcing_audits WHERE location IN (\u0027California\u0027, \u0027Texas\u0027);", + "sql_explanation": "This query counts the number of sourcing audits conducted in \u0027California\u0027 and \u0027Texas\u0027 by finding the number of rows in the \u0027sourcing_audits\u0027 table where the location is either \u0027California\u0027 or \u0027Texas\u0027." 
+}, { + "id": "4369", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record \u0027Impossible Burger\u0027 with price $12.99 in the menu_items table", + "sql_context": "CREATE TABLE menu_items (item_id INT, item_name TEXT, category TEXT, price DECIMAL(5,2), inventory_count INT);", + "sql": "INSERT INTO menu_items (item_name, price) VALUES (\u0027Impossible Burger\u0027, 12.99);", + "sql_explanation": "* The INSERT INTO statement is used to add a new record into the menu_items table. * The VALUES clause is used to provide the values for the new record, in this case, \u0027Impossible Burger\u0027 and $12.99 for item_name and price columns respectively." 
+}, { + "id": "5099", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for restaurants located in \u0027New York\u0027?", + "sql_context": "CREATE TABLE restaurants (name TEXT, revenue FLOAT, location TEXT); INSERT INTO restaurants (name, revenue, location) VALUES (\u0027Pizzeria Spumoni\u0027, 15000.0, \u0027New York\u0027), (\u0027Pizzeria Yum\u0027, 18000.0, \u0027California\u0027), (\u0027Bakery Bon Appetit\u0027, 22000.0, \u0027New York\u0027);", + "sql": "SELECT SUM(revenue) FROM restaurants WHERE location \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the total revenue for restaurants located in \u0027New York\u0027 by summing the revenue values in the \u0027restaurants\u0027 table where the location is \u0027New York\u0027." 
+}, { + "id": "5117", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027sustainable_sourcing\u0027 table where the \u0027supplier_country\u0027 is \u0027USA\u0027", + "sql_context": "CREATE TABLE sustainable_sourcing (supplier_name TEXT, supplier_country TEXT, sustainable_practices BOOLEAN);", + "sql": "DELETE FROM sustainable_sourcing WHERE supplier_country \u003d \u0027USA\u0027;", + "sql_explanation": "This query deletes all records from the \u0027sustainable_sourcing\u0027 table where the \u0027supplier_country\u0027 is \u0027USA\u0027." +}, { + "id": "5159", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of the \u0027Burrito\u0027 menu item to $10.99", + "sql_context": "CREATE TABLE menu_items (item_name VARCHAR(255), price DECIMAL(10,2)); INSERT INTO menu_items (item_name, price) VALUES (\u0027Pizza\u0027, 12.99), (\u0027Burrito\u0027, 9.99);", + "sql": "UPDATE menu_items SET price \u003d 10.99 WHERE item_name \u003d \u0027Burrito\u0027;", + "sql_explanation": "This query updates the price of the \u0027Burrito\u0027 menu item to $10.99. It filters the menu_items table to only include the \u0027Burrito\u0027 menu item and then sets the price to $10.99 for that item." 
+}, { + "id": "5209", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which restaurants in the \u0027fine_dining\u0027 schema have a health score above 90?", + "sql_context": "CREATE TABLE fine_dining.restaurants (restaurant_id INT, name TEXT, health_score INT); INSERT INTO fine_dining.restaurants (restaurant_id, name, health_score) VALUES (1, \u0027The Classy Spoon\u0027, 95), (2, \u0027Gourmet Delights\u0027, 88);", + "sql": "SELECT * FROM fine_dining.restaurants WHERE health_score \u003e 90;", + "sql_explanation": "The SQL query selects all columns from the \u0027fine_dining.restaurants\u0027 table where the health score is greater than 90." 
+}, { + "id": "5394", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum revenue for a restaurant in the \u0027Asian\u0027 category?", + "sql_context": "CREATE TABLE restaurants (id INT, name VARCHAR(255), type VARCHAR(255), revenue FLOAT); INSERT INTO restaurants (id, name, type, revenue) VALUES (1, \u0027Restaurant A\u0027, \u0027Italian\u0027, 5000.00), (2, \u0027Restaurant B\u0027, \u0027Asian\u0027, 8000.00), (3, \u0027Restaurant C\u0027, \u0027Mexican\u0027, 3000.00);", + "sql": "SELECT MAX(revenue) FROM restaurants WHERE type \u003d \u0027Asian\u0027;", + "sql_explanation": "This SQL query calculates the maximum revenue for a restaurant in the \u0027Asian\u0027 category by selecting the maximum revenue of all records where the type is \u0027Asian\u0027." 
+}, { + "id": "5406", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which sustainable sourcing initiatives were implemented in 2021 and their associated costs?", + "sql_context": "CREATE TABLE SustainabilityInitiatives (InitiativeID int, InitiativeName varchar(255), Year int, Cost decimal(5,2));", + "sql": "SELECT * FROM SustainabilityInitiatives WHERE Year \u003d 2021;", + "sql_explanation": "This query retrieves the sustainable sourcing initiatives implemented in 2021 and their associated costs. It selects all columns from the SustainabilityInitiatives table where the Year column is equal to 2021." +}, { + "id": "5560", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vegan options are available on the menu?", + "sql_context": "CREATE TABLE menu_items_2 (item VARCHAR(255), vegan BOOLEAN); INSERT INTO menu_items_2 (item, vegan) VALUES (\u0027Burger\u0027, false), (\u0027Veggie Burger\u0027, false), (\u0027Salad\u0027, true);", + "sql": "SELECT COUNT(*) FROM menu_items_2 WHERE vegan \u003d true;", + "sql_explanation": "This query counts the number of vegan options on the menu by selecting all items with the vegan attribute set to true and counting the number of rows in the result." 
+}, { + "id": "5684", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all menu items with their descriptions and allergens.", + "sql_context": "CREATE TABLE Menu (id INT, name VARCHAR(50), description TEXT, allergens TEXT); INSERT INTO Menu (id, name, description, allergens) VALUES (1, \u0027Chicken Alfredo\u0027, \u0027Creamy fettuccine with chicken\u0027, \u0027Milk, Eggs\u0027), (2, \u0027Beef Tacos\u0027, \u0027Soft corn tortillas filled with seasoned beef\u0027, \u0027Wheat\u0027), (3, \u0027Veggie Burger\u0027, \u0027Grilled vegetable patty on a whole wheat bun\u0027, \u0027None\u0027);", + "sql": "SELECT name, description, allergens FROM Menu;", + "sql_explanation": "This query retrieves all records from the Menu table, returning the name, description, and allergens columns for each menu item." 
+}, { + "id": "5759", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the food_safety table that have an inspection score below 70", + "sql_context": "CREATE TABLE food_safety (id INT PRIMARY KEY, restaurant_id INT, inspection_date DATE, score INT);", + "sql": "DELETE FROM food_safety WHERE score \u003c 70;", + "sql_explanation": "The DELETE statement is used to remove records from the food_safety table where the score column has a value less than 70." +}, { + "id": "5795", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average cost of vegetarian menu items?", + "sql_context": "CREATE TABLE menu_items (item VARCHAR(50), type VARCHAR(15), cost DECIMAL(10,2)); INSERT INTO menu_items (item, type, cost) VALUES (\u0027Pizza Margherita\u0027, \u0027Vegetarian\u0027, 30.00), (\u0027Caesar Salad\u0027, \u0027Vegetarian\u0027, 15.00); CREATE VIEW veg_menu_items AS SELECT item, cost FROM menu_items WHERE type \u003d \u0027Vegetarian\u0027;", + "sql": "SELECT AVG(cost) FROM veg_menu_items;", + "sql_explanation": "This query calculates the average cost of vegetarian menu items by joining the menu_items table with the veg_menu_items view." 
+}, { + "id": "1210", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the number of days since the last inspection for power plants, specifically those with more than 60 days since the last inspection.", + "sql_context": "CREATE TABLE PollutionSources ( SourceID INT, SourceName NVARCHAR(50), Type NVARCHAR(50), LastInspection DATETIME); INSERT INTO PollutionSources (SourceID, SourceName, Type, LastInspection) VALUES (1, \u0027Oil Rig Alpha\u0027, \u0027Oil Rig\u0027, \u00272021-05-12 14:00:00\u0027); INSERT INTO PollutionSources (SourceID, SourceName, Type, LastInspection) VALUES (2, \u0027Coal Power Plant Beta\u0027, \u0027Power Plant\u0027, \u00272021-03-04 08:30:00\u0027);", + "sql": "SELECT SourceID, SourceName, DATEDIFF(day, LastInspection, GETDATE()) as DaysSinceInspection FROM PollutionSources WHERE Type \u003d \u0027Power Plant\u0027 AND DaysSinceInspection \u003e 60", + "sql_explanation": "Calculate the number of days since the last inspection for power plants, specifically those with more than 60 days since the last inspection." 
+}, { + "id": "1220", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top three countries with the highest marine litter generation in the Mediterranean Sea?", + "sql_context": "CREATE TABLE MarineLitter (country VARCHAR(50), litter_kg_yr INT, region VARCHAR(50), PRIMARY KEY(country)); INSERT INTO MarineLitter (country, litter_kg_yr, region) VALUES (\u0027CountryA\u0027, 1256, \u0027Mediterranean Sea\u0027), (\u0027CountryB\u0027, 1567, \u0027Mediterranean Sea\u0027), (\u0027CountryC\u0027, 1890, \u0027Mediterranean Sea\u0027), (\u0027CountryD\u0027, 987, \u0027Mediterranean Sea\u0027);", + "sql": "SELECT MarineLitter.country, MarineLitter.litter_kg_yr FROM MarineLitter WHERE MarineLitter.region \u003d \u0027Mediterranean Sea\u0027 ORDER BY MarineLitter.litter_kg_yr DESC LIMIT 3;", + "sql_explanation": "This query lists the top three countries with the highest marine litter generation in the Mediterranean Sea by filtering for \u0027Mediterranean Sea\u0027 in the region column, ordering the results in descending order based on the litter_kg_yr column, and limiting the results to 3 rows." 
+}, { + "id": "1669", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for a new ocean pollution control initiative into the \u0027PollutionControl\u0027 table", + "sql_context": "CREATE TABLE PollutionControl (id INT, name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO PollutionControl (id, name, location, start_date, end_date) VALUES (1, \u0027Clean Caribbean Sea\u0027, \u0027Caribbean Sea\u0027, \u00272021-01-01\u0027, \u00272022-12-31\u0027);", + "sql": "INSERT INTO PollutionControl (id, name, location, start_date, end_date) VALUES (2, \u0027Pure Pacific Ocean\u0027, \u0027Pacific Ocean\u0027, \u00272022-04-01\u0027, \u00272023-03-31\u0027);", + "sql_explanation": "This query inserts new records for a new ocean pollution control initiative into the \u0027PollutionControl\u0027 table." 
+}, { + "id": "2130", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new marine debris records into the marine_debris table.", + "sql_context": "CREATE TABLE marine_debris (id INT, debris_type VARCHAR(255), debris_date DATE); INSERT INTO marine_debris (id, debris_type, debris_date) VALUES (1, \u0027Fishing Net\u0027, \u00272022-01-01\u0027), (2, \u0027Plastic Bottle\u0027, \u00272022-02-01\u0027);", + "sql": "INSERT INTO marine_debris (id, debris_type, debris_date) VALUES (3, \u0027Microplastic\u0027, \u00272022-03-01\u0027), (4, \u0027Abandoned Net\u0027, \u00272022-04-01\u0027);", + "sql_explanation": "The SQL query uses the INSERT INTO statement to add new marine debris records into the marine_debris table." 
+}, { + "id": "2435", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth recorded by Indonesian organizations in the Indian Ocean in the past 3 years?", + "sql_context": "CREATE TABLE indian_ocean_mapping (id INT, organization VARCHAR(50), depth INT, date DATE, country VARCHAR(50)); INSERT INTO indian_ocean_mapping (id, organization, depth, date, country) VALUES (1, \u0027Indonesian National Institute of Oceanography\u0027, 7000, \u00272022-01-10\u0027, \u0027Indonesia\u0027); INSERT INTO indian_ocean_mapping (id, organization, depth, date, country) VALUES (2, \u0027Universitas Hasanuddin\u0027, 6500, \u00272021-12-25\u0027, \u0027Indonesia\u0027);", + "sql": "SELECT MAX(depth) FROM indian_ocean_mapping WHERE country \u003d \u0027Indonesia\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR);", + "sql_explanation": "The SQL query calculates the maximum depth recorded by Indonesian organizations in the Indian Ocean in the past 3 years. It uses the WHERE clause to filter the data based on the country and date range, then calculates the maximum depth using the MAX() function." 
+}, { + "id": "2538", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many whale sightings were recorded in the Atlantic ocean in the last year?", + "sql_context": "CREATE TABLE whale_sightings (sighting_date DATE, location VARCHAR(255)); INSERT INTO whale_sightings (sighting_date, location) VALUES (\u00272021-06-15\u0027, \u0027Atlantic Ocean\u0027), (\u00272022-03-02\u0027, \u0027Atlantic Ocean\u0027);", + "sql": "SELECT COUNT(*) FROM whale_sightings WHERE location \u003d \u0027Atlantic Ocean\u0027 AND sighting_date \u003e\u003d DATEADD(year, -1, GETDATE());", + "sql_explanation": "This query counts the number of whale sightings recorded in the Atlantic ocean in the last year by selecting the count of rows in the whale_sightings table where the location is \u0027Atlantic Ocean\u0027 and the sighting_date is within the last year." 
+}, { + "id": "3046", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the pollution control initiative in the Arctic Ocean", + "sql_context": "CREATE TABLE pollution_control_initiatives (id INT PRIMARY KEY, initiative_name VARCHAR(255), region VARCHAR(255)); INSERT INTO pollution_control_initiatives (id, initiative_name, region) VALUES (1, \u0027Clean Arctic\u0027, \u0027Arctic Ocean\u0027);", + "sql": "UPDATE pollution_control_initiatives SET initiative_name \u003d \u0027Arctic Clean-Up\u0027 WHERE region \u003d \u0027Arctic Ocean\u0027;", + "sql_explanation": "This query updates the initiative_name of the record in the pollution_control_initiatives table where the region is \u0027Arctic Ocean\u0027 to \u0027Arctic Clean-Up\u0027." 
+}, { + "id": "3129", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which maritime law violations occurred in the last 6 months, ordered by date?", + "sql_context": "CREATE TABLE violations (id INT, law_code TEXT, description TEXT, violation_date DATE); INSERT INTO violations (id, law_code, description, violation_date) VALUES (1, \u0027LAW123\u0027, \u0027Speeding\u0027, \u00272021-07-01\u0027), (2, \u0027LAW456\u0027, \u0027Illegal fishing\u0027, \u00272021-08-15\u0027);", + "sql": "SELECT * FROM violations WHERE violation_date \u003e\u003d DATE(NOW()) - INTERVAL 6 MONTH ORDER BY violation_date;", + "sql_explanation": "Retrieve maritime law violations from the last 6 months, ordered by date." 
+}, { + "id": "3262", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine mammals were sighted in the Atlantic ocean in 2019?", + "sql_context": "CREATE TABLE Atlantic_sightings (animal_name TEXT, sighting_date DATE); INSERT INTO Atlantic_sightings (animal_name, sighting_date) VALUES (\u0027Dolphin\u0027, \u00272019-01-01\u0027), (\u0027Blue Whale\u0027, \u00272019-06-12\u0027), (\u0027Seal\u0027, \u00272019-12-25\u0027);", + "sql": "SELECT animal_name FROM Atlantic_sightings WHERE sighting_date BETWEEN \u00272019-01-01\u0027 AND \u00272019-12-31\u0027;", + "sql_explanation": "Retrieves the names of all marine mammals sighted in the Atlantic ocean in 2019." 
+}, { + "id": "3335", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the annual CO2 emissions from shipping activities in the Atlantic Ocean?", + "sql_context": "CREATE TABLE CO2_Emissions (year INT, emissions_mt INT, region VARCHAR(50), PRIMARY KEY(year)); INSERT INTO CO2_Emissions (year, emissions_mt, region) VALUES (2015, 125678, \u0027Atlantic Ocean\u0027), (2016, 136789, \u0027Atlantic Ocean\u0027);", + "sql": "SELECT CO2_Emissions.emissions_mt FROM CO2_Emissions WHERE CO2_Emissions.region \u003d \u0027Atlantic Ocean\u0027;", + "sql_explanation": "This query calculates the annual CO2 emissions from shipping activities in the Atlantic Ocean by selecting the emissions_mt column values where region is \u0027Atlantic Ocean\u0027." 
+}, { + "id": "3430", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which conservation initiatives are specific to the Arctic region and do not involve the Dolphin species?", + "sql_context": "CREATE TABLE conservation_initiatives (id INT, initiative VARCHAR(255), species VARCHAR(255), region VARCHAR(255)); INSERT INTO conservation_initiatives (id, initiative, species, region) VALUES (1, \u0027Ice Cap Mapping\u0027, \u0027Polar Bear\u0027, \u0027Arctic\u0027), (2, \u0027Beach Cleanup\u0027, \u0027Dolphin\u0027, \u0027Pacific\u0027), (3, \u0027Coral Restoration\u0027, \u0027Clownfish\u0027, \u0027Indian\u0027), (4, \u0027Fish Population Study\u0027, \u0027Cod\u0027, \u0027Atlantic\u0027), (5, \u0027Ocean Floor Mapping\u0027, \u0027Seal\u0027, \u0027Arctic\u0027);", + "sql": "SELECT initiative FROM conservation_initiatives WHERE region \u003d \u0027Arctic\u0027 AND species !\u003d \u0027Dolphin\u0027;", + "sql_explanation": "This SQL query selects initiatives from the conservation_initiatives table where the region is Arctic and the species is not Dolphin. It returns the conservation initiatives that are specific to the Arctic region and do not involve the Dolphin species." 
+}, { + "id": "3499", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total length of underwater cables in the Mediterranean and Arctic Oceans.", + "sql_context": "CREATE TABLE underwater_cables (ocean_name TEXT, cable_length INT); INSERT INTO underwater_cables (ocean_name, cable_length) VALUES (\u0027Mediterranean\u0027, 3000), (\u0027Arctic\u0027, 5000);", + "sql": "SELECT SUM(cable_length) FROM underwater_cables WHERE ocean_name IN (\u0027Mediterranean\u0027, \u0027Arctic\u0027);", + "sql_explanation": "This query calculates the total length of underwater cables in the Mediterranean and Arctic Oceans by summing up the cable_length column values where ocean_name is either \u0027Mediterranean\u0027 or \u0027Arctic\u0027." 
+}, { + "id": "3563", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature of the ocean floor in the Pacific Ocean?", + "sql_context": "CREATE TABLE Pacific_Ocean (ocean_floor_location text, point_longitude numeric, point_latitude numeric, point_temperature numeric);", + "sql": "SELECT AVG(point_temperature) FROM Pacific_Ocean WHERE ocean_floor_location \u003d \u0027Pacific Ocean\u0027;", + "sql_explanation": "Calculate the average temperature of all points in the Pacific Ocean by averaging the point_temperature column values where the ocean_floor_location matches \u0027Pacific Ocean\u0027." 
+}, { + "id": "3568", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum legal limit of nitrogen oxide emissions for ships in the Baltic Sea?", + "sql_context": "CREATE TABLE Baltic_Maritime_Law (law_name TEXT, nox_limit INT); INSERT INTO Baltic_Maritime_Law (law_name, nox_limit) VALUES (\u0027Baltic Sea Emission Control Area\u0027, 8.3);", + "sql": "SELECT nox_limit FROM Baltic_Maritime_Law WHERE law_name \u003d \u0027Baltic Sea Emission Control Area\u0027;", + "sql_explanation": "This query returns the minimum legal limit of nitrogen oxide emissions for ships in the Baltic Sea." +}, { + "id": "3582", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all pollution control initiatives in the North Atlantic and their corresponding funding amounts.", + "sql_context": "CREATE TABLE pollution_control_initiatives (id INT, initiative TEXT, region TEXT, funding FLOAT); INSERT INTO pollution_control_initiatives (id, initiative, region, funding) VALUES (1, \u0027Initiative X\u0027, \u0027North Atlantic\u0027, 500000), (2, \u0027Initiative Y\u0027, \u0027Arctic\u0027, 700000), (3, \u0027Initiative Z\u0027, \u0027North Atlantic\u0027, 600000);", + "sql": "SELECT initiative, funding FROM 
pollution_control_initiatives WHERE region \u003d \u0027North Atlantic\u0027;", + "sql_explanation": "This query lists all pollution control initiatives in the North Atlantic and their corresponding funding amounts by selecting the initiative and funding columns, filtered by the region column with the value \u0027North Atlantic\u0027." +}, { + "id": "3587", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of marine species in the \u0027marine_species\u0027 table with a conservation status of \u0027Vulnerable\u0027 or \u0027Threatened\u0027", + "sql_context": "CREATE TABLE marine_species (id INT PRIMARY KEY, species_name VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO marine_species (id, species_name, conservation_status) VALUES (1001, \u0027Oceanic Whitetip Shark\u0027, \u0027Vulnerable\u0027), (1002, \u0027Green Sea Turtle\u0027, \u0027Threatened\u0027), (1003, \u0027Leatherback Sea Turtle\u0027, \u0027Vulnerable\u0027), (1004, \u0027Hawksbill Sea Turtle\u0027, \u0027Endangered\u0027);", + "sql": "SELECT COUNT(*) FROM marine_species WHERE conservation_status IN (\u0027Vulnerable\u0027, \u0027Threatened\u0027);", + "sql_explanation": "This SQL query finds the number of marine species with a conservation status of \u0027Vulnerable\u0027 or \u0027Threatened\u0027 by selecting the count of all records in the \u0027marine_species\u0027 table where the conservation_status is either \u0027Vulnerable\u0027 or \u0027Threatened\u0027." 
+}, { + "id": "3691", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine species are endangered in the Atlantic Ocean?", + "sql_context": "CREATE TABLE endangered_species(id INT, species VARCHAR(255), ocean VARCHAR(255), status VARCHAR(255));INSERT INTO endangered_species(id, species, ocean, status) VALUES (1, \u0027Dolphin\u0027, \u0027Atlantic\u0027, \u0027Endangered\u0027), (2, \u0027Shark\u0027, \u0027Atlantic\u0027, \u0027Vulnerable\u0027), (3, \u0027Tuna\u0027, \u0027Atlantic\u0027, \u0027Endangered\u0027);", + "sql": "SELECT COUNT(*) FROM endangered_species WHERE ocean \u003d \u0027Atlantic\u0027 AND status \u003d \u0027Endangered\u0027;", + "sql_explanation": "This query counts the number of marine species that are endangered in the Atlantic Ocean by using the COUNT() function and filtering the ocean and status columns with the equality operator." 
+}, { + "id": "3738", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all countries involved in marine life research in the \u0027Pacific\u0027 and \u0027Atlantic\u0027 regions.", + "sql_context": "CREATE TABLE marine_life_research (id INT, country TEXT, region TEXT);INSERT INTO marine_life_research (id, country, region) VALUES (1, \u0027India\u0027, \u0027Indian\u0027), (2, \u0027Indonesia\u0027, \u0027Indian\u0027), (3, \u0027Australia\u0027, \u0027Pacific\u0027), (4, \u0027Canada\u0027, \u0027Atlantic\u0027), (5, \u0027USA\u0027, \u0027Atlantic\u0027);", + "sql": "SELECT DISTINCT country FROM marine_life_research WHERE region IN (\u0027Pacific\u0027, \u0027Atlantic\u0027);", + "sql_explanation": "This SQL query selects distinct \u0027country\u0027 values from the \u0027marine_life_research\u0027 table where the \u0027region\u0027 is either \u0027Pacific\u0027 or \u0027Atlantic\u0027." 
+}, { + "id": "4050", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum depth of all marine protected areas in the Atlantic Ocean?", + "sql_context": "CREATE TABLE marine_protected_areas (area_name TEXT, location TEXT, avg_depth FLOAT); INSERT INTO marine_protected_areas (area_name, location, avg_depth) VALUES (\u0027Bermuda Atlantic National Park\u0027, \u0027Atlantic Ocean\u0027, 10.0), (\u0027Galapagos Marine Reserve\u0027, \u0027Atlantic Ocean\u0027, 20.0);", + "sql": "SELECT MIN(avg_depth) FROM marine_protected_areas WHERE location \u003d \u0027Atlantic Ocean\u0027;", + "sql_explanation": "This SQL query calculates the minimum depth of all marine protected areas in the Atlantic Ocean by taking the minimum of the avg_depth column values where the location is \u0027Atlantic Ocean\u0027." 
+}, { + "id": "4159", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries in the Caribbean region have the longest coastlines?", + "sql_context": "CREATE TABLE Caribbean_Coastlines (country TEXT, coastline_km FLOAT); INSERT INTO Caribbean_Coastlines (country, coastline_km) VALUES (\u0027Cuba\u0027, 5745), (\u0027Jamaica\u0027, 1022);", + "sql": "SELECT country, coastline_km FROM Caribbean_Coastlines ORDER BY coastline_km DESC;", + "sql_explanation": "This query lists the countries in the Caribbean region with the longest coastlines." +}, { + "id": "4306", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum pollution level recorded in the Indian Ocean?", + "sql_context": "CREATE TABLE pollution_data (location VARCHAR(255), pollution_level FLOAT); INSERT INTO pollution_data (location, pollution_level) VALUES (\u0027Indian Ocean\u0027, 12.5), (\u0027Atlantic Ocean\u0027, 15.6);", + "sql": "SELECT MAX(pollution_level) FROM pollution_data WHERE location \u003d \u0027Indian Ocean\u0027;", + "sql_explanation": "The SQL query selects the maximum pollution level for the location \u0027Indian Ocean\u0027 by using the MAX aggregate function." 
+}, { + "id": "4351", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fish species are there in the marine_life_populations table?", + "sql_context": "CREATE TABLE marine_life_populations (species TEXT, population INTEGER); INSERT INTO marine_life_populations (species, population) VALUES (\u0027Whale Shark\u0027, 30000), (\u0027Dolphin\u0027, 250000), (\u0027Clownfish\u0027, 500000), (\u0027Blue Whale\u0027, 12000), (\u0027Tuna\u0027, 80000), (\u0027Salmon\u0027, 40000);", + "sql": "SELECT COUNT(species) FROM marine_life_populations WHERE species LIKE \u0027%fish%\u0027;", + "sql_explanation": "This query calculates the number of fish species in the marine_life_populations table." 
+}, { + "id": "4415", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the names and populations of marine species with a population greater than 1000 in the \u0027Research\u0027 schema\u0027s \u0027Species\u0027 table", + "sql_context": "CREATE TABLE Research.Species ( id INT, species_name VARCHAR(255), population INT );", + "sql": "SELECT species_name, population FROM Research.Species WHERE population \u003e 1000;", + "sql_explanation": "This query filters the \u0027Species\u0027 table in the \u0027Research\u0027 schema to show only the rows with populations greater than 1000." +}, { + "id": "4531", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of pollution control initiatives in the \u0027Pollution\u0027 schema from 2018 to 2021?", + "sql_context": "CREATE SCHEMA Pollution; CREATE TABLE Initiatives (initiative_id INT, year INT); INSERT INTO Initiatives (initiative_id, year) VALUES (1, 2018), (2, 2019), (3, 2020), (4, 2021), (5, 2022);", + "sql": "SELECT COUNT(*) FROM Pollution.Initiatives WHERE year BETWEEN 2018 AND 2021;", + "sql_explanation": "This SQL query calculates the total number of pollution control initiatives in the \u0027Pollution\u0027 schema from 2018 to 2021 
by selecting the count of all rows in the \u0027Initiatives\u0027 table where the \u0027year\u0027 column is between 2018 and 2021." +}, { + "id": "4551", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of vessels that violated maritime law in the Arctic in 2020?", + "sql_context": "CREATE TABLE arctic_vessels (vessel_id TEXT, year INTEGER, violation BOOLEAN); INSERT INTO arctic_vessels (vessel_id, year, violation) VALUES (\u0027VES001\u0027, 2020, TRUE), (\u0027VES002\u0027, 2019, FALSE);", + "sql": "SELECT COUNT(*) FROM arctic_vessels WHERE year \u003d 2020 AND violation \u003d TRUE;", + "sql_explanation": "This query retrieves the total number of vessels that violated maritime law in the Arctic in 2020." 
+}, { + "id": "4712", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of pollution incidents in the Southern Ocean.", + "sql_context": "CREATE TABLE pollution_incidents (id INT, incident_type VARCHAR(50), location_latitude FLOAT, location_longitude FLOAT, ocean VARCHAR(50)); INSERT INTO pollution_incidents (id, incident_type, location_latitude, location_longitude, ocean) VALUES (1, \u0027Oil Spill\u0027, -60.6667, 148.9667, \u0027Southern Ocean\u0027), (2, \u0027Garbage Patch\u0027, -46.6333, 81.1833, \u0027Southern Ocean\u0027);", + "sql": "SELECT COUNT(*) FROM pollution_incidents WHERE ocean \u003d \u0027Southern Ocean\u0027;", + "sql_explanation": "* Counts all rows in the pollution_incidents table where the ocean column is \u0027Southern Ocean\u0027." 
+}, { + "id": "4727", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vessels are compliant with maritime law in the \u0027Maritime\u0027 schema?", + "sql_context": "CREATE SCHEMA Maritime; CREATE TABLE Law (vessel_id INT, compliance_status VARCHAR(10)); INSERT INTO Law (vessel_id, compliance_status) VALUES (1, \u0027Compliant\u0027), (2, \u0027Non-Compliant\u0027), (3, \u0027Compliant\u0027), (4, \u0027Compliant\u0027), (5, \u0027Compliant\u0027);", + "sql": "SELECT COUNT(*) FROM Maritime.Law WHERE compliance_status \u003d \u0027Compliant\u0027;", + "sql_explanation": "This SQL query calculates the total number of vessels that are compliant with maritime law in the \u0027Maritime\u0027 schema by selecting the count of all rows in the \u0027Law\u0027 table where the \u0027compliance_status\u0027 column is \u0027Compliant\u0027." 
+}, { + "id": "4817", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of marine protected areas?", + "sql_context": "CREATE TABLE marine_protected_areas (area_name TEXT, coordinates POINT, depth INTEGER);", + "sql": "SELECT SUM(ST_Area(geography::GEOGRAPHY)) FROM marine_protected_areas;", + "sql_explanation": "This query calculates the total area of marine protected areas by using the ST_Area function to determine the area of each polygon in the marine_protected_areas table, and then using the SUM function to add up those areas." +}, { + "id": "4943", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of ocean floor mapping projects in the Atlantic?", + "sql_context": "CREATE TABLE ocean_floor_mapping (project_name TEXT, depth REAL, ocean TEXT); INSERT INTO ocean_floor_mapping (project_name, depth, ocean) VALUES (\u0027Project_A\u0027, 5500.0, \u0027Atlantic\u0027), (\u0027Project_B\u0027, 6000.0, \u0027Atlantic\u0027), (\u0027Project_C\u0027, 4500.0, \u0027Indian\u0027);", + "sql": "SELECT MAX(depth) FROM ocean_floor_mapping WHERE ocean \u003d \u0027Atlantic\u0027;", + "sql_explanation": "This SQL query calculates the maximum depth (MAX(depth)) of ocean floor 
mapping projects (ocean_floor_mapping table) in the Atlantic (WHERE ocean \u003d \u0027Atlantic\u0027)." +}, { + "id": "4964", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salinity of the Black Sea?", + "sql_context": "CREATE TABLE sea_salinity (id INT, sea TEXT, avg_salinity FLOAT); INSERT INTO sea_salinity (id, sea, avg_salinity) VALUES (1, \u0027Black Sea\u0027, 17.0), (2, \u0027Mediterranean Sea\u0027, 38.0);", + "sql": "SELECT AVG(avg_salinity) FROM sea_salinity WHERE sea \u003d \u0027Black Sea\u0027;", + "sql_explanation": "This SQL query calculates the average salinity (AVG(avg_salinity)) of the Black Sea (WHERE sea \u003d \u0027Black Sea\u0027)." 
+}, { + "id": "4986", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove the marine research project in the Indian Ocean", + "sql_context": "CREATE TABLE marine_research_projects (id INT PRIMARY KEY, project_name VARCHAR(255), region VARCHAR(255)); INSERT INTO marine_research_projects (id, project_name, region) VALUES (1, \u0027Indian Ocean Exploration\u0027, \u0027Indian Ocean\u0027);", + "sql": "DELETE FROM marine_research_projects WHERE region \u003d \u0027Indian Ocean\u0027;", + "sql_explanation": "This query deletes the record from the marine_research_projects table where the region is \u0027Indian Ocean\u0027." +}, { + "id": "4992", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of marine species from the Arctic ocean that are deeper than 1000 meters.", + "sql_context": "CREATE TABLE marine_species (id INT, species_name VARCHAR(255), ocean VARCHAR(255), depth INT); INSERT INTO marine_species (id, species_name, ocean, depth) VALUES (1, \u0027Narwhal\u0027, \u0027Arctic\u0027, 1500); INSERT INTO marine_species (id, species_name, ocean, depth) VALUES (2, \u0027Beluga Whale\u0027, \u0027Arctic\u0027, 500);", + "sql": "DELETE FROM marine_species WHERE ocean \u003d \u0027Arctic\u0027 AND depth \u003e 1000;", + 
"sql_explanation": "This SQL query deletes all records of marine species from the Arctic ocean that are deeper than 1000 meters by using the DELETE statement and filtering the rows using the WHERE clause to only include the records where the \u0027ocean\u0027 column is equal to \u0027Arctic\u0027 and the \u0027depth\u0027 column is greater than 1000." +}, { + "id": "5007", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum and minimum depths of oceanic trenches in the Caribbean plate.", + "sql_context": "CREATE TABLE caribbean_plate (trench_name TEXT, average_depth FLOAT); INSERT INTO caribbean_plate (trench_name, average_depth) VALUES (\u0027Cayman Trough\u0027, 7676.0), (\u0027Colombia Basin\u0027, 5000.0);", + "sql": "SELECT MIN(average_depth), MAX(average_depth) FROM caribbean_plate;", + "sql_explanation": "This query returns both the minimum and maximum values in the \u0027average_depth\u0027 column from the \u0027caribbean_plate\u0027 table." 
+}, { + "id": "5048", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine mammal species are endangered in the Arctic Ocean?", + "sql_context": "CREATE TABLE Arctic_Ocean_Mammals (mammal_name TEXT, endangered BOOLEAN); INSERT INTO Arctic_Ocean_Mammals (mammal_name, endangered) VALUES (\u0027Beluga Whale\u0027, TRUE), (\u0027Narwhal\u0027, FALSE), (\u0027Polar Bear\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM Arctic_Ocean_Mammals WHERE endangered \u003d TRUE;", + "sql_explanation": "This query counts the number of endangered marine mammal species in the Arctic Ocean." +}, { + "id": "5094", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum biomass of any whale species in the Arctic Ocean?", + "sql_context": "CREATE TABLE whale_biomass (species TEXT, location TEXT, biomass INTEGER); INSERT INTO whale_biomass (species, location, biomass) VALUES (\u0027Blue Whale\u0027, \u0027Arctic\u0027, 200000), (\u0027Humpback Whale\u0027, \u0027Arctic\u0027, 70000), (\u0027Sperm Whale\u0027, \u0027Arctic\u0027, 300000), (\u0027Beluga Whale\u0027, \u0027Arctic\u0027, 60000);", + "sql": "SELECT MIN(biomass) FROM whale_biomass WHERE location \u003d \u0027Arctic\u0027;", + "sql_explanation": "This 
SQL query finds the minimum biomass of any whale species in the Arctic Ocean by using the MIN function on the biomass column, filtering for rows where the location is \u0027Arctic\u0027." +}, { + "id": "5097", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of oceanic trenches in the Caribbean plate?", + "sql_context": "CREATE TABLE Trench (trench_name VARCHAR(50), plate_name VARCHAR(50), max_depth NUMERIC(8,2)); INSERT INTO Trench (trench_name, plate_name, max_depth) VALUES (\u0027Cayman Trough\u0027, \u0027Caribbean\u0027, 25000);", + "sql": "SELECT MAX(max_depth) FROM Trench WHERE plate_name \u003d \u0027Caribbean\u0027;", + "sql_explanation": "This query calculates the maximum depth of trenches in the Caribbean plate by selecting the max_depth column from the Trench table where the plate_name is \u0027Caribbean\u0027." 
+}, { + "id": "5218", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of marine life research data from the Mediterranean sea.", + "sql_context": "CREATE TABLE marine_life_data (id INT, species TEXT, population INT, location TEXT); INSERT INTO marine_life_data (id, species, population, location) VALUES (1, \u0027Dolphin\u0027, 50, \u0027Mediterranean\u0027); INSERT INTO marine_life_data (id, species, population, location) VALUES (2, \u0027Turtle\u0027, 25, \u0027Mediterranean\u0027);", + "sql": "DELETE FROM marine_life_data WHERE location \u003d \u0027Mediterranean\u0027;", + "sql_explanation": "This SQL query deletes all records of marine life research data from the Mediterranean sea by removing all rows from the marine_life_data table where the location is \u0027Mediterranean\u0027." 
+}, { + "id": "5248", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of pollution in cubic meters in the Pacific Ocean?", + "sql_context": "CREATE TABLE pollution (location TEXT, volume REAL); INSERT INTO pollution (location, volume) VALUES (\u0027Pacific Ocean\u0027, 123456789.0);", + "sql": "SELECT volume FROM pollution WHERE location \u003d \u0027Pacific Ocean\u0027;", + "sql_explanation": "This query calculates the total volume of pollution in cubic meters in the Pacific Ocean by selecting the volume from the pollution table where the location is \u0027Pacific Ocean\u0027." +}, { + "id": "5315", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the pollution level for a specific ocean pollution record in the ocean_pollution table.", + "sql_context": "CREATE TABLE ocean_pollution (id INT, pollution_type VARCHAR(255), pollution_level INT, pollution_date DATE); INSERT INTO ocean_pollution (id, pollution_type, pollution_level, pollution_date) VALUES (1, \u0027Oil Spill\u0027, 5, \u00272021-01-01\u0027), (2, \u0027Plastic Waste\u0027, 3, \u00272022-01-01\u0027);", + "sql": "UPDATE ocean_pollution SET pollution_level \u003d 6 WHERE id \u003d 1;", + "sql_explanation": "The SQL query uses the UPDATE 
statement to modify the pollution level for the specific oil spill record with an id of 1." +}, { + "id": "5335", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the vessels that have spotted more than 200 marine species?", + "sql_context": "CREATE TABLE ResearchVessels (VesselID INT, Name VARCHAR(50), SpeciesSpotted INT); INSERT INTO ResearchVessels (VesselID, Name, SpeciesSpotted) VALUES (1, \u0027RV1\u0027, 100), (2, \u0027RV2\u0027, 150), (3, \u0027RV3\u0027, 250), (4, \u0027RV4\u0027, 50);", + "sql": "SELECT Name FROM ResearchVessels WHERE SpeciesSpotted \u003e 200;", + "sql_explanation": "This query selects all records from the ResearchVessels table where the SpeciesSpotted column is greater than 200, and returns the corresponding Name." 
+}, { + "id": "5339", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of whale sightings in the Arctic Ocean", + "sql_context": "CREATE TABLE whale_sightings (id INT PRIMARY KEY, species VARCHAR(255), location VARCHAR(255), sighting_date DATE); INSERT INTO whale_sightings (id, species, location, sighting_date) VALUES (1, \u0027Beluga Whale\u0027, \u0027Arctic Ocean\u0027, \u00272023-03-10\u0027);", + "sql": "DELETE FROM whale_sightings WHERE location \u003d \u0027Arctic Ocean\u0027;", + "sql_explanation": "This query deletes all records from the whale_sightings table where the location is \u0027Arctic Ocean\u0027." 
+}, { + "id": "5432", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many countries are involved in marine life research in all regions?", + "sql_context": "CREATE TABLE marine_life_research (id INT, country TEXT, region TEXT);INSERT INTO marine_life_research (id, country, region) VALUES (1, \u0027India\u0027, \u0027Indian\u0027), (2, \u0027Indonesia\u0027, \u0027Indian\u0027), (3, \u0027Australia\u0027, \u0027Pacific\u0027), (4, \u0027Canada\u0027, \u0027Atlantic\u0027), (5, \u0027USA\u0027, \u0027Atlantic\u0027), (6, \u0027Brazil\u0027, \u0027Atlantic\u0027), (7, \u0027South Africa\u0027, \u0027Indian\u0027);", + "sql": "SELECT COUNT(DISTINCT country) FROM marine_life_research;", + "sql_explanation": "This SQL query counts the number of unique \u0027country\u0027 values in the \u0027marine_life_research\u0027 table." 
+}, { + "id": "5548", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine species have been found at a depth of over 5000 meters?", + "sql_context": "CREATE TABLE species_depth (id INT, species TEXT, depth INT); INSERT INTO species_depth (id, species, depth) VALUES (1, \u0027Hatchetfish\u0027, 5500), (2, \u0027Goblin shark\u0027, 4300), (3, \u0027Anglerfish\u0027, 6100);", + "sql": "SELECT species FROM species_depth WHERE depth \u003e 5000;", + "sql_explanation": "Retrieve marine species found at a depth of over 5000 meters." +}, { + "id": "5579", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Select all marine research grants over 100,000 from the \u0027research_grants\u0027 view", + "sql_context": "CREATE VIEW research_grants AS SELECT g.id, g.grant_name, g.amount, g.start_date, g.end_date, m.species_name FROM grants g JOIN marine_species m ON g.species_id \u003d m.id;", + "sql": "SELECT * FROM research_grants WHERE amount \u003e 100000;", + "sql_explanation": "This query retrieves all records from the \u0027research_grants\u0027 view that have an amount greater than 100,000." 
+}, { + "id": "5693", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average depth of oceanic trenches", + "sql_context": "CREATE TABLE ocean_trenches (trench_name TEXT, location TEXT, average_depth FLOAT); INSERT INTO ocean_trenches (trench_name, location, average_depth) VALUES (\u0027Mariana Trench\u0027, \u0027Western Pacific\u0027, 10994), (\u0027Tonga Trench\u0027, \u0027South Pacific\u0027, 10820), (\u0027Kuril Trench\u0027, \u0027North Pacific\u0027, 10542);", + "sql": "SELECT AVG(average_depth) FROM ocean_trenches;", + "sql_explanation": "This query calculates the average of the \u0027average_depth\u0027 column in the \u0027ocean_trenches\u0027 table." 
+}, { + "id": "5733", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total length of the Mid-Atlantic Ridge?", + "sql_context": "CREATE TABLE mid_atlantic_ridge (segment_name TEXT, length REAL); INSERT INTO mid_atlantic_ridge (segment_name, length) VALUES (\u0027Reykjanes Ridge\u0027, 200), (\u0027Kolbeinsey Ridge\u0027, 120), (\u0027Mohns Ridge\u0027, 150), (\u0027Knipovich Ridge\u0027, 180);", + "sql": "SELECT SUM(length) FROM mid_atlantic_ridge;", + "sql_explanation": "Sum the lengths of each segment in the mid_atlantic_ridge table." +}, { + "id": "5758", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total number of marine protected areas in Southeast Asia?", + "sql_context": "CREATE TABLE Southeast_Asia_MPAs (mpa_name TEXT, country TEXT); INSERT INTO Southeast_Asia_MPAs (mpa_name, country) VALUES (\u0027Tubbataha Reefs Natural Park\u0027, \u0027Philippines\u0027), (\u0027Sundarbans National Park\u0027, \u0027India\u0027), (\u0027Belum-Temengor\u0027, \u0027Malaysia\u0027);", + "sql": "SELECT COUNT(*) FROM Southeast_Asia_MPAs;", + "sql_explanation": "This query calculates the total number of marine protected areas in Southeast Asian countries." 
+}, { + "id": "5833", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of marine species in the OceanLife database.", + "sql_context": "CREATE TABLE OceanLife (id INT, species TEXT, status TEXT); INSERT INTO OceanLife (id, species, status) VALUES (1, \u0027Blue Whale\u0027, \u0027Endangered\u0027); INSERT INTO OceanLife (id, species, status) VALUES (2, \u0027Dolphin\u0027, \u0027Protected\u0027); INSERT INTO OceanLife (id, species, status) VALUES (3, \u0027Clownfish\u0027, \u0027Protected\u0027); INSERT INTO OceanLife (id, species, status) VALUES (4, \u0027Seahorse\u0027, \u0027Vulnerable\u0027);", + "sql": "SELECT COUNT(*) FROM OceanLife;", + "sql_explanation": "This SQL query calculates the total number of rows in the \u0027OceanLife\u0027 table using the COUNT() function without specifying any column, which returns the count of all rows." 
+}, { + "id": "1485", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donated amount by donors having \u0027LLC\u0027 in their names to arts and culture causes in Q3 2021?", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount FLOAT, cause TEXT, donation_date DATE);", + "sql": "SELECT SUM(donation_amount) FROM donors WHERE donor_name LIKE \u0027%LLC%\u0027 AND cause \u003d \u0027Arts and Culture\u0027 AND donation_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027;", + "sql_explanation": "This SQL query calculates the total donation_amount for donors having \u0027LLC\u0027 in their names who donated to arts and culture causes in Q3 2021 using the SUM function. It filters records based on the donor_name, cause, and donation_date." 
+}, { + "id": "3124", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total donation amount from donors in the US and Canada in 2021?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), Country varchar(50), DonationAmount numeric(18,2)); INSERT INTO Donors (DonorID, DonorName, Country, DonationAmount) VALUES (1, \u0027Donor1\u0027, \u0027USA\u0027, 5000), (2, \u0027Donor2\u0027, \u0027Canada\u0027, 7000);", + "sql": "SELECT SUM(DonationAmount) FROM Donors WHERE Country IN (\u0027USA\u0027, \u0027Canada\u0027) AND YEAR(DonationDate) \u003d 2021;", + "sql_explanation": "The SQL query calculates the total donation amount from donors in the US and Canada in 2021 by filtering the Donors table based on the country and year, and then summing the DonationAmount column." 
+}, { + "id": "3138", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of donations going to education causes?", + "sql_context": "CREATE TABLE donations (id INT, cause TEXT); INSERT INTO donations (id, cause) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027), (4, \u0027Education\u0027), (5, \u0027Health\u0027);", + "sql": "SELECT (COUNT(*) FILTER (WHERE cause \u003d \u0027Education\u0027)) * 100.0 / COUNT(*) as pct_education FROM donations;", + "sql_explanation": "This SQL query calculates the percentage of donations going to education causes by using the COUNT function to count the number of donations per cause, while filtering the data using the FILTER clause based on cause. It then divides the count of education donations by the total number of donations and multiplies by 100 to get the percentage." 
+}, { + "id": "3620", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average number of years active for organizations in the Education sector.", + "sql_context": "CREATE TABLE PhilanthropyTrends (OrgID INT, Sector TEXT, YearsActive INT); INSERT INTO PhilanthropyTrends (OrgID, Sector, YearsActive) VALUES (101, \u0027Education\u0027, 10), (102, \u0027Healthcare\u0027, 7), (103, \u0027Education\u0027, 12), (104, \u0027Environment\u0027, 8), (105, \u0027Education\u0027, 15);", + "sql": "SELECT AVG(YearsActive) as AvgYearsActive FROM PhilanthropyTrends WHERE Sector \u003d \u0027Education\u0027;", + "sql_explanation": "This SQL query creates the PhilanthropyTrends table with the necessary columns and inserts sample data. The prompt requires finding the average number of years active for organizations in the Education sector. The SQL query filters rows where the sector is Education and calculates the average years active using the AVG function." 
+}, { + "id": "3993", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total amount donated to climate change mitigation initiatives?", + "sql_context": "CREATE TABLE initiatives (id INT, name TEXT, amount_donated INT); INSERT INTO initiatives (id, name, amount_donated) VALUES (1, \u0027Climate Change Mitigation\u0027, 50000);", + "sql": "SELECT SUM(amount_donated) FROM initiatives WHERE name \u003d \u0027Climate Change Mitigation\u0027;", + "sql_explanation": "This query calculates the total amount donated to climate change mitigation initiatives by summing the amount_donated field for all records with the name \u0027Climate Change Mitigation\u0027." 
+}, { + "id": "4676", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount to environmental organizations?", + "sql_context": "CREATE TABLE Donations (DonationID int, Amount decimal, OrganizationType text); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (1, 5000, \u0027Environment\u0027); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (2, 7000, \u0027Health\u0027); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (3, 3000, \u0027Education\u0027); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (4, 2000, \u0027Environment\u0027);", + "sql": "SELECT AVG(Amount) FROM Donations WHERE OrganizationType \u003d \u0027Environment\u0027;", + "sql_explanation": "The SQL query filters for environmental organizations using the WHERE clause, and then calculates the average donation amount using the AVG function." 
+}, { + "id": "4787", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show me the total number of donors from Asia.", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), Country varchar(50)); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (1, \u0027John Doe\u0027, \u0027United States\u0027); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (2, \u0027Jane Smith\u0027, \u0027India\u0027); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (3, \u0027Alice Johnson\u0027, \u0027Japan\u0027);", + "sql": "SELECT COUNT(*) FROM Donors WHERE Donors.Country IN (\u0027India\u0027, \u0027Japan\u0027);", + "sql_explanation": "The SQL query filters the results to only include donors from Asian countries using the WHERE clause and the IN operator. Finally, it calculates the number of donors by using the COUNT function." 
+}, { + "id": "5004", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the minimum donation amount for donors from India?", + "sql_context": "CREATE TABLE donations (id INT, donation_amount DECIMAL, country TEXT); INSERT INTO donations (id, donation_amount, country) VALUES (1, 150.00, \u0027Germany\u0027), (2, 250.00, \u0027Germany\u0027), (3, 300.00, \u0027Canada\u0027), (4, 50.00, \u0027India\u0027), (5, 100.00, \u0027India\u0027);", + "sql": "SELECT MIN(donation_amount) FROM donations WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "The SQL query calculates the minimum donation amount for donors from India by using the MIN function on the donation_amount column, filtering the data for donations from India using the WHERE clause." +}, { + "id": "5325", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the amount of effective_altruism with id 1 to 2000000", + "sql_context": "CREATE TABLE effective_altruism (id INT PRIMARY KEY, name VARCHAR(100), amount INT, cause VARCHAR(20));", + "sql": "UPDATE effective_altruism SET amount \u003d 2000000 WHERE id \u003d 1;", + "sql_explanation": "This query updates the amount of the effective_altruism with id 1 to 2000000." 
+}, { + "id": "5490", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from effective_altruism table where cause is \u0027Poverty\u0027", + "sql_context": "CREATE TABLE effective_altruism (id INT PRIMARY KEY, name VARCHAR(100), amount INT, cause VARCHAR(20));", + "sql": "DELETE FROM effective_altruism WHERE cause \u003d \u0027Poverty\u0027;", + "sql_explanation": "This query deletes records from the effective_altruism table where the cause is \u0027Poverty\u0027." +}, { + "id": "5501", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of donors who have not donated more than $2000 in total.", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name TEXT, country TEXT, total_donation_amount FLOAT); INSERT INTO donors (donor_id, donor_name, country, total_donation_amount) VALUES (1, \u0027Juan Rodriguez\u0027, \u0027Mexico\u0027, 3000.00), (2, \u0027Maria Lopez\u0027, \u0027USA\u0027, 1500.00), (3, \u0027Pedro Garcia\u0027, \u0027Brazil\u0027, 2500.00);", + "sql": "DELETE FROM donors WHERE total_donation_amount \u003c\u003d 2000;", + "sql_explanation": "This query deletes records of donors with a total donation amount less than or equal to $2000 directly from the donors table." 
+}, { + "id": "5624", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "How can we delete donors from a specific region?", + "sql_context": "CREATE TABLE Donors (DonorID INT, Region VARCHAR(50)); INSERT INTO Donors (DonorID, Region) VALUES (1, \u0027Africa\u0027), (2, \u0027Asia\u0027), (3, \u0027Europe\u0027), (4, \u0027South America\u0027), (5, \u0027North America\u0027);", + "sql": "DELETE FROM Donors WHERE Region \u003d \u0027South America\u0027;", + "sql_explanation": "This query deletes donors from a specific region in the Donors table. It filters the data to only include donors from the specified region and then deletes those records." +}, { + "id": "5849", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027donations\u0027 table.", + "sql_context": "CREATE TABLE donations (donation_id INT, donor_id INT, organization_id INT, donation_amount FLOAT); INSERT INTO donations (donation_id, donor_id, organization_id, donation_amount) VALUES (1, 2, 101, 350.00), (2, 3, 102, 700.00), (3, 4, 103, 250.00);", + "sql": "DELETE FROM donations;", + "sql_explanation": "The SQL query deletes all records from the \u0027donations\u0027 table. 
If there are foreign key constraints referencing the \u0027donations\u0027 table, this query might fail, and you should use the ON DELETE CASCADE option in your foreign key constraints to avoid this." +}, { + "id": "1232", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of hospital beds in a rural hospital in Brazil?", + "sql_context": "CREATE TABLE BrazilianRuralHospitals (State VARCHAR(20), HospitalName VARCHAR(50), NumberOfBeds INT); INSERT INTO BrazilianRuralHospitals (State, HospitalName, NumberOfBeds) VALUES (\u0027State A\u0027, \u0027Hospital A\u0027, 50), (\u0027State A\u0027, \u0027Hospital B\u0027, 75), (\u0027State B\u0027, \u0027Hospital C\u0027, 100), (\u0027State B\u0027, \u0027Hospital D\u0027, 125);", + "sql": "SELECT MIN(NumberOfBeds) FROM BrazilianRuralHospitals WHERE State IN (\u0027State A\u0027, \u0027State B\u0027) AND HospitalName IN (\u0027Hospital A\u0027, \u0027Hospital B\u0027, \u0027Hospital C\u0027, \u0027Hospital D\u0027);", + "sql_explanation": "This SQL query calculates the minimum number of hospital beds in a rural hospital in Brazil. It uses the MIN function to find the hospital with the fewest beds and filters the data by state and hospital name." 
+}, { + "id": "1890", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view for displaying daily COVID-19 testing data by hospital", + "sql_context": "CREATE TABLE covid_testing (id INT PRIMARY KEY, hospital_id INT, test_date DATE, tests_conducted INT);", + "sql": "CREATE VIEW daily_hospital_covid_testing AS SELECT hospital_id, test_date, tests_conducted FROM covid_testing ORDER BY hospital_id, test_date;", + "sql_explanation": "Create a view called daily_hospital_covid_testing that displays daily COVID-19 testing data by hospital. The view uses the ORDER BY clause to order the data by hospital ID and test date." +}, { + "id": "2124", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the names and locations of rural health centers in Egypt and Morocco with a capacity over 30 patients.", + "sql_context": "CREATE TABLE health_centers_africa2 (name TEXT, location TEXT, capacity INT, country TEXT); INSERT INTO health_centers_africa2 (name, location, capacity, country) VALUES (\u0027Center 1\u0027, \u0027Rural Egypt\u0027, 50, \u0027Egypt\u0027), (\u0027Center 2\u0027, \u0027Rural Morocco\u0027, 35, \u0027Morocco\u0027), (\u0027Center 3\u0027, \u0027Urban Morocco\u0027, 45, \u0027Morocco\u0027);", + "sql": "SELECT name, location FROM 
health_centers_africa2 WHERE (location LIKE \u0027Rural%\u0027 AND capacity \u003e 30) AND country IN (\u0027Egypt\u0027, \u0027Morocco\u0027)", + "sql_explanation": "Retrieves the names and locations of rural health centers in Egypt and Morocco with a capacity over 30 patients." +}, { + "id": "2179", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of primary care physicians to specialists in rural areas?", + "sql_context": "CREATE TABLE physicians_rural (id INTEGER, specialty VARCHAR(255), location VARCHAR(255));", + "sql": "SELECT (COUNT(*) FILTER (WHERE specialty \u003d \u0027Primary Care\u0027)) / COUNT(*) AS ratio FROM physicians_rural WHERE location LIKE \u0027%rural%\u0027;", + "sql_explanation": "This query calculates the ratio of primary care physicians to specialists in rural areas by dividing the count of primary care physicians by the total count of physicians in rural areas." 
+}, { + "id": "2767", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new healthcare facility \u0027RuralHealthFacility11\u0027 with 50 beds.", + "sql_context": "CREATE TABLE RuralHealthFacility11 (facility_id INT, facility_name VARCHAR(50), num_beds INT);", + "sql": "INSERT INTO RuralHealthFacility11 (facility_id, facility_name, num_beds) VALUES (31, \u0027RuralHealthFacility11\u0027, 50);", + "sql_explanation": "This SQL query inserts a new healthcare facility named \u0027RuralHealthFacility11\u0027 with 50 beds into the \u0027RuralHealthFacility11\u0027 table." +}, { + "id": "2838", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027Heart Disease\u0027 prevalence rate for \u0027Rural County A\u0027 in the \"disease_prevalence\" table to 12%", + "sql_context": "CREATE TABLE disease_prevalence (county VARCHAR(50), diagnosis VARCHAR(50), prevalence DECIMAL(5,2)); INSERT INTO disease_prevalence (county, diagnosis, prevalence) VALUES (\u0027Rural County A\u0027, \u0027Heart Disease\u0027, 10.00);", + "sql": "UPDATE disease_prevalence SET prevalence \u003d 0.12 WHERE county \u003d \u0027Rural County A\u0027 AND diagnosis \u003d \u0027Heart Disease\u0027;", + "sql_explanation": "1. 
Filter the disease_prevalence table to only include the row with the county \u0027Rural County A\u0027 and diagnosis \u0027Heart Disease\u0027 using the WHERE clause. 2. Update the prevalence rate of the selected row to 12%." +}, { + "id": "2948", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals in the rural Midwest have less than 100 beds and offer maternity care?", + "sql_context": "CREATE TABLE hospitals (id INT, beds INT, location VARCHAR(20), maternity_care BOOLEAN); INSERT INTO hospitals (id, beds, location, maternity_care) VALUES (1, 50, \u0027rural midwest\u0027, true), (2, 200, \u0027urban east\u0027, true), (3, 75, \u0027rural west\u0027, false);", + "sql": "SELECT COUNT(*) FROM hospitals WHERE beds \u003c 100 AND location LIKE \u0027%rural midwest%\u0027 AND maternity_care \u003d true;", + "sql_explanation": "This query counts the number of hospitals in the rural Midwest with less than 100 beds that offer maternity care. It filters the hospitals table for rural Midwest hospitals with less than 100 beds and maternity care, then counts the number of rows that meet these criteria." 
+}, { + "id": "2965", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of female patients diagnosed with diabetes in rural areas of Texas?", + "sql_context": "CREATE TABLE patient (patient_id INT, gender VARCHAR(10), age INT, diagnosis VARCHAR(20), location VARCHAR(20)); INSERT INTO patient (patient_id, gender, age, diagnosis, location) VALUES (1, \u0027Female\u0027, 45, \u0027Diabetes\u0027, \u0027Rural Texas\u0027); INSERT INTO patient (patient_id, gender, age, diagnosis, location) VALUES (2, \u0027Male\u0027, 50, \u0027Diabetes\u0027, \u0027Urban Texas\u0027);", + "sql": "SELECT AVG(age) FROM patient WHERE gender \u003d \u0027Female\u0027 AND diagnosis \u003d \u0027Diabetes\u0027 AND location \u003d \u0027Rural Texas\u0027;", + "sql_explanation": "The SQL query calculates the average age of female patients diagnosed with diabetes in rural areas of Texas by filtering the patient table based on gender, diagnosis, and location, and then calculating the average age of the filtered records." 
+}, { + "id": "3070", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of hospital beds in rural healthcare facilities in the Midwest that have performed at least 100 surgeries in the last year?", + "sql_context": "CREATE TABLE facilities (id INT, name TEXT, location TEXT, capacity INT, num_beds INT, num_surgeries INT); INSERT INTO facilities (id, name, location, capacity, num_beds, num_surgeries) VALUES (1, \u0027Facility 1\u0027, \u0027Rural Midwest\u0027, 100, 50, 120);", + "sql": "SELECT MIN(num_beds) as min_beds FROM facilities WHERE location LIKE \u0027%Midwest%\u0027 AND num_surgeries \u003e\u003d 100;", + "sql_explanation": "This query calculates the minimum number of hospital beds in rural healthcare facilities in the Midwest that have performed at least 100 surgeries in the last year. It starts by selecting the minimum number of beds from the facilities table, filtering for facilities located in the Midwest and with at least 100 surgeries. The result is the minimum number of beds for facilities meeting these criteria." 
+}, { + "id": "3091", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospital beds in rural hospitals of Connecticut that were built before 2010?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, beds INT, rural BOOLEAN, built DATE); INSERT INTO hospitals (id, name, location, beds, rural, built) VALUES (1, \u0027Hospital A\u0027, \u0027Connecticut\u0027, 150, true, \u00272005-01-01\u0027), (2, \u0027Hospital B\u0027, \u0027Connecticut\u0027, 200, true, \u00272008-01-01\u0027);", + "sql": "SELECT SUM(beds) FROM hospitals WHERE location \u003d \u0027Connecticut\u0027 AND rural \u003d true AND built \u003c \u00272010-01-01\u0027;", + "sql_explanation": "This query calculates the total number of hospital beds in rural hospitals in Connecticut that were built before 2010." 
+}, { + "id": "3197", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of hospital beds in rural hospitals of Idaho that have between 100 and 200 beds?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, beds INT, rural BOOLEAN); INSERT INTO hospitals (id, name, location, beds, rural) VALUES (1, \u0027Hospital A\u0027, \u0027Idaho\u0027, 180, true), (2, \u0027Hospital B\u0027, \u0027Idaho\u0027, 150, true);", + "sql": "SELECT AVG(beds) FROM hospitals WHERE location \u003d \u0027Idaho\u0027 AND rural \u003d true AND beds BETWEEN 100 AND 200;", + "sql_explanation": "This query calculates the average number of hospital beds in rural hospitals in Idaho that have between 100 and 200 beds." 
+}, { + "id": "3623", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of hospital beds in rural hospitals of Delaware that have more than 150 beds?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, beds INT, rural BOOLEAN); INSERT INTO hospitals (id, name, location, beds, rural) VALUES (1, \u0027Hospital A\u0027, \u0027Delaware\u0027, 180, true), (2, \u0027Hospital B\u0027, \u0027Delaware\u0027, 220, true);", + "sql": "SELECT AVG(beds) FROM hospitals WHERE location \u003d \u0027Delaware\u0027 AND rural \u003d true AND beds \u003e 150;", + "sql_explanation": "This query calculates the average number of hospital beds in rural hospitals in Delaware that have more than 150 beds." 
+}, { + "id": "3685", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospital beds in rural hospitals of Alabama that have more than 250 beds?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, beds INT, rural BOOLEAN); INSERT INTO hospitals (id, name, location, beds, rural) VALUES (1, \u0027Hospital A\u0027, \u0027Alabama\u0027, 300, true), (2, \u0027Hospital B\u0027, \u0027Alabama\u0027, 200, true);", + "sql": "SELECT SUM(beds) FROM hospitals WHERE location \u003d \u0027Alabama\u0027 AND rural \u003d true AND beds \u003e 250;", + "sql_explanation": "This query calculates the total number of hospital beds in rural hospitals in Alabama that have more than 250 beds." 
+}, { + "id": "3712", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average annual budget for rural health clinics in the western states, excluding clinics with an annual budget below $500,000?", + "sql_context": "CREATE TABLE clinics (name TEXT, state TEXT, annual_budget INT); INSERT INTO clinics (name, state, annual_budget) VALUES (\u0027Clinic A\u0027, \u0027Western\u0027, 700000), (\u0027Clinic B\u0027, \u0027Western\u0027, 400000), (\u0027Clinic C\u0027, \u0027Western\u0027, 600000), (\u0027Clinic D\u0027, \u0027Eastern\u0027, 800000);", + "sql": "SELECT AVG(annual_budget) FROM clinics WHERE state \u003d \u0027Western\u0027 AND annual_budget \u003e\u003d 500000;", + "sql_explanation": "This query calculates the average annual budget for clinics in the \u0027Western\u0027 state where the annual budget is greater than or equal to $500,000." 
+}, { + "id": "3836", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients diagnosed with diabetes in rural areas of Texas?", + "sql_context": "CREATE TABLE patients (id INT, age INT, diagnosis VARCHAR(20), location VARCHAR(20)); INSERT INTO patients (id, age, diagnosis, location) VALUES (1, 65, \u0027diabetes\u0027, \u0027rural Texas\u0027);", + "sql": "SELECT AVG(age) FROM patients WHERE diagnosis \u003d \u0027diabetes\u0027 AND location \u003d \u0027rural Texas\u0027;", + "sql_explanation": "This SQL query calculates the average age of patients diagnosed with diabetes in rural areas of Texas by selecting the age column from the patients table where the diagnosis is \u0027diabetes\u0027 and the location is \u0027rural Texas\u0027, then finding the average of the resulting ages." +}, { + "id": "3968", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"providers\" table", + "sql_context": "CREATE TABLE providers (provider_id INT, name VARCHAR(50), zip_code VARCHAR(10));", + "sql": "INSERT INTO providers (provider_id, name, zip_code) VALUES (3, \u0027Dr. 
Garcia\u0027, \u002712345\u0027);", + "sql_explanation": "This query inserts a new record into the providers table, with a provider_id of 3, name of \u0027Dr. Garcia\u0027, and zip code of \u002712345\u0027." +}, { + "id": "4262", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of female patients diagnosed with diabetes?", + "sql_context": "CREATE TABLE patient (patient_id INT, gender VARCHAR(10), age INT, diagnosis VARCHAR(20));", + "sql": "SELECT AVG(age) FROM patient WHERE gender \u003d \u0027female\u0027 AND diagnosis \u003d \u0027diabetes\u0027;", + "sql_explanation": "This query calculates the average age of female patients who have been diagnosed with diabetes. It does so by filtering the patient table to only include female patients with a diabetes diagnosis and then calculating the average age of those patients." 
+}, { + "id": "4267", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"hospitals\" table for \u0027Rural Hospital B\u0027 located at \u0027789 Oak St\u0027", + "sql_context": "CREATE TABLE hospitals (id INT PRIMARY KEY, name VARCHAR(50), address VARCHAR(100)); INSERT INTO hospitals (id, name, address) VALUES (\u0027123\u0027, \u0027Rural Hospital A\u0027, \u0027123 Main St\u0027);", + "sql": "INSERT INTO hospitals (name, address) VALUES (\u0027Rural Hospital B\u0027, \u0027789 Oak St\u0027);", + "sql_explanation": "1. Insert a new record into the hospitals table with the following data: \u0027Rural Hospital B\u0027 and \u0027789 Oak St\u0027. 2. The \u0027id\u0027 column is set to auto-increment, so the database will automatically assign a unique ID to the new record." 
+}, { + "id": "4296", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients diagnosed with Asthma in the rural areas of Texas?", + "sql_context": "CREATE TABLE Patients (PatientID INT, Age INT, Gender VARCHAR(10), Diagnosis VARCHAR(20), Location VARCHAR(20)); INSERT INTO Patients (PatientID, Age, Gender, Diagnosis, Location) VALUES (1, 35, \u0027Male\u0027, \u0027Asthma\u0027, \u0027Texas\u0027); INSERT INTO Patients (PatientID, Age, Gender, Diagnosis, Location) VALUES (2, 42, \u0027Female\u0027, \u0027Asthma\u0027, \u0027Texas\u0027);", + "sql": "SELECT AVG(Age) FROM Patients WHERE Diagnosis \u003d \u0027Asthma\u0027 AND Location \u003d \u0027Texas\u0027;", + "sql_explanation": "The SQL query calculates the average age of patients by finding the mean of the \u0027Age\u0027 column, filtered for patients with a diagnosis of \u0027Asthma\u0027 in the \u0027Texas\u0027 location." 
+}, { + "id": "4509", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new clinician \u0027Bella\u0027 with the last name \u0027Williams\u0027 to the \"clinicians\" table", + "sql_context": "CREATE TABLE clinicians (id INT PRIMARY KEY AUTO_INCREMENT, first_name VARCHAR(50), last_name VARCHAR(50)); INSERT INTO clinicians (first_name, last_name) VALUES (\u0027John\u0027, \u0027Doe\u0027), (\u0027Jane\u0027, \u0027Smith\u0027);", + "sql": "INSERT INTO clinicians (first_name, last_name) VALUES (\u0027Bella\u0027, \u0027Williams\u0027);", + "sql_explanation": "1. Insert a new record into the clinicians table with the first name \u0027Bella\u0027 and last name \u0027Williams\u0027. 2. The \u0027id\u0027 column is set to auto-increment, so the database will automatically assign a unique ID to the new record." 
+}, { + "id": "4518", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of nurses in hospitals in Montana and Wyoming?", + "sql_context": "CREATE TABLE hospitals (id INT, name VARCHAR(50), state VARCHAR(25), num_nurses INT); INSERT INTO hospitals (id, name, state, num_nurses) VALUES (1, \u0027Hospital A\u0027, \u0027Montana\u0027, 50), (2, \u0027Hospital B\u0027, \u0027Wyoming\u0027, 30), (3, \u0027Hospital C\u0027, \u0027Wyoming\u0027, 75);", + "sql": "SELECT AVG(num_nurses) FROM hospitals WHERE state IN (\u0027Montana\u0027, \u0027Wyoming\u0027);", + "sql_explanation": "This query calculates the average number of nurses in hospitals in Montana and Wyoming by using the AVG function on the num_nurses column, filtering for hospitals in Montana and Wyoming." +}, { + "id": "4956", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and positions of all female staff members in the \u0027healthcare_staff\u0027 table?", + "sql_context": "CREATE TABLE healthcare_staff (name VARCHAR(255), gender VARCHAR(255), position VARCHAR(255), hospital_id INT); INSERT INTO healthcare_staff (name, gender, position, hospital_id) VALUES (\u0027Dr. Jane Smith\u0027, \u0027Female\u0027, \u0027Doctor\u0027, 1), (\u0027Dr. 
Maria Garcia\u0027, \u0027Female\u0027, \u0027Doctor\u0027, 2);", + "sql": "SELECT name, position FROM healthcare_staff WHERE gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query returns the names and positions of all female staff members in the \u0027healthcare_staff\u0027 table by filtering on the gender column with the value \u0027Female\u0027." +}, { + "id": "5012", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many nurses work in hospitals with more than 100 beds?", + "sql_context": "CREATE TABLE hospital (hospital_id INT, beds INT, nurse_count INT);", + "sql": "SELECT COUNT(*) FROM hospital WHERE beds \u003e 100 AND nurse_count \u003e 0;", + "sql_explanation": "This query counts the number of nurses working in hospitals with more than 100 beds by filtering the hospital table to only include hospitals with more than 100 beds and a positive nurse count." 
+}, { + "id": "5038", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of patients per day in rural_clinics in 2022?", + "sql_context": "CREATE TABLE rural_clinics (clinic_id INT, patients_per_day INT, year INT);", + "sql": "SELECT AVG(patients_per_day) FROM rural_clinics WHERE year \u003d 2022;", + "sql_explanation": "The SQL query calculates the average (AVG) number of patients per day in the \u0027rural_clinics\u0027 table for the year 2022." +}, { + "id": "5090", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals are there in rural Texas with more than 100 beds?", + "sql_context": "CREATE TABLE Hospitals (HospitalID int, Name varchar(50), Location varchar(50), Beds int, Rural bool); INSERT INTO Hospitals (HospitalID, Name, Location, Beds, Rural) VALUES (1, \u0027Hospital A\u0027, \u0027Rural Texas\u0027, 120, true);", + "sql": "SELECT COUNT(*) FROM Hospitals WHERE Rural \u003d true AND Beds \u003e 100;", + "sql_explanation": "This query calculates the number of hospitals in rural Texas with more than 100 beds by filtering the Hospitals table for rows where Rural is true and Beds is greater than 100, then counting the number of those rows." 
+}, { + "id": "5100", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of rural hospitals and clinics in the states of California and Texas?", + "sql_context": "CREATE TABLE rural_hospitals (name TEXT, state TEXT, num_beds INTEGER); INSERT INTO rural_hospitals (name, state, num_beds) VALUES (\u0027Hospital A\u0027, \u0027CA\u0027, 50), (\u0027Hospital B\u0027, \u0027CA\u0027, 75), (\u0027Clinic C\u0027, \u0027TX\u0027, 25), (\u0027Clinic D\u0027, \u0027TX\u0027, 30);", + "sql": "SELECT COUNT(*) FROM rural_hospitals WHERE state IN (\u0027CA\u0027, \u0027TX\u0027);", + "sql_explanation": "The SQL query counts the number of rows in the rural_hospitals table where the state is either \u0027CA\u0027 or \u0027TX\u0027. This provides the total number of hospitals and clinics in these two states." 
+}, { + "id": "5229", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age requirement for medical professionals in Colombia?", + "sql_context": "CREATE TABLE professionals (id INT, name TEXT, country TEXT, age INT, profession TEXT);", + "sql": "SELECT MIN(age) FROM professionals WHERE country \u003d \u0027Colombia\u0027;", + "sql_explanation": "This query finds the minimum age requirement for medical professionals in Colombia. It uses the MIN function to find the lowest value in the age column of the professionals table where country is \u0027Colombia\u0027." +}, { + "id": "5377", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum age of patients in rural_clinic from Canada?", + "sql_context": "CREATE TABLE rural_clinic (patient_id INT, age INT, gender VARCHAR(10), country VARCHAR(20));", + "sql": "SELECT MAX(age) FROM rural_clinic WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "The SQL query finds the maximum (MAX) age of patients in the \u0027rural_clinic\u0027 table who are from Canada." 
+}, { + "id": "5415", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of male patients in the \u0027rural_clinic_5\u0027 table?", + "sql_context": "CREATE TABLE rural_clinic_5 (patient_id INT, age INT, gender VARCHAR(10)); INSERT INTO rural_clinic_5 (patient_id, age, gender) VALUES (1, 35, \u0027Male\u0027), (2, 50, \u0027Female\u0027), (3, 42, \u0027Male\u0027), (4, 60, \u0027Male\u0027), (5, 30, \u0027Female\u0027), (6, 45, \u0027Female\u0027), (7, 40, \u0027Male\u0027);", + "sql": "SELECT COUNT(*) FROM rural_clinic_5 WHERE gender \u003d \u0027Male\u0027;", + "sql_explanation": "This query counts the number of male patients in the \u0027rural_clinic_5\u0027 table by selecting all records where gender is \u0027Male\u0027 and counting the number of records." 
+}, { + "id": "5543", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which facilities are not hospitals?", + "sql_context": "CREATE TABLE facilities (id INT, name TEXT, type TEXT); INSERT INTO facilities (id, name, type) VALUES (1, \u0027Rural Clinic\u0027, \u0027Primary Care\u0027), (2, \u0027Urgent Care\u0027, \u0027Urgent Care\u0027), (3, \u0027General Hospital\u0027, \u0027Hospital\u0027);", + "sql": "SELECT name FROM facilities WHERE type !\u003d \u0027Hospital\u0027;", + "sql_explanation": "The SQL query uses the NOT EQUAL operator to find facilities whose type is not \u0027Hospital\u0027." +}, { + "id": "5593", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove records where prevalence \u003e 1000 from \u0027disease_data\u0027", + "sql_context": "CREATE TABLE if not exists \u0027disease_data\u0027 (id INT, state TEXT, disease TEXT, prevalence INT, PRIMARY KEY(id));", + "sql": "DELETE FROM \u0027disease_data\u0027 WHERE prevalence \u003e 1000;", + "sql_explanation": "The SQL query deletes records from \u0027disease_data\u0027 table where \u0027prevalence\u0027 is greater than 1000." 
+}, { + "id": "5728", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many beds are available in all rural hospitals?", + "sql_context": "CREATE TABLE rural_hospitals( hospital_id INT PRIMARY KEY, name VARCHAR(255), bed_count INT, rural_population_served INT);", + "sql": "SELECT SUM(bed_count) FROM rural_hospitals;", + "sql_explanation": "1. Select the sum of values in the bed_count column 2. Calculate the sum for all rows in the rural_hospitals table" +}, { + "id": "5775", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of procedures in the \u0027rural_hospital_3\u0027 table?", + "sql_context": "CREATE TABLE rural_hospital_3 (procedure_id INT, cost DECIMAL(5,2)); INSERT INTO rural_hospital_3 (procedure_id, cost) VALUES (1, 100.50), (2, 150.25), (3, 75.00), (4, 200.00), (5, 50.00);", + "sql": "SELECT SUM(cost) FROM rural_hospital_3;", + "sql_explanation": "This query calculates the total cost of procedures in the \u0027rural_hospital_3\u0027 table by summing all costs." 
+}, { + "id": "5819", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and type of each facility?", + "sql_context": "CREATE TABLE facilities (id INT, name TEXT, type TEXT); INSERT INTO facilities (id, name, type) VALUES (1, \u0027Rural Clinic\u0027, \u0027Primary Care\u0027), (2, \u0027Urgent Care\u0027, \u0027Urgent Care\u0027), (3, \u0027General Hospital\u0027, \u0027Hospital\u0027);", + "sql": "SELECT name, type FROM facilities;", + "sql_explanation": "The SQL query selects the \u0027name\u0027 and \u0027type\u0027 columns from the \u0027facilities\u0027 table." +}, { + "id": "1309", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show all posts with geolocation data in the state of \u0027California\u0027 since January 1, 2021.", + "sql_context": "CREATE TABLE posts (post_id INT, user_id INT, post_text TEXT, post_date DATE, geolocation POINT); INSERT INTO posts (post_id, user_id, post_text, post_date, geolocation) VALUES (1, 789, \u0027Enjoying the sun in CA\u0027, \u00272021-02-22\u0027, POINT(37.7749, -122.4194));", + "sql": "SELECT post_id, user_id, post_text, post_date, geolocation FROM posts WHERE ST_Contains(geolocation, ST_MakePoint(-119.4179, 37.0000)) AND post_date \u003e\u003d \u00272021-01-01\u0027;", + 
"sql_explanation": "This query returns all posts with geolocation data within the state of California since January 1, 2021. The WHERE clause uses ST_Contains, a spatial function, to filter the geolocation column and post_date." +}, { + "id": "2004", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average engagement rate for posts containing hashtags related to \u0027technology\u0027 in the past month?", + "sql_context": "CREATE TABLE posts (id INT, hashtags TEXT, engagement_rate DECIMAL(5, 2), timestamp TIMESTAMP); INSERT INTO posts (id, hashtags, engagement_rate, timestamp) VALUES (1, \u0027#technology, #gadgets\u0027, 5.12, \u00272022-05-01 10:00:00\u0027);", + "sql": "SELECT AVG(engagement_rate) FROM posts WHERE hashtags LIKE \u0027%#technology%\u0027 AND timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 MONTH);", + "sql_explanation": "This query calculates the average engagement rate for posts containing hashtags related to \u0027technology\u0027 in the past month. It filters the data for hashtags containing \u0027#technology\u0027 and checks if the timestamp is within the last month. Then it calculates the average engagement rate for the matching records." 
+}, { + "id": "2077", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users from Canada in the technology category have more than 1000 followers?", + "sql_context": "CREATE TABLE users (id INT, country VARCHAR(255), category VARCHAR(255), followers INT); INSERT INTO users (id, country, category, followers) VALUES (1, \u0027Canada\u0027, \u0027technology\u0027, 1200);", + "sql": "SELECT COUNT(DISTINCT users.id) FROM users WHERE users.country \u003d \u0027Canada\u0027 AND users.category \u003d \u0027technology\u0027 AND users.followers \u003e 1000;", + "sql_explanation": "This query counts the number of users from Canada in the technology category who have more than 1000 followers. It does this by filtering the users table for users from Canada, the technology category, and followers greater than 1000, then counts the distinct number of user IDs." 
+}, { + "id": "2244", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue generated from users in the US and Canada for Q1 2022?", + "sql_context": "CREATE SCHEMA socialmedia; CREATE TABLE ads(user_id INT, country VARCHAR(2), revenue FLOAT, transaction_date DATE); INSERT INTO ads (user_id, country, revenue, transaction_date) VALUES (1, \u0027US\u0027, 100, \u00272022-01-01\u0027); INSERT INTO ads (user_id, country, revenue, transaction_date) VALUES (2, \u0027CA\u0027, 150, \u00272022-01-02\u0027);", + "sql": "SELECT SUM(revenue) FROM socialmedia.ads WHERE country IN (\u0027US\u0027, \u0027CA\u0027) AND transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "This query calculates the total revenue for users in the US and Canada by summing the revenue for rows with country values of \u0027US\u0027 or \u0027CA\u0027 and transaction dates within Q1 2022." 
+}, { + "id": "2535", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of users in Nigeria and Kenya who have clicked on a specific ad, and what was the total revenue generated from these clicks?", + "sql_context": "CREATE TABLE ad_clicks (user_id INT, ad_id INT, country VARCHAR(2), click_time TIMESTAMP, revenue FLOAT); INSERT INTO ad_clicks (user_id, ad_id, country, click_time, revenue) VALUES (1, 1001, \u0027NG\u0027, \u00272022-03-01 12:34:56\u0027, 2.5), (2, 1001, \u0027KE\u0027, \u00272022-03-02 13:45:27\u0027, 3.0);", + "sql": "SELECT SUM(CASE WHEN country IN (\u0027NG\u0027, \u0027KE\u0027) THEN revenue ELSE 0 END) as total_revenue FROM ad_clicks WHERE ad_id \u003d 1001;", + "sql_explanation": "This SQL query calculates the total revenue generated from users in Nigeria and Kenya who have clicked on a specific ad. It achieves this by using the SUM and CASE statements to sum the revenue for the specified countries and ad_id. The WHERE clause filters the results to only include records for the specified ad." 
+}, { + "id": "2721", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum number of likes for posts related to \"fitness\" in the \"social_media\" schema posted after January 15, 2022.", + "sql_context": "CREATE TABLE posts (id INT, content TEXT, likes INT, shares INT, created_at DATETIME); INSERT INTO posts (id, content, likes, shares, created_at) VALUES (1, \u0027Fitness goal: 100 push-ups a day!\u0027, 500, 200, \u00272022-01-01 10:00:00\u0027), (2, \u0027Just finished a 5k run!\u0027, 800, 300, \u00272022-01-15 11:00:00\u0027), (3, \u0027Joining a local gym!\u0027, 600, 400, \u00272022-01-16 12:00:00\u0027);", + "sql": "SELECT MAX(likes) FROM posts WHERE content LIKE \u0027%fitness%\u0027 AND created_at \u003e \u00272022-01-15\u0027 AND schema\u003d\u0027social_media\u0027;", + "sql_explanation": "This query finds the maximum number of likes for posts related to \"fitness\" in the \"social_media\" schema posted after January 15, 2022. The WHERE clause filters the rows based on the content column\u0027s value and created_at column\u0027s value. The MAX function then computes the maximum value of the likes column for the filtered rows." 
+}, { + "id": "2809", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of likes on posts made in the month of January 2022?", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, timestamp TIMESTAMP, likes INT); INSERT INTO posts (id, user_id, timestamp, likes) VALUES (1, 1, \u00272022-01-01 12:00:00\u0027, 100), (2, 1, \u00272022-01-02 13:00:00\u0027, 200), (3, 2, \u00272022-01-03 10:00:00\u0027, 50), (4, 1, \u00272022-02-04 11:00:00\u0027, 300), (5, 2, \u00272022-02-05 12:00:00\u0027, 400), (6, 1, \u00272022-03-06 13:00:00\u0027, 500);", + "sql": "SELECT SUM(posts.likes) FROM posts WHERE posts.timestamp BETWEEN \u00272022-01-01 00:00:00\u0027 AND \u00272022-01-31 23:59:59\u0027;", + "sql_explanation": "This query calculates the total number of likes on posts made in the month of January 2022. It does this by filtering for posts that were made in January 2022 and then calculating the sum of the likes column." 
+}, { + "id": "2893", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of posts with hashtags in the \u0027social_media\u0027 table, for users in \u0027North America\u0027?", + "sql_context": "CREATE TABLE social_media(user_id INT, user_name VARCHAR(50), region VARCHAR(50), post_date DATE, hashtags BOOLEAN, likes INT);", + "sql": "SELECT 100.0 * SUM(hashtags) / COUNT(*) as hashtag_percentage FROM social_media WHERE region \u003d \u0027North America\u0027;", + "sql_explanation": "This SQL query calculates the percentage of posts with hashtags for users in \u0027North America\u0027 by filtering the \u0027social_media\u0027 table based on the \u0027region\u0027 column and then applying the SUM and COUNT functions to the \u0027hashtags\u0027 column. The result is multiplied by 100.0 to convert it to a percentage." 
+}, { + "id": "3258", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the daily active user count for Vietnam in March 2022.", + "sql_context": "CREATE TABLE if not exists activity (user_id INT, country VARCHAR(50), activity_date DATE, year INT, month INT, day INT); INSERT INTO activity (user_id, country, activity_date) VALUES (1, \u0027Vietnam\u0027, \u00272022-03-01\u0027), (2, \u0027Vietnam\u0027, \u00272022-03-02\u0027);", + "sql": "SELECT COUNT(DISTINCT user_id) FROM activity WHERE country \u003d \u0027Vietnam\u0027 AND month \u003d 3 AND year \u003d 2022;", + "sql_explanation": "This query calculates the daily active user count for Vietnam in March 2022 by counting the distinct user_id values for matching records." 
+}, { + "id": "4458", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the oldest user in the \u0027africa\u0027 region?", + "sql_context": "CREATE TABLE user (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), region VARCHAR(20), created_at TIMESTAMP); INSERT INTO user (id, name, age, gender, region, created_at) VALUES (1, \u0027Nana Agyeman\u0027, 60, \u0027Female\u0027, \u0027africa\u0027, \u00272021-01-01 10:00:00\u0027), (2, \u0027Kofi Ansah\u0027, 45, \u0027Male\u0027, \u0027africa\u0027, \u00272021-01-02 11:00:00\u0027);", + "sql": "SELECT name, age FROM user WHERE region \u003d \u0027africa\u0027 ORDER BY age DESC LIMIT 1;", + "sql_explanation": "This query retrieves the name and age of the oldest user in the \u0027africa\u0027 region. It filters the \u0027user\u0027 table to only include rows with a \u0027region\u0027 value of \u0027africa\u0027 and orders the resulting table by the \u0027age\u0027 column in descending order. Then, it retrieves the top row of the sorted table." 
+}, { + "id": "4593", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have more than 1000 followers in the \"social_media_users\" table?", + "sql_context": "CREATE TABLE social_media_users (user_id INT, followers_count INT); INSERT INTO social_media_users (user_id, followers_count) VALUES (1, 1200), (2, 2000), (3, 1500), (4, 1050), (5, 2500), (6, 800);", + "sql": "SELECT COUNT(user_id) FROM social_media_users WHERE followers_count \u003e 1000;", + "sql_explanation": "The SQL query counts the number of users with more than 1000 followers by using the COUNT function on the \"user_id\" column and applying a WHERE clause to filter based on the \"followers_count\" column." 
+}, { + "id": "4897", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average number of likes received by posts containing the hashtag \u0027#bookreviews\u0027 in \u0027France\u0027, per day.", + "sql_context": "CREATE TABLE posts (id INT, date DATE, likes INT, content TEXT); CREATE TABLE hashtags (id INT, post_id INT, hashtag TEXT);", + "sql": "SELECT AVG(likes / DATEDIFF(\u00272023-03-01\u0027, date)) AS avg_likes_per_day", + "sql_explanation": "This query starts by joining the posts and hashtags tables based on their relationships. It then filters the results to only include posts from France that contain the hashtag \u0027#bookreviews\u0027. Finally, it calculates the average number of likes per day by dividing the total number of likes by the number of days between the earliest and latest post dates." 
+}, { + "id": "4928", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the maximum number of retweets for tweets with the hashtag #zerowaste in the \"sustainable_living\" schema.", + "sql_context": "CREATE TABLE tweets (id INT, user_id INT, content TEXT, retweets INT, hashtags TEXT);", + "sql": "SELECT MAX(retweets) FROM tweets WHERE hashtags LIKE \u0027%#zerowaste%\u0027;", + "sql_explanation": "The query calculates the maximum number of retweets for tweets with the hashtag #zerowaste in the tweets table." +}, { + "id": "5053", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of posts per day in the \u0027social_media\u0027 database?", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, content TEXT, timestamp TIMESTAMP);", + "sql": "SELECT AVG(COUNT(posts.id)/86400) AS avg_posts_per_day FROM posts;", + "sql_explanation": "This query calculates the average number of posts per day in the \u0027social_media\u0027 database by grouping the \u0027posts\u0027 table by timestamp and calculating the count of posts for each group. It then divides the count by the number of seconds in a day (86400) and calculates the average." 
+}, { + "id": "5332", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of followers for any user, and what is their location?", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(50), location VARCHAR(50), followers INT); INSERT INTO users (id, name, location, followers) VALUES (1, \u0027Alice\u0027, \u0027Canada\u0027, 100); INSERT INTO users (id, name, location, followers) VALUES (2, \u0027Bob\u0027, \u0027USA\u0027, 200); INSERT INTO users (id, name, location, followers) VALUES (3, \u0027Charlie\u0027, \u0027Mexico\u0027, 300);", + "sql": "SELECT location, MAX(followers) as max_followers FROM users;", + "sql_explanation": "We select the location and maximum number of followers from the users table." 
+}, { + "id": "5660", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of followers for all users in the \u0027user_followers\u0027 table?", + "sql_context": "CREATE TABLE user_followers (user_id INT, followers_count INT);", + "sql": "SELECT SUM(followers_count) FROM user_followers;", + "sql_explanation": "The SQL query calculates the total number of followers for all users in the \u0027user_followers\u0027 table using the SUM() function on the \u0027followers_count\u0027 column." +}, { + "id": "531", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of male and female members in unions with a \u0027technology\u0027 industry classification?", + "sql_context": "CREATE TABLE unions (id INT, name VARCHAR(255), industry VARCHAR(255), member_gender VARCHAR(10), member_count INT); INSERT INTO unions (id, name, industry, member_gender, member_count) VALUES (1, \u0027Union A\u0027, \u0027technology\u0027, \u0027male\u0027, 200), (2, \u0027Union B\u0027, \u0027technology\u0027, \u0027female\u0027, 150), (3, \u0027Union C\u0027, \u0027technology\u0027, \u0027male\u0027, 250), (4, \u0027Union D\u0027, \u0027technology\u0027, \u0027female\u0027, 100);", + "sql": "SELECT SUM(CASE WHEN 
member_gender \u003d \u0027male\u0027 THEN member_count ELSE 0 END) as total_male, SUM(CASE WHEN member_gender \u003d \u0027female\u0027 THEN member_count ELSE 0 END) as total_female FROM unions WHERE industry \u003d \u0027technology\u0027;", + "sql_explanation": "This query calculates the total number of male and female members in unions with a \u0027technology\u0027 industry classification. It uses a CASE statement to filter the unions table for the specific gender, then calculates the total for each gender using the SUM function." +}, { + "id": "716", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new employees into the Employees table", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), position VARCHAR(50), left_company BOOLEAN);", + "sql": "INSERT INTO Employees (id, name, position, left_company) VALUES (1, \u0027Juan Garcia\u0027, \u0027Software Engineer\u0027, FALSE), (2, \u0027Aisha Khan\u0027, \u0027Data Scientist\u0027, FALSE), (3, \u0027Carlos Mendoza\u0027, \u0027QA Engineer\u0027, FALSE);", + "sql_explanation": "The SQL query inserts new records into the Employees table, representing new employees with the specified names, positions, and a boolean value indicating that they have not left the company yet." 
+}, { + "id": "2554", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many collective bargaining agreements were signed in the \u0027Education\u0027 sector between 2015 and 2018?", + "sql_context": "CREATE TABLE CollectiveBargaining (AgreementID INT, Sector VARCHAR(20), SignDate DATE); INSERT INTO CollectiveBargaining (AgreementID, Sector, SignDate) VALUES (1, \u0027Education\u0027, \u00272015-05-01\u0027), (2, \u0027Education\u0027, \u00272016-09-15\u0027), (3, \u0027Healthcare\u0027, \u00272017-03-25\u0027);", + "sql": "SELECT COUNT(*) FROM CollectiveBargaining WHERE Sector \u003d \u0027Education\u0027 AND SignDate BETWEEN \u00272015-01-01\u0027 AND \u00272018-12-31\u0027;", + "sql_explanation": "Count the number of collective bargaining agreements signed in the \u0027Education\u0027 sector between 2015 and 2018 by filtering the CollectiveBargaining table by Sector and SignDate within the specified range, then counting the number of records in the filtered dataset (COUNT function)." 
+}, { + "id": "2639", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum workplace safety rating in Europe?", + "sql_context": "CREATE TABLE safety_ratings (country VARCHAR(50), rating INT); INSERT INTO safety_ratings (country, rating) VALUES (\u0027Germany\u0027, 85); INSERT INTO safety_ratings (country, rating) VALUES (\u0027France\u0027, 82); INSERT INTO safety_ratings (country, rating) VALUES (\u0027United Kingdom\u0027, 88); INSERT INTO safety_ratings (country, rating) VALUES (\u0027Italy\u0027, 79);", + "sql": "SELECT MIN(rating), MAX(rating) FROM safety_ratings WHERE country IN (\u0027Germany\u0027, \u0027France\u0027, \u0027United Kingdom\u0027, \u0027Italy\u0027);", + "sql_explanation": "The SQL query filters the \u0027safety_ratings\u0027 table for rows with European countries, finding the minimum and maximum values of the \u0027rating\u0027 column. This shows the minimum and maximum workplace safety ratings in Europe." 
+}, { + "id": "2938", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of members in unions in Japan, Argentina, and Brazil?", + "sql_context": "CREATE TABLE UnionMembers (id INT, union_name VARCHAR(50), country VARCHAR(50), member_count INT); INSERT INTO UnionMembers (id, union_name, country, member_count) VALUES (1, \u0027JAW\u0027, \u0027Japan\u0027, 150000), (2, \u0027ALU\u0027, \u0027Australia\u0027, 200000), (3, \u0027AWU\u0027, \u0027Australia\u0027, 100000), (6, \u0027CGT\u0027, \u0027Argentina\u0027, 300000), (7, \u0027CTA\u0027, \u0027Argentina\u0027, 250000), (8, \u0027CUT\u0027, \u0027Brazil\u0027, 150000), (9, \u0027FORCA\u0027, \u0027Brazil\u0027, 120000);", + "sql": "SELECT SUM(member_count) as total_members FROM UnionMembers WHERE country IN (\u0027Japan\u0027, \u0027Argentina\u0027, \u0027Brazil\u0027);", + "sql_explanation": "This query calculates the total number of union members in Japan, Argentina, and Brazil by filtering the UnionMembers table to only include rows with a country column of \u0027Japan\u0027, \u0027Argentina\u0027, or \u0027Brazil\u0027 and calculating the sum of the member_count column." 
+}, { + "id": "3143", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workplaces with high injury rates?", + "sql_context": "CREATE TABLE workplace_safety (id INT, union_id INT, workplace_name VARCHAR(50), injury_rate DECIMAL(5,2)); INSERT INTO workplace_safety (id, union_id, workplace_name, injury_rate) VALUES (1, 1001, \u0027ABC Factory\u0027, 6.5), (2, 1002, \u0027XYZ Inc\u0027, 3.2), (3, 1003, \u0027LMN Corp\u0027, 9.1);", + "sql": "SELECT SUM(CASE WHEN injury_rate \u003e 5 THEN 1 ELSE 0 END) as high_injury_workplaces FROM workplace_safety;", + "sql_explanation": "The SQL query calculates the total number of workplaces with high injury rates (greater than 5) by using a CASE statement and SUM function." 
+}, { + "id": "3352", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of workplace safety incidents for unions in the \u0027services\u0027 sector that have more than 2000 members?", + "sql_context": "CREATE TABLE union_stats (id INT, union_name VARCHAR(30), sector VARCHAR(20), num_members INT, num_safety_incidents INT); INSERT INTO union_stats (id, union_name, sector, num_members, num_safety_incidents) VALUES (1, \u0027Union A\u0027, \u0027services\u0027, 3000, 15), (2, \u0027Union B\u0027, \u0027education\u0027, 2000, 8), (3, \u0027Union C\u0027, \u0027services\u0027, 1000, 2), (4, \u0027Union D\u0027, \u0027technology\u0027, 2500, 10);", + "sql": "SELECT AVG(num_safety_incidents) FROM union_stats WHERE sector \u003d \u0027services\u0027 AND num_members \u003e 2000;", + "sql_explanation": "This query calculates the average number of workplace safety incidents for unions in the \u0027services\u0027 sector that have more than 2000 members by averaging the \u0027num_safety_incidents\u0027 column in the \u0027union_stats\u0027 table where the \u0027sector\u0027 is \u0027services\u0027 and \u0027num_members\u0027 is greater than 2000." 
+}, { + "id": "3486", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of female workers employed in the \u0027manufacturing\u0027 sector with a minimum wage greater than $20 per hour?", + "sql_context": "CREATE TABLE workers (id INT, sector VARCHAR(20), gender VARCHAR(10), wage FLOAT); INSERT INTO workers (id, sector, gender, wage) VALUES (1, \u0027manufacturing\u0027, \u0027male\u0027, 18.50), (2, \u0027manufacturing\u0027, \u0027female\u0027, 22.00), (3, \u0027retail\u0027, \u0027male\u0027, 15.75), (4, \u0027retail\u0027, \u0027female\u0027, 16.50);", + "sql": "SELECT COUNT(*) FROM workers WHERE sector \u003d \u0027manufacturing\u0027 AND gender \u003d \u0027female\u0027 AND wage \u003e 20;", + "sql_explanation": "The SQL query counts the number of records in the \u0027workers\u0027 table where the \u0027sector\u0027 column is \u0027manufacturing\u0027, the \u0027gender\u0027 column is \u0027female\u0027, and the \u0027wage\u0027 column is greater than $20.00." 
+}, { + "id": "3526", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many collective bargaining agreements were signed in the \u0027Education\u0027 sector in 2018?", + "sql_context": "CREATE TABLE CollectiveBargaining (AgreementID INT, Sector VARCHAR(20), SignDate DATE); INSERT INTO CollectiveBargaining (AgreementID, Sector, SignDate) VALUES (1, \u0027Education\u0027, \u00272016-05-01\u0027), (2, \u0027Healthcare\u0027, \u00272018-09-15\u0027), (3, \u0027Education\u0027, \u00272017-03-25\u0027), (4, \u0027Education\u0027, \u00272018-12-31\u0027);", + "sql": "SELECT COUNT(*) FROM CollectiveBargaining WHERE Sector \u003d \u0027Education\u0027 AND YEAR(SignDate) \u003d 2018;", + "sql_explanation": "Count the number of collective bargaining agreements signed in the \u0027Education\u0027 sector in 2018 by filtering the CollectiveBargaining table by Sector and SignDate\u0027s year, then counting the number of records in the filtered dataset (COUNT function)." 
+}, { + "id": "3977", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in safety scores between the union with the highest and lowest safety scores?", + "sql_context": "CREATE TABLE unions (id INT, name TEXT, location TEXT, type TEXT, safety_score INT); INSERT INTO unions (id, name, location, type, safety_score) VALUES (1, \u0027Union A\u0027, \u0027Germany\u0027, \u0027Manufacturing\u0027, 90), (2, \u0027Union B\u0027, \u0027France\u0027, \u0027Manufacturing\u0027, 70);", + "sql": "SELECT MAX(safety_score) - MIN(safety_score) FROM unions WHERE type \u003d \u0027Manufacturing\u0027;", + "sql_explanation": "This query calculates the difference in safety scores between the union with the highest and lowest safety scores. It does so by subtracting the minimum value of the \u0027safety_score\u0027 column from the maximum value, where the \u0027type\u0027 is \u0027Manufacturing\u0027." 
+}, { + "id": "4067", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of collective bargaining agreements signed in the \u0027technology\u0027 industry?", + "sql_context": "CREATE TABLE collective_bargaining (id INT, industry VARCHAR(50), num_agreements INT); INSERT INTO collective_bargaining (id, industry, num_agreements) VALUES (1, \u0027construction\u0027, 15); INSERT INTO collective_bargaining (id, industry, num_agreements) VALUES (2, \u0027manufacturing\u0027, 10); INSERT INTO collective_bargaining (id, industry, num_agreements) VALUES (3, \u0027technology\u0027, 5);", + "sql": "SELECT MIN(num_agreements) FROM collective_bargaining WHERE industry \u003d \u0027technology\u0027;", + "sql_explanation": "The SQL query calculates the minimum number of collective bargaining agreements in the \u0027technology\u0027 industry by using the MIN function." 
+}, { + "id": "4172", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many part-time workers are there in the \u0027technology\u0027 sector?", + "sql_context": "CREATE TABLE technology (id INT, employee_name TEXT, hours_worked INT, salary REAL); INSERT INTO technology (id, employee_name, hours_worked, salary) VALUES (1, \u0027Alice Davis\u0027, 20, 45000.00), (2, \u0027Bob Brown\u0027, 25, 50000.00), (3, \u0027Charlie Green\u0027, 30, 55000.00);", + "sql": "SELECT COUNT(*) FROM technology WHERE hours_worked \u003c 30 AND sector \u003d \u0027technology\u0027;", + "sql_explanation": "This query counts the number of part-time workers in the technology sector. It first filters the records where hours_worked is less than 30 and sector is technology. Then, it counts the number of rows that match this condition." 
+}, { + "id": "4195", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum salary of unionized workers in the \u0027Retail\u0027 industry?", + "sql_context": "CREATE TABLE Workers (EmployeeID INT, Industry VARCHAR(20), UnionMember BOOLEAN, Salary FLOAT); INSERT INTO Workers (EmployeeID, Industry, UnionMember, Salary) VALUES (1, \u0027Retail\u0027, true, 35000.0), (2, \u0027Retail\u0027, true, 36000.0), (3, \u0027Retail\u0027, false, 33000.0);", + "sql": "SELECT MAX(Salary) FROM Workers WHERE Industry \u003d \u0027Retail\u0027 AND UnionMember \u003d true;", + "sql_explanation": "Determine the maximum salary of unionized workers in the \u0027Retail\u0027 industry by filtering the Workers table by Industry and UnionMember, then computing the maximum salary (MAX function) from the filtered dataset." 
+}, { + "id": "4460", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many labor rights violations were reported in \u0027California\u0027 and \u0027New York\u0027?", + "sql_context": "CREATE TABLE violations (id INT, location TEXT, type TEXT, date DATE); INSERT INTO violations (id, location, type, date) VALUES (1, \u0027California\u0027, \u0027wage theft\u0027, \u00272021-01-01\u0027), (2, \u0027New York\u0027, \u0027unsafe working conditions\u0027, \u00272021-02-01\u0027);", + "sql": "SELECT COUNT(*) FROM violations WHERE location IN (\u0027California\u0027, \u0027New York\u0027);", + "sql_explanation": "This query counts the number of labor rights violations reported in California and New York." 
+}, { + "id": "4486", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum tenure for members in the \u0027manufacturing\u0027 department?", + "sql_context": "CREATE TABLE union_membership (id INT, name VARCHAR(50), department VARCHAR(50), tenure INT); INSERT INTO union_membership (id, name, department, tenure) VALUES (1, \u0027Alice\u0027, \u0027technology\u0027, 5); INSERT INTO union_membership (id, name, department, tenure) VALUES (2, \u0027Bob\u0027, \u0027technology\u0027, 3); INSERT INTO union_membership (id, name, department, tenure) VALUES (3, \u0027Charlie\u0027, \u0027manufacturing\u0027, 4);", + "sql": "SELECT MIN(tenure) FROM union_membership WHERE department \u003d \u0027manufacturing\u0027;", + "sql_explanation": "The SQL query calculates the minimum tenure for members in the \u0027manufacturing\u0027 department by using the MIN function." 
+}, { + "id": "4506", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workplaces with successful collective bargaining agreements in Canada in 2020?", + "sql_context": "CREATE TABLE workplaces (id INT, country VARCHAR(50), num_employees INT, has_cba BOOLEAN); INSERT INTO workplaces (id, country, num_employees, has_cba) VALUES (1, \u0027Canada\u0027, 200, true), (2, \u0027USA\u0027, 300, false);", + "sql": "SELECT COUNT(*) FROM workplaces WHERE country \u003d \u0027Canada\u0027 AND has_cba \u003d true;", + "sql_explanation": "This query counts the number of rows in the workplaces table where the country is \u0027Canada\u0027 and has_cba is true." 
+}, { + "id": "4547", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of members in the \u0027Educators_Union\u0027 with a safety_rating above 8?", + "sql_context": "CREATE TABLE Educators_Union (union_member_id INT, member_id INT, safety_rating FLOAT); INSERT INTO Educators_Union (union_member_id, member_id, safety_rating) VALUES (1, 101, 8.25), (1, 102, 8.75), (1, 103, 9.25), (2, 201, 7.50), (2, 202, 8.75);", + "sql": "SELECT COUNT(union_member_id) FROM Educators_Union WHERE safety_rating \u003e 8;", + "sql_explanation": "This query counts the total number of members in the \u0027Educators_Union\u0027 with a safety_rating above 8 by using the COUNT function on the \u0027union_member_id\u0027 column, filtered by the \u0027safety_rating\u0027 column with a value greater than 8." 
+}, { + "id": "4559", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"Unions\" table with a UnionID of 5 and a UnionName of \"National Labor Union\".", + "sql_context": "CREATE TABLE Unions (UnionID INT, UnionName TEXT);", + "sql": "INSERT INTO Unions (UnionID, UnionName) VALUES (5, \u0027National Labor Union\u0027);", + "sql_explanation": "This query inserts a new record into the \"Unions\" table with a UnionID of 5 and a UnionName of \"National Labor Union\"." +}, { + "id": "4583", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees work in the \u0027finance\u0027 sector?", + "sql_context": "CREATE TABLE if not exists employment (id INT, industry VARCHAR, number_of_employees INT); INSERT INTO employment (id, industry, number_of_employees) VALUES (1, \u0027manufacturing\u0027, 5000), (2, \u0027technology\u0027, 8000), (3, \u0027healthcare\u0027, 7000), (4, \u0027retail\u0027, 6000), (5, \u0027education\u0027, 9000), (6, \u0027finance\u0027, 10000);", + "sql": "SELECT SUM(number_of_employees) FROM employment WHERE industry \u003d \u0027finance\u0027;", + "sql_explanation": "This query calculates the total number of employees in the \u0027finance\u0027 sector by summing the 
\u0027number_of_employees\u0027 column where the \u0027industry\u0027 is \u0027finance\u0027." +}, { + "id": "4689", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workplace safety incidents reported in the Healthcare industry?", + "sql_context": "CREATE TABLE WorkplaceSafety (id INT, industry VARCHAR(255), incidents INT); INSERT INTO WorkplaceSafety (id, industry, incidents) VALUES (1, \u0027Healthcare\u0027, 15);", + "sql": "SELECT SUM(incidents) FROM WorkplaceSafety WHERE industry \u003d \u0027Healthcare\u0027;", + "sql_explanation": "This SQL query calculates the total number of workplace safety incidents in the Healthcare industry by summing the \u0027incidents\u0027 column where the \u0027industry\u0027 is \u0027Healthcare\u0027." 
+}, { + "id": "4801", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of members in union \u0027M\u0027 who joined after 2016 and their average salary.", + "sql_context": "CREATE TABLE UnionM(member_id INT, join_date DATE, salary INT); INSERT INTO UnionM(member_id, join_date, salary) VALUES(13001, \u00272017-01-01\u0027, 50000), (13002, \u00272016-12-31\u0027, 55000), (13003, \u00272018-01-01\u0027, 45000);", + "sql": "SELECT COUNT(*), AVG(salary) FROM UnionM WHERE YEAR(join_date) \u003e 2016;", + "sql_explanation": "This query selects the count of members who joined after 2016 and their average salary from UnionM." 
+}, { + "id": "4915", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum salary in the \u0027healthcare\u0027 sector?", + "sql_context": "CREATE TABLE company_data (company_name VARCHAR(30), sector VARCHAR(20), avg_salary INT); INSERT INTO company_data (company_name, sector, avg_salary) VALUES (\u0027CompanyA\u0027, \u0027Healthcare\u0027, 70000), (\u0027CompanyB\u0027, \u0027Healthcare\u0027, 90000), (\u0027CompanyC\u0027, \u0027Finance\u0027, 80000);", + "sql": "SELECT MAX(avg_salary) FROM company_data WHERE sector \u003d \u0027Healthcare\u0027;", + "sql_explanation": "This query calculates the maximum salary in the healthcare sector. It does this by using the MAX aggregate function on the avg_salary column after filtering the records based on sector condition." 
+}, { + "id": "5009", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum salary in the \u0027manufacturing\u0027 industry?", + "sql_context": "CREATE TABLE if not exists salaries5 (id INT, industry TEXT, region TEXT, salary REAL);INSERT INTO salaries5 (id, industry, region, salary) VALUES (1, \u0027manufacturing\u0027, \u0027east\u0027, 60000), (2, \u0027retail\u0027, \u0027west\u0027, 50000);", + "sql": "SELECT MAX(salary) FROM salaries5 WHERE industry \u003d \u0027manufacturing\u0027;", + "sql_explanation": "This query selects the maximum salary in the \u0027manufacturing\u0027 industry by selecting all records with the industry value of \u0027manufacturing\u0027 and then finding the maximum salary of the selected records." 
+}, { + "id": "5110", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of male \u0027service\u0027 union members?", + "sql_context": "CREATE TABLE service_union_members (member_id INT, gender VARCHAR(10), union VARCHAR(20), age INT); INSERT INTO service_union_members (member_id, gender, union, age) VALUES (1, \u0027Male\u0027, \u0027Service\u0027, 25); INSERT INTO service_union_members (member_id, gender, union, age) VALUES (2, \u0027Female\u0027, \u0027Service\u0027, 30);", + "sql": "SELECT MIN(age) FROM service_union_members WHERE gender \u003d \u0027Male\u0027;", + "sql_explanation": "This query finds the minimum age of male \u0027service\u0027 union members by selecting the \u0027age\u0027 column from the \u0027service_union_members\u0027 table where \u0027gender\u0027 is \u0027Male\u0027, then finding the minimum value among the selected values." 
+}, { + "id": "5263", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average salary of employees in the company", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), position VARCHAR(50), left_company BOOLEAN, salary DECIMAL(10,2));", + "sql": "SELECT AVG(salary) FROM Employees WHERE left_company \u003d FALSE;", + "sql_explanation": "The SQL query calculates the average salary of employees who have not left the company, returning the result as a single row with a single column." +}, { + "id": "5421", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of union members per union in the state of New York?", + "sql_context": "CREATE TABLE unions (id INT, name TEXT, state TEXT, members INT); INSERT INTO unions (id, name, state, members) VALUES (1, \u0027Union A\u0027, \u0027New York\u0027, 500); INSERT INTO unions (id, name, state, members) VALUES (2, \u0027Union B\u0027, \u0027California\u0027, 700); INSERT INTO unions (id, name, state, members) VALUES (3, \u0027Union C\u0027, \u0027New York\u0027, 800);", + "sql": "SELECT AVG(members) FROM unions WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This query selects the members column from the unions table 
and filters for rows where the state is \u0027New York\u0027. It then calculates the average of the members column for these rows." +}, { + "id": "5491", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the industry with the most workers?", + "sql_context": "CREATE TABLE if not exists industry (industry_id INT, industry_name TEXT, total_workers INT); INSERT INTO industry (industry_id, industry_name, total_workers) VALUES (1, \u0027manufacturing\u0027, 5000), (2, \u0027technology\u0027, 7000), (3, \u0027healthcare\u0027, 6000), (4, \u0027finance\u0027, 4000), (5, \u0027retail\u0027, 3000);", + "sql": "SELECT industry_name, MAX(total_workers) FROM industry;", + "sql_explanation": "This SQL query retrieves the industry with the most workers by selecting the \u0027industry_name\u0027 and the maximum \u0027total_workers\u0027 from the \u0027industry\u0027 table." 
+}, { + "id": "5753", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of members in the \u0027LaborRightsAdvocacy\u0027 union?", + "sql_context": "CREATE TABLE LaborRightsAdvocacy (member_id INT, name TEXT, age INT, join_date DATE);", + "sql": "SELECT AVG(age) FROM LaborRightsAdvocacy;", + "sql_explanation": "The SQL query calculates the average of the \u0027age\u0027 column in the \u0027LaborRightsAdvocacy\u0027 table, providing the average age of its members." +}, { + "id": "5768", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unions are present in the database?", + "sql_context": "CREATE TABLE unions (id INT, name VARCHAR(20)); INSERT INTO unions (id, name) VALUES (1, \u0027construction\u0027), (2, \u0027education\u0027), (3, \u0027manufacturing\u0027), (4, \u0027retail\u0027), (5, \u0027healthcare\u0027);", + "sql": "SELECT COUNT(DISTINCT name) FROM unions;", + "sql_explanation": "This SQL query calculates the number of unions in the database by counting the distinct \u0027name\u0027 values in the \u0027unions\u0027 table." 
+}, { + "id": "1519", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many biotech startups from Australia or New Zealand have received funding in the last 18 months?", + "sql_context": "CREATE TABLE biotech_startups (startup_name VARCHAR(255), funding_round DATE, country VARCHAR(255)); INSERT INTO biotech_startups (startup_name, funding_round, country) VALUES (\u0027StartupC\u0027, \u00272023-01-01\u0027, \u0027Australia\u0027);", + "sql": "SELECT COUNT(*) FROM biotech_startups WHERE funding_round BETWEEN DATEADD(MONTH, -18, GETDATE()) AND GETDATE() AND country IN (\u0027Australia\u0027, \u0027New Zealand\u0027);", + "sql_explanation": "The SQL query counts the number of records in the \u0027biotech_startups\u0027 table where the \u0027funding_round\u0027 is within the last 18 months and the \u0027country\u0027 is either \u0027Australia\u0027 or \u0027New Zealand\u0027." 
+}, { + "id": "3191", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum budget for bioprocess engineering in Japan?", + "sql_context": "CREATE SCHEMA engineering; CREATE TABLE engineering.budget (id INT, item VARCHAR(100), country VARCHAR(50), budget FLOAT); INSERT INTO engineering.budget (id, item, country, budget) VALUES (1, \u0027Bioprocess Engineering\u0027, \u0027Japan\u0027, 6000000.00); INSERT INTO engineering.budget (id, item, country, budget) VALUES (2, \u0027Automation\u0027, \u0027Japan\u0027, 4000000.00);", + "sql": "SELECT MIN(budget) FROM engineering.budget WHERE item \u003d \u0027Bioprocess Engineering\u0027 AND country \u003d \u0027Japan\u0027;", + "sql_explanation": "This SQL query calculates the minimum budget for bioprocess engineering in Japan by selecting the minimum value of the \u0027budget\u0027 column for rows where the \u0027item\u0027 column is \u0027Bioprocess Engineering\u0027 and the \u0027country\u0027 column is \u0027Japan\u0027." 
+}, { + "id": "3355", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all biotech startups that received funding in the last 6 months.", + "sql_context": "CREATE SCHEMA if not exists biotech;CREATE TABLE biotech.startups_funding (id INT, startup_name VARCHAR(50), funding_date DATE, funding_amount DECIMAL(10,2));INSERT INTO biotech.startups_funding (id, startup_name, funding_date, funding_amount) VALUES (1, \u0027StartupA\u0027, \u00272022-01-15\u0027, 5000000.00), (2, \u0027StartupB\u0027, \u00272022-06-30\u0027, 3000000.00), (3, \u0027StartupC\u0027, \u00272021-12-31\u0027, 2000000.00);", + "sql": "SELECT * FROM biotech.startups_funding WHERE funding_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", + "sql_explanation": "This SQL query lists all biotech startups that received funding in the last 6 months by selecting all columns from the biotech.startups_funding table where the funding date is within the last 6 months." 
+}, { + "id": "3398", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total biosensor technology development cost for projects in the UK?", + "sql_context": "CREATE TABLE projects (id INT, name VARCHAR(50), country VARCHAR(50), techniques VARCHAR(50), costs FLOAT); INSERT INTO projects (id, name, country, techniques, costs) VALUES (1, \u0027ProjectX\u0027, \u0027UK\u0027, \u0027Biosensor technology, bioinformatics\u0027, 20000); INSERT INTO projects (id, name, country, techniques, costs) VALUES (2, \u0027ProjectY\u0027, \u0027UK\u0027, \u0027PCR, bioinformatics\u0027, 15000); INSERT INTO projects (id, name, country, techniques, costs) VALUES (3, \u0027ProjectZ\u0027, \u0027UK\u0027, \u0027Biosensor technology, DNA sequencing\u0027, 25000);", + "sql": "SELECT SUM(costs) FROM projects WHERE country \u003d \u0027UK\u0027 AND techniques LIKE \u0027%Biosensor technology%\u0027;", + "sql_explanation": "This SQL query calculates the total biosensor technology development cost for projects in the UK by summing up the \u0027costs\u0027 column values for rows where the \u0027country\u0027 column value is \u0027UK\u0027 and the \u0027techniques\u0027 column value contains the substring \u0027Biosensor technology\u0027." 
+}, { + "id": "3457", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the biosensors and their prices associated with bioengineering?", + "sql_context": "CREATE TABLE Biosensor (Biosensor_Name VARCHAR(50) PRIMARY KEY, Department VARCHAR(50), Price DECIMAL(10, 2)); INSERT INTO Biosensor (Biosensor_Name, Department, Price) VALUES (\u0027Bio1\u0027, \u0027Genetic Research\u0027, 1000.00); INSERT INTO Biosensor (Biosensor_Name, Department, Price) VALUES (\u0027Bio2\u0027, \u0027BioProcess Engineering\u0027, 1500.00);", + "sql": "SELECT B.Biosensor_Name, B.Price FROM Biosensor B WHERE B.Department \u003d \u0027BioProcess Engineering\u0027;", + "sql_explanation": "This query selects the names and prices of biosensors associated with the bioengineering department." 
+}, { + "id": "3497", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many bioprocess engineering projects were successfully completed in the UK?", + "sql_context": "CREATE SCHEMA if not exists biotech;CREATE TABLE if not exists biotech.bioprocess_projects(id INT, name STRING, location STRING, status STRING);INSERT INTO biotech.bioprocess_projects(id, name, location, status) VALUES (1, \u0027ProjectA\u0027, \u0027UK\u0027, \u0027completed\u0027), (2, \u0027ProjectB\u0027, \u0027US\u0027, \u0027in_progress\u0027), (3, \u0027ProjectC\u0027, \u0027UK\u0027, \u0027completed\u0027);", + "sql": "SELECT COUNT(*) FROM biotech.bioprocess_projects WHERE location \u003d \u0027UK\u0027 AND status \u003d \u0027completed\u0027;", + "sql_explanation": "This query counts the number of successfully completed bioprocess engineering projects in the UK. It filters the bioprocess_projects table for completed projects located in the UK and counts the number of rows." 
+}, { + "id": "3683", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total funding for biotech startups in Q2 2022?", + "sql_context": "CREATE TABLE biotech_startups (name TEXT, funding FLOAT, date DATE); INSERT INTO biotech_startups (name, funding, date) VALUES (\u0027StartupA\u0027, 3000000, \u00272022-04-15\u0027); INSERT INTO biotech_startups (name, funding, date) VALUES (\u0027StartupB\u0027, 4000000, \u00272022-06-20\u0027);", + "sql": "SELECT SUM(funding) FROM biotech_startups WHERE date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "The SQL query filters the biotech_startups table for entries in Q2 2022 and then calculates the total funding." 
+}, { + "id": "3748", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many biosensor technology development projects were conducted in India and Brazil?", + "sql_context": "CREATE SCHEMA if not exists biotech;USE biotech;CREATE TABLE if not exists projects (id INT, country VARCHAR(255), type VARCHAR(255));INSERT INTO projects (id, country, type) VALUES (1, \u0027India\u0027, \u0027Biosensor\u0027), (2, \u0027Brazil\u0027, \u0027Biosensor\u0027), (3, \u0027USA\u0027, \u0027Bioprocess\u0027), (4, \u0027India\u0027, \u0027Genetic\u0027);", + "sql": "SELECT COUNT(*) FROM projects WHERE country IN (\u0027India\u0027, \u0027Brazil\u0027) AND type \u003d \u0027Biosensor\u0027;", + "sql_explanation": "This query counts the number of biosensor technology development projects conducted in India and Brazil by using the COUNT function and filtering the rows using the WHERE clause to only include biosensor projects in the specified countries." 
+}, { + "id": "3906", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average biosensor price for a given manufacturer.", + "sql_context": "CREATE TABLE biosensors (id INT, manufacturer VARCHAR(50), model VARCHAR(50), price FLOAT, quantity INT, date DATE);", + "sql": "SELECT AVG(price) FROM biosensors WHERE manufacturer \u003d \u0027Example Inc.\u0027 AND quantity \u003e 0;", + "sql_explanation": "This query calculates the average biosensor price for \u0027Example Inc.\u0027 by averaging the \u0027price\u0027 column values where the manufacturer is \u0027Example Inc.\u0027 and quantity is greater than 0." 
+}, { + "id": "3942", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List biotech startups founded before 2010.", + "sql_context": "CREATE TABLE startups (id INT, name VARCHAR(50), location VARCHAR(50), industry VARCHAR(50), founding_date DATE);", + "sql": "SELECT name FROM startups WHERE industry \u003d \u0027biotech\u0027 AND founding_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "This query lists biotech startups founded before 2010 by selecting the \u0027name\u0027 column values where industry is \u0027biotech\u0027 and founding_date is earlier than \u00272010-01-01\u0027." +}, { + "id": "3964", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many biotech startups were founded by women in France?", + "sql_context": "CREATE TABLE startups(name VARCHAR(50), founder_gender VARCHAR(10), location VARCHAR(20));INSERT INTO startups(name, founder_gender, location) VALUES(\u0027StartupA\u0027, \u0027Female\u0027, \u0027France\u0027), (\u0027StartupB\u0027, \u0027Female\u0027, \u0027Germany\u0027), (\u0027StartupC\u0027, \u0027Male\u0027, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM startups WHERE founder_gender \u003d \u0027Female\u0027 AND location \u003d 
\u0027France\u0027;", + "sql_explanation": "This query counts the number of biotech startups founded by women in France by filtering on the founder_gender and location columns." +}, { + "id": "3978", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research projects in the UK involve DNA sequencing?", + "sql_context": "CREATE TABLE projects (id INT, name VARCHAR(50), country VARCHAR(50), techniques VARCHAR(50)); INSERT INTO projects (id, name, country, techniques) VALUES (1, \u0027ProjectX\u0027, \u0027UK\u0027, \u0027DNA sequencing, PCR\u0027); INSERT INTO projects (id, name, country, techniques) VALUES (2, \u0027ProjectY\u0027, \u0027UK\u0027, \u0027PCR, bioinformatics\u0027);", + "sql": "SELECT name FROM projects WHERE country \u003d \u0027UK\u0027 AND techniques LIKE \u0027%DNA sequencing%\u0027;", + "sql_explanation": "This SQL query retrieves the names of genetic research projects in the UK that involve DNA sequencing by selecting rows from the \u0027projects\u0027 table where the \u0027country\u0027 column value is \u0027UK\u0027 and the \u0027techniques\u0027 column value contains the substring \u0027DNA sequencing\u0027." 
+}, { + "id": "4061", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and gene sequence of patients with illness \u0027Flu\u0027?", + "sql_context": "CREATE TABLE patients (id INT PRIMARY KEY, name VARCHAR(255), age INT, gender VARCHAR(10), illness VARCHAR(255)); INSERT INTO patients (id, name, age, gender, illness) VALUES (1, \u0027Jane Smith\u0027, 28, \u0027Female\u0027, \u0027Flu\u0027);", + "sql": "SELECT patients.name, patients.illness FROM patients WHERE patients.illness \u003d \u0027Flu\u0027;", + "sql_explanation": "Select name and illness columns from patients table where illness is Flu." 
+}, { + "id": "4079", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average biosensor technology development cost for companies in the UK?", + "sql_context": "CREATE SCHEMA if not exists biosensors;CREATE TABLE biosensors.development_costs (id INT, company_name VARCHAR(50), country VARCHAR(50), development_cost DECIMAL(10,2));INSERT INTO biosensors.development_costs (id, company_name, country, development_cost) VALUES (1, \u0027CompanyA\u0027, \u0027UK\u0027, 5000000.00), (2, \u0027CompanyB\u0027, \u0027Canada\u0027, 3500000.00), (3, \u0027CompanyC\u0027, \u0027USA\u0027, 8000000.00);", + "sql": "SELECT AVG(development_cost) FROM biosensors.development_costs WHERE country \u003d \u0027UK\u0027;", + "sql_explanation": "This SQL query calculates the average biosensor technology development cost for companies in the UK by averaging the development_cost column in the biosensors.development_costs table, filtering for rows where the country is \u0027UK\u0027." +}, { + "id": "4142", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average bioprocess engineering project duration for projects led by Dr. 
Patel?", + "sql_context": "CREATE TABLE bioprocess_engineering (id INT, project_name VARCHAR(100), lead_engineer VARCHAR(100), duration INT);", + "sql": "SELECT AVG(duration) FROM bioprocess_engineering WHERE lead_engineer \u003d \u0027Dr. Patel\u0027;", + "sql_explanation": "This SQL query calculates the average duration of bioprocess engineering projects led by Dr. Patel by finding the average \u0027duration\u0027 value where \u0027lead_engineer\u0027 is \u0027Dr. Patel\u0027." +}, { + "id": "4236", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the expression level of the \u0027GeneX\u0027 gene in the nervous system compared to the cardiovascular system?", + "sql_context": "CREATE SCHEMA if not exists genetic;CREATE TABLE if not exists genetic.gene_expression (id INT, gene_id INT, gene_name TEXT, tissue TEXT, expression DECIMAL(5,2));INSERT INTO genetic.gene_expression (id, gene_id, gene_name, tissue, expression) VALUES (1, 1, \u0027Gene1\u0027, \u0027Cardiovascular\u0027, 9.87), (2, 2, \u0027Gene2\u0027, \u0027Nervous\u0027, 7.34), (3, 3, \u0027Gene3\u0027, \u0027Cardiovascular\u0027, 10.12), (4, 4, \u0027GeneX\u0027, \u0027Nervous\u0027, 6.55), (5, 5, \u0027GeneX\u0027, \u0027Cardiovascular\u0027, 8.21);", + "sql": "SELECT tissue, expression FROM genetic.gene_expression WHERE gene_name \u003d \u0027GeneX\u0027;", + "sql_explanation": "This query retrieves the expression levels of the \u0027GeneX\u0027 gene in both the nervous and cardiovascular systems." 
+}, { + "id": "4260", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research experiments have been conducted in South America?", + "sql_context": "CREATE SCHEMA if not exists genetics;CREATE TABLE if not exists genetics.experiments (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(100)); INSERT INTO genetics.experiments (id, name, location) VALUES (1, \u0027ExpA\u0027, \u0027Buenos Aires\u0027), (2, \u0027ExpB\u0027, \u0027Santiago\u0027), (3, \u0027ExpC\u0027, \u0027Rio de Janeiro\u0027), (4, \u0027ExpD\u0027, \u0027Lima\u0027), (5, \u0027ExpE\u0027, \u0027BogotÃĄ\u0027);", + "sql": "SELECT DISTINCT name FROM genetics.experiments WHERE location \u003d \u0027South America\u0027;", + "sql_explanation": "The SQL query lists all genetic research experiments conducted in South America by selecting distinct values from the name column and filtering the results to only include rows where the location is \u0027South America\u0027." 
+}, { + "id": "4305", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research projects have been completed in the UK?", + "sql_context": "CREATE SCHEMA if not exists biotech; CREATE TABLE if not exists biotech.research (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), status VARCHAR(255)); INSERT INTO biotech.research (id, name, country, status) VALUES (1, \u0027Genome UK\u0027, \u0027UK\u0027, \u0027Ongoing\u0027); INSERT INTO biotech.research (id, name, country, status) VALUES (2, \u0027Brain UK\u0027, \u0027UK\u0027, \u0027Completed\u0027);", + "sql": "SELECT name FROM biotech.research WHERE country \u003d \u0027UK\u0027 AND status \u003d \u0027Completed\u0027;", + "sql_explanation": "This SQL query retrieves the names of genetic research projects that have been completed in the UK by selecting the rows in the biotech.research table where the country is \u0027UK\u0027 and the status is \u0027Completed\u0027, and then selecting the name column." 
+}, { + "id": "4334", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all genetic research projects with a budget greater than $800,000?", + "sql_context": "CREATE SCHEMA GeneticResearch; CREATE TABLE project_budgets (project_name VARCHAR(50), budget DECIMAL(10, 2)); INSERT INTO project_budgets VALUES (\u0027Project1\u0027, 600000), (\u0027Project2\u0027, 900000);", + "sql": "SELECT project_name FROM GeneticResearch.project_budgets WHERE budget \u003e 800000;", + "sql_explanation": "The SQL query lists all genetic research projects with a budget greater than $800,000 by selecting the \u0027project_name\u0027 column from the \u0027project_budgets\u0027 table in the \u0027GeneticResearch\u0027 schema and filtering the results using the WHERE clause and the \u0027\u003e\u0027 operator." 
+}, { + "id": "4419", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average salary of bioprocess engineers working in the Bay Area.", + "sql_context": "CREATE TABLE bioprocess_engineers (name TEXT, salary FLOAT, location TEXT); INSERT INTO bioprocess_engineers (name, salary, location) VALUES (\u0027EngrA\u0027, 80000, \u0027San Francisco\u0027); INSERT INTO bioprocess_engineers (name, salary, location) VALUES (\u0027EngrB\u0027, 90000, \u0027Berkeley\u0027);", + "sql": "SELECT AVG(salary) FROM bioprocess_engineers WHERE location \u003d \u0027San Francisco\u0027;", + "sql_explanation": "The SQL query filters the bioprocess_engineers table for records with location \u0027San Francisco\u0027 and then calculates the average salary." 
+}, { + "id": "4462", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Identify bioprocess engineering papers published in 2021", + "sql_context": "CREATE TABLE papers (id INT, title VARCHAR(50), year INT, authors VARCHAR(50), publication VARCHAR(50)); INSERT INTO papers (id, title, year, authors, publication) VALUES (1, \u0027Paper A\u0027, 2021, \u0027John Doe\u0027, \u0027Journal of Bioprocessing\u0027); INSERT INTO papers (id, title, year, authors, publication) VALUES (2, \u0027Paper B\u0027, 2020, \u0027Jane Smith\u0027, \u0027Journal of Genetic Engineering\u0027);", + "sql": "SELECT * FROM papers WHERE year \u003d 2021 AND domain \u003d \u0027Bioprocess Engineering\u0027;", + "sql_explanation": "This query filters the papers table for 2021 publications in the Bioprocess Engineering domain, returning all columns." 
+}, { + "id": "4855", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding received by biotech startups in Australia?", + "sql_context": "CREATE SCHEMA if not exists biotech;CREATE TABLE if not exists biotech.startups (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50), funding FLOAT);INSERT INTO biotech.startups (id, name, country, funding) VALUES (1, \u0027StartupA\u0027, \u0027Australia\u0027, 3000000.0), (2, \u0027StartupB\u0027, \u0027Australia\u0027, 1500000.0), (3, \u0027StartupC\u0027, \u0027Canada\u0027, 800000.0);", + "sql": "SELECT AVG(funding) FROM biotech.startups WHERE country \u003d \u0027Australia\u0027;", + "sql_explanation": "This SQL query calculates the average funding for biotech startups located in Australia by using the AVG function on the funding column, filtered by the country column equal to \u0027Australia\u0027." 
+}, { + "id": "4953", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of genetic research projects with a start date in 2021?", + "sql_context": "CREATE TABLE research_projects (id INT, name VARCHAR(255), start_date DATE); INSERT INTO research_projects (id, name, start_date) VALUES (1, \u0027ProjectA\u0027, \u00272020-01-05\u0027); INSERT INTO research_projects (id, name, start_date) VALUES (2, \u0027ProjectB\u0027, \u00272019-12-31\u0027);", + "sql": "SELECT name FROM research_projects WHERE start_date \u003e\u003d \u00272021-01-01\u0027;", + "sql_explanation": "This SQL query retrieves the names of genetic research projects with a start date in 2021. It uses the WHERE clause to filter rows based on the \u0027start_date\u0027 column, only selecting rows where the start date is greater than or equal to \u00272021-01-01\u0027. Then, it uses the SELECT statement to retrieve the \u0027name\u0027 column for these rows. However, the provided context does not include any projects with a start date in 2021, so this query will not return any results." 
+}, { + "id": "4975", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum funding received by a biotech startup in Canada?", + "sql_context": "CREATE SCHEMA if not exists biotech;CREATE TABLE if not exists biotech.startups (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50), funding DECIMAL(10, 2)); INSERT INTO biotech.startups (id, name, country, funding) VALUES (1, \u0027StartupA\u0027, \u0027USA\u0027, 1500000.00), (2, \u0027StartupB\u0027, \u0027USA\u0027, 2000000.00), (3, \u0027StartupC\u0027, \u0027Canada\u0027, 1200000.00);", + "sql": "SELECT MAX(funding) FROM biotech.startups WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query calculates the maximum funding received by a biotech startup located in Canada." 
+}, { + "id": "5107", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for biotech startups in the US?", + "sql_context": "CREATE TABLE biotech_startups (id INT, name VARCHAR(100), location VARCHAR(100), funding FLOAT); INSERT INTO biotech_startups (id, name, location, funding) VALUES (1, \u0027Startup A\u0027, \u0027USA\u0027, 15000000); INSERT INTO biotech_startups (id, name, location, funding) VALUES (2, \u0027Startup B\u0027, \u0027USA\u0027, 22000000);", + "sql": "SELECT AVG(funding) FROM biotech_startups WHERE location \u003d \u0027USA\u0027;", + "sql_explanation": "This query calculates the average funding for biotech startups located in the USA by using the AVG function on the funding column." 
+}, { + "id": "5120", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by biotech startups in India?", + "sql_context": "CREATE TABLE biotech_startups (id INT, name VARCHAR(50), budget DECIMAL(10,2), region VARCHAR(50)); INSERT INTO biotech_startups (id, name, budget, region) VALUES (1, \u0027Genetix\u0027, 5000000.00, \u0027India\u0027); INSERT INTO biotech_startups (id, name, budget, region) VALUES (2, \u0027BioEngineerz\u0027, 7000000.00, \u0027USA\u0027); INSERT INTO biotech_startups (id, name, budget, region) VALUES (3, \u0027SensoraBio\u0027, 6000000.00, \u0027Germany\u0027);", + "sql": "SELECT SUM(budget) FROM biotech_startups WHERE region \u003d \u0027India\u0027;", + "sql_explanation": "We calculate the total funding received by biotech startups in India by selecting the budget column and applying the SUM() function, filtering by the region \u0027India\u0027." 
+}, { + "id": "5286", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the genetic research projects by disease category.", + "sql_context": "CREATE SCHEMA if not exists genetic_research;CREATE TABLE if not exists genetic_research.projects(id INT, name TEXT, lead_researcher TEXT, disease_category TEXT);INSERT INTO genetic_research.projects (id, name, lead_researcher, disease_category) VALUES (1, \u0027ProjectX\u0027, \u0027Dr. Jane Smith\u0027, \u0027Cancer\u0027), (2, \u0027ProjectY\u0027, \u0027Dr. John Doe\u0027, \u0027Neurological Disorders\u0027), (3, \u0027ProjectZ\u0027, \u0027Dr. Maria Garcia\u0027, \u0027Cancer\u0027);", + "sql": "SELECT disease_category, name FROM genetic_research.projects;", + "sql_explanation": "This query retrieves the disease_category and name columns from the genetic_research.projects table, which returns a list of genetic research projects and their respective disease categories." 
+}, { + "id": "5387", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of biotech startups founded in 2020?", + "sql_context": "CREATE TABLE startup_funding (name VARCHAR(255), year INT, budget FLOAT); INSERT INTO startup_funding (name, year, budget) VALUES (\u0027StartupA\u0027, 2020, 5000000), (\u0027StartupB\u0027, 2020, 7000000), (\u0027StartupC\u0027, 2019, 6000000);", + "sql": "SELECT AVG(budget) FROM startup_funding WHERE year \u003d 2020;", + "sql_explanation": "This SQL query calculates the average budget for biotech startups founded in 2020 by selecting the budget column from the startup_funding table where the year is 2020, then calculating the average of the returned values." 
+}, { + "id": "5423", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research experiments involved CRISPR technology?", + "sql_context": "CREATE TABLE experiments (id INT, name VARCHAR(50), technology VARCHAR(50), description TEXT); INSERT INTO experiments (id, name, technology, description) VALUES (1, \u0027Experiment1\u0027, \u0027CRISPR\u0027, \u0027Genetic manipulation using CRISPR...\u0027);", + "sql": "SELECT name FROM experiments WHERE technology \u003d \u0027CRISPR\u0027;", + "sql_explanation": "This query retrieves the names of genetic research experiments that involved CRISPR technology by selecting the \u0027name\u0027 column where the \u0027technology\u0027 is \u0027CRISPR\u0027." 
+}, { + "id": "5476", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by biotech startups located in the UK?", + "sql_context": "CREATE TABLE startups (id INT, name VARCHAR(50), location VARCHAR(50), funding FLOAT); INSERT INTO startups (id, name, location, funding) VALUES (1, \u0027Genomic Solutions\u0027, \u0027USA\u0027, 5000000), (2, \u0027BioTech Innovations\u0027, \u0027Europe\u0027, 7000000), (3, \u0027Medical Innovations\u0027, \u0027UK\u0027, 6000000);", + "sql": "SELECT SUM(funding) FROM startups WHERE location \u003d \u0027UK\u0027;", + "sql_explanation": "This query calculates the total funding received by biotech startups located in the UK by summing the \u0027funding\u0027 column in the \u0027startups\u0027 table, filtering for rows where the \u0027location\u0027 is \u0027UK\u0027." 
+}, { + "id": "5642", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding amount for all biotech startups?", + "sql_context": "CREATE TABLE biotech_startups (id INT, name TEXT, location TEXT, funding_amount INT); INSERT INTO biotech_startups (id, name, location, funding_amount) VALUES (1, \u0027GenSolutions\u0027, \u0027California\u0027, 12000000), (2, \u0027BioInnovate\u0027, \u0027Texas\u0027, 20000000), (3, \u0027TechGen\u0027, \u0027Texas\u0027, 15000000);", + "sql": "SELECT SUM(funding_amount) FROM biotech_startups;", + "sql_explanation": "The SQL query calculates the total funding amount for all biotech startups by summing the funding_amount column using the SUM function." 
+}, { + "id": "1171", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which policies have been updated in the last month and are related to password complexity?", + "sql_context": "CREATE TABLE Policies (policy_id INT, policy_name VARCHAR(50), policy_date DATE, policy_category VARCHAR(50));", + "sql": "SELECT policy_id, policy_name FROM Policies WHERE policy_category \u003d \u0027password complexity\u0027 AND policy_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND CURRENT_DATE;", + "sql_explanation": "This query first selects the policy_id and policy_name from Policies table where policy_category is \u0027password complexity\u0027 and policy_date is between the current date minus one month and the current date. The result is the policies that have been updated in the last month and are related to password complexity." 
+}, { + "id": "1377", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record of an incident type in the \u0027incident_types\u0027 table", + "sql_context": "CREATE TABLE incident_types (id INT, name VARCHAR, description TEXT);", + "sql": "INSERT INTO incident_types (id, name, description) VALUES (1, \u0027Phishing Attack\u0027, \u0027Attempt to obtain sensitive information by disguising as a trustworthy entity\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027incident_types\u0027 table with the following details: id\u003d1, name\u003d\u0027Phishing Attack\u0027, description\u003d\u0027Attempt to obtain sensitive information by disguising as a trustworthy entity\u0027." 
+}, { + "id": "1736", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record of a security incident in the \u0027security_incidents\u0027 table", + "sql_context": "CREATE TABLE security_incidents (id INT, name VARCHAR, description TEXT, resolved_date DATE);", + "sql": "INSERT INTO security_incidents (id, name, description, resolved_date) VALUES (1, \u0027New Incident\u0027, \u0027New security incident description\u0027, \u00272022-05-15\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027security_incidents\u0027 table with the following details: id\u003d1, name\u003d\u0027New Incident\u0027, description\u003d\u0027New security incident description\u0027, resolved_date\u003d\u00272022-05-15\u0027." 
+}, { + "id": "1788", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique IP addresses associated with malware activity in the \u0027Asia-Pacific\u0027 region in the past week.", + "sql_context": "CREATE TABLE malware_activity_v2 (id INT, ip_address VARCHAR(15), malware_type VARCHAR(255), region VARCHAR(100), last_seen DATE); INSERT INTO malware_activity_v2 (id, ip_address, malware_type, region, last_seen) VALUES (4, \u002710.0.0.2\u0027, \u0027wannacry\u0027, \u0027Asia-Pacific\u0027, \u00272022-01-12\u0027), (5, \u002710.0.0.3\u0027, \u0027ransomware\u0027, \u0027Asia-Pacific\u0027, \u00272022-01-15\u0027), (6, \u002710.0.0.4\u0027, \u0027virut\u0027, \u0027Asia-Pacific\u0027, \u00272022-01-18\u0027);", + "sql": "SELECT COUNT(DISTINCT ip_address) FROM malware_activity_v2 WHERE region \u003d \u0027Asia-Pacific\u0027 AND last_seen \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK);", + "sql_explanation": "Finds the number of unique IP addresses associated with malware activity in the \u0027Asia-Pacific\u0027 region in the past week." 
+}, { + "id": "2011", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of days taken to resolve a critical vulnerability in the government sector?", + "sql_context": "CREATE TABLE vulnerability_resolution (id INT, severity VARCHAR(255), sector VARCHAR(255), resolution_date DATE, detection_date DATE); INSERT INTO vulnerability_resolution (id, severity, sector, resolution_date, detection_date) VALUES (1, \u0027critical\u0027, \u0027government\u0027, \u00272021-03-01\u0027, \u00272021-01-15\u0027);", + "sql": "SELECT MAX(DATEDIFF(resolution_date, detection_date)) FROM vulnerability_resolution WHERE severity \u003d \u0027critical\u0027 AND sector \u003d \u0027government\u0027;", + "sql_explanation": "This query finds the maximum number of days taken to resolve a critical vulnerability in the government sector. It does this by selecting all records with a severity of \u0027critical\u0027 and a sector of \u0027government\u0027, then calculating the difference between the resolution date and the detection date for each record, and finally selecting the maximum of those differences." 
+}, { + "id": "2401", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique IP addresses attempted to exploit vulnerabilities in the last week?", + "sql_context": "CREATE TABLE VulnerabilityScans(id INT, ip VARCHAR(50), scan_date DATE);", + "sql": "SELECT COUNT(DISTINCT ip) as unique_ips FROM VulnerabilityScans WHERE scan_date \u003e\u003d DATE_SUB(CURRENT_DATE(), INTERVAL 1 WEEK);", + "sql_explanation": "This query counts the number of unique IP addresses that attempted to exploit vulnerabilities in the last week by selecting distinct IP addresses and filtering for the last week." +}, { + "id": "2421", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the security incidents that occurred in the IT department in the last week?", + "sql_context": "CREATE TABLE security_incidents (id INT, department VARCHAR(255), incident_time TIMESTAMP); INSERT INTO security_incidents (id, department, incident_time) VALUES (1, \u0027HR\u0027, \u00272022-01-17 15:45:00\u0027), (2, \u0027IT\u0027, \u00272022-01-25 11:00:00\u0027), (3, \u0027HR\u0027, \u00272022-01-04 08:30:00\u0027);", + "sql": "SELECT * FROM security_incidents WHERE department \u003d \u0027IT\u0027 AND incident_time 
\u003e\u003d DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 WEEK);", + "sql_explanation": "This query selects all the security incidents from the security incidents table that occurred in the IT department in the last week." +}, { + "id": "2694", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time to resolution for high severity incidents in the energy sector?", + "sql_context": "CREATE TABLE incidents (incident_id INT, incident_severity VARCHAR(255), incident_sector VARCHAR(255), incident_resolution_time INT);", + "sql": "SELECT AVG(incident_resolution_time) FROM incidents WHERE incident_severity \u003d \u0027High\u0027 AND incident_sector \u003d \u0027Energy\u0027;", + "sql_explanation": "This query creates a table called \u0027incidents\u0027 that contains information about each security incident, including the severity level, the sector it belongs to, and the time it took to resolve the incident. The SQL query then filters the results to only include high severity incidents in the energy sector and calculates the average time to resolution using the \u0027AVG\u0027 function on the \u0027incident_resolution_time\u0027 column. This allows us to see the average time to resolution for high severity incidents in the energy sector." 
+}, { + "id": "2855", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many security incidents were there in Q2 2022 that originated from India?", + "sql_context": "CREATE TABLE SecurityIncidents (id INT, incident_name VARCHAR(255), country VARCHAR(255), date DATE); INSERT INTO SecurityIncidents (id, incident_name, country, date) VALUES (2, \u0027Ransomware Attack\u0027, \u0027India\u0027, \u00272022-04-15\u0027);", + "sql": "SELECT COUNT(*) FROM SecurityIncidents WHERE country \u003d \u0027India\u0027 AND date \u003e\u003d \u00272022-04-01\u0027 AND date \u003c \u00272022-07-01\u0027;", + "sql_explanation": "The SQL query counts the number of security incidents that originated from India in Q2 2022. It does this by filtering the SecurityIncidents table for records where the country is \u0027India\u0027 and the date is within Q2 2022. Then, it counts the number of records that meet these criteria using the COUNT function." 
+}, { + "id": "3196", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the policy names and their corresponding policy owners for policies that have not been reviewed in the past 6 months, based on the PolicyReview table.", + "sql_context": "CREATE TABLE PolicyReview (policy_id INT, policy_name VARCHAR(50), policy_owner VARCHAR(50), last_reviewed DATETIME);", + "sql": "SELECT policy_name, policy_owner FROM PolicyReview WHERE last_reviewed \u003c DATEADD(month, -6, GETDATE());", + "sql_explanation": "The SQL query lists the policy names and their corresponding policy owners for policies that have not been reviewed in the past 6 months by filtering the results based on the last_reviewed column." 
+}, { + "id": "3276", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the security incidents involving ransomware attacks in the healthcare sector in 2020 and 2021.", + "sql_context": "CREATE TABLE incidents (incident_id INT, type TEXT, sector TEXT, year INT); INSERT INTO incidents (incident_id, type, sector, year) VALUES (1, \u0027Ransomware\u0027, \u0027Healthcare\u0027, 2020), (2, \u0027Ransomware\u0027, \u0027Healthcare\u0027, 2021), (3, \u0027Phishing\u0027, \u0027Healthcare\u0027, 2020), (4, \u0027Phishing\u0027, \u0027Healthcare\u0027, 2021), (5, \u0027Ransomware\u0027, \u0027Healthcare\u0027, 2021);", + "sql": "SELECT * FROM incidents WHERE sector \u003d \u0027Healthcare\u0027 AND type \u003d \u0027Ransomware\u0027 AND year IN (2020, 2021);", + "sql_explanation": "The SQL query filters incidents by sector, type, and year. It retrieves all records that match the specified criteria." 
+}, { + "id": "3306", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of high severity vulnerabilities in the \u0027Vulnerabilities\u0027 table?", + "sql_context": "CREATE TABLE Vulnerabilities (id INT, vulnerability_name VARCHAR(50), severity VARCHAR(10), affected_devices INT); INSERT INTO Vulnerabilities (id, vulnerability_name, severity, affected_devices) VALUES (1, \u0027Vulnerability1\u0027, \u0027High\u0027, 2), (2, \u0027Vulnerability2\u0027, \u0027Medium\u0027, 3), (3, \u0027Vulnerability3\u0027, \u0027Low\u0027, 1), (4, \u0027Vulnerability1\u0027, \u0027High\u0027, 1), (5, \u0027Vulnerability4\u0027, \u0027Low\u0027, 1);", + "sql": "SELECT COUNT(*) as total_high_severity_vulnerabilities FROM Vulnerabilities WHERE severity \u003d \u0027High\u0027;", + "sql_explanation": "This query retrieves the total number of high severity vulnerabilities in the Vulnerabilities table by using the COUNT() function on the Vulnerabilities table and filtering the results by severity where it is equal to \u0027High\u0027." 
+}, { + "id": "3448", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 threat actors, based on the number of security incidents they are responsible for, in the last 6 months?", + "sql_context": "CREATE TABLE security_incidents (id INT, threat_actor VARCHAR(255), timestamp TIMESTAMP);CREATE VIEW threat_actor_count AS SELECT threat_actor, COUNT(*) as incident_count FROM security_incidents WHERE timestamp \u003e\u003d NOW() - INTERVAL \u00276 months\u0027 GROUP BY threat_actor;", + "sql": "SELECT threat_actor, incident_count FROM threat_actor_count ORDER BY incident_count DESC LIMIT 5;", + "sql_explanation": "The SQL query lists the top 5 threat actors based on the number of security incidents they are responsible for in the last 6 months. It retrieves data from the threat_actor_count view, which groups threat actors by incident count and filters results to only include incidents from the last 6 months. The query orders the results in descending order based on the number of incidents per threat actor and limits the results to the top 5." 
+}, { + "id": "3479", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all cybersecurity policies that have not been reviewed in the past year", + "sql_context": "CREATE TABLE cybersecurity_policies (id INT, policy_name VARCHAR(50), review_date DATE);", + "sql": "SELECT policy_name FROM cybersecurity_policies WHERE review_date \u003c DATEADD(year, -1, GETDATE());", + "sql_explanation": "This query lists all cybersecurity policies that have not been reviewed in the past year by selecting policy names with a review date earlier than one year ago." +}, { + "id": "3540", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of days it took to remediate vulnerabilities in the \u0027HR\u0027 department?", + "sql_context": "CREATE TABLE hr_dept_vulnerabilities (id INT, incident_date DATE, department VARCHAR(255), days_to_remediate INT); INSERT INTO hr_dept_vulnerabilities (id, incident_date, department, days_to_remediate) VALUES (1, \u00272022-01-01\u0027, \u0027HR\u0027, 3), (2, \u00272022-02-01\u0027, \u0027HR\u0027, 7), (3, \u00272022-03-01\u0027, \u0027HR\u0027, 5);", + "sql": "SELECT department, MIN(days_to_remediate) FROM hr_dept_vulnerabilities WHERE department \u003d 
\u0027HR\u0027;", + "sql_explanation": "Find the minimum number of days it took to remediate vulnerabilities in the \u0027HR\u0027 department by filtering records with the \u0027HR\u0027 department value and calculating the minimum value of the days_to_remediate column." +}, { + "id": "4244", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update security policies\u0027 category", + "sql_context": "CREATE TABLE security_policies (id INT, policy_id VARCHAR(255), policy_name VARCHAR(255), category VARCHAR(255), last_updated DATETIME); INSERT INTO security_policies (id, policy_id, policy_name, category, last_updated) VALUES (1, \u0027POL-002\u0027, \u0027Incident Response\u0027, \u0027Detection\u0027, \u00272021-07-01 11:00:00\u0027);", + "sql": "UPDATE security_policies SET category \u003d \u0027Monitoring\u0027 WHERE policy_id \u003d \u0027POL-002\u0027;", + "sql_explanation": "This query updates the category of the policy with policy_id \u0027POL-002\u0027 in the security_policies table to \u0027Monitoring\u0027 from the previous value of \u0027Detection\u0027." 
+}, { + "id": "4432", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of times the retail sector has been targeted by phishing attacks.", + "sql_context": "CREATE TABLE incidents (id INT, sector VARCHAR(20), type VARCHAR(50)); INSERT INTO incidents (id, sector, type) VALUES (1, \u0027Retail\u0027, \u0027Phishing\u0027), (2, \u0027Healthcare\u0027, \u0027Phishing\u0027), (3, \u0027Financial\u0027, \u0027Ransomware\u0027);", + "sql": "SELECT COUNT(*) FROM incidents WHERE sector \u003d \u0027Retail\u0027 AND type \u003d \u0027Phishing\u0027;", + "sql_explanation": "1. Select all records from the incidents table. 2. Filter the records where the sector is \u0027Retail\u0027 and type is \u0027Phishing\u0027. 3. Count the number of remaining records, which gives the number of times the retail sector has been targeted by phishing attacks." 
+}, { + "id": "4770", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity of vulnerabilities in the finance department?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, department VARCHAR(255), severity FLOAT); INSERT INTO vulnerabilities (id, department, severity) VALUES (1, \u0027finance\u0027, 7.5), (2, \u0027marketing\u0027, 5.0), (3, \u0027finance\u0027, 8.0);", + "sql": "SELECT AVG(severity) FROM vulnerabilities WHERE department \u003d \u0027finance\u0027;", + "sql_explanation": "The SQL query calculates the average severity of vulnerabilities in the finance department by using the AVG function on the severity column, filtering the records with a WHERE clause for the finance department." 
+}, { + "id": "4868", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of vulnerabilities with a severity of \u0027High\u0027?", + "sql_context": "CREATE TABLE schema1.vulnerabilities (id INT, name VARCHAR(255), severity VARCHAR(50), description TEXT, date_discovered DATE, last_observed DATE); INSERT INTO schema1.vulnerabilities (id, name, severity, description, date_discovered, last_observed) VALUES (1, \u0027SQL Injection\u0027, \u0027Critical\u0027, \u0027Allows unauthorized access\u0027, \u00272021-01-01\u0027, \u00272021-02-01\u0027);", + "sql": "SELECT COUNT(*) FROM schema1.vulnerabilities WHERE severity \u003d \u0027High\u0027;", + "sql_explanation": "This query uses the WHERE clause to filter records based on the \u0027severity\u0027 column, then counts all records with a severity of \u0027High\u0027 using the COUNT(*) function." 
+}, { + "id": "5037", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all threat intelligence data related to the United States.", + "sql_context": "CREATE TABLE threat_intelligence (id INT, source VARCHAR(20), description TEXT, country VARCHAR(20)); INSERT INTO threat_intelligence (id, source, description, country) VALUES (1, \u0027NSA\u0027, \u0027Zero-day exploit\u0027, \u0027United States\u0027);", + "sql": "SELECT * FROM threat_intelligence WHERE country \u003d \u0027United States\u0027;", + "sql_explanation": "The SQL query is selecting all columns from the threat_intelligence table where the country is \u0027United States\u0027." 
+}, { + "id": "5085", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of threat intelligence data in the energy sector?", + "sql_context": "CREATE TABLE threat_intelligence (id INT, sector VARCHAR(20), data VARCHAR(50)); INSERT INTO threat_intelligence (id, sector, data) VALUES (1, \u0027Energy\u0027, \u0027IP Address\u0027);", + "sql": "SELECT COUNT(*) FROM threat_intelligence WHERE sector \u003d \u0027Energy\u0027;", + "sql_explanation": "This query counts the number of records in the \u0027threat_intelligence\u0027 table where the \u0027sector\u0027 column is equal to \u0027Energy\u0027." +}, { + "id": "5207", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique threat types in the healthcare sector.", + "sql_context": "CREATE TABLE threats (id INT, sector VARCHAR(20), type VARCHAR(50)); INSERT INTO threats (id, sector, type) VALUES (1, \u0027Healthcare\u0027, \u0027Phishing\u0027), (2, \u0027Healthcare\u0027, \u0027Malware\u0027), (3, \u0027Financial\u0027, \u0027Ransomware\u0027);", + "sql": "SELECT DISTINCT type FROM threats WHERE sector \u003d \u0027Healthcare\u0027;", + "sql_explanation": "1. 
Select all distinct type values from the threats table. 2. Filter the records where the sector is \u0027Healthcare\u0027. 3. Display the unique threat types in the healthcare sector." +}, { + "id": "5571", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove the \u0027Data Breach\u0027 record from the \u0027incident_types\u0027 table", + "sql_context": "CREATE TABLE incident_types (id INT, name VARCHAR, description TEXT); INSERT INTO incident_types (id, name, description) VALUES (1, \u0027Data Breach\u0027, \u0027Unauthorized access to data\u0027);", + "sql": "DELETE FROM incident_types WHERE name\u003d\u0027Data Breach\u0027;", + "sql_explanation": "This query deletes the record with the name \u0027Data Breach\u0027 from the \u0027incident_types\u0027 table." 
+}, { + "id": "5703", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique regions represented in the IncidentResponse table?", + "sql_context": "CREATE TABLE IncidentResponse (region VARCHAR(50), incidentDate DATE); INSERT INTO IncidentResponse (region, incidentDate) VALUES (\u0027EMEA\u0027, \u00272022-01-05\u0027), (\u0027APAC\u0027, \u00272022-01-12\u0027), (\u0027AMER\u0027, \u00272022-01-20\u0027);", + "sql": "SELECT DISTINCT region FROM IncidentResponse;", + "sql_explanation": "This SQL query selects the unique values in the region column from the IncidentResponse table." +}, { + "id": "5751", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the vulnerability with id 3 from the vulnerabilities table", + "sql_context": "CREATE TABLE vulnerabilities (id INT, name VARCHAR); INSERT INTO vulnerabilities (id, name) VALUES (1, \u0027SQL Injection\u0027), (2, \u0027Cross-site Scripting\u0027), (3, \u0027Broken Authentication\u0027), (4, \u0027Remote Code Execution\u0027);", + "sql": "DELETE FROM vulnerabilities WHERE id \u003d 3;", + "sql_explanation": "The SQL query deletes the record with an id of 3 from the vulnerabilities table." 
+}, { + "id": "653", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert the following suppliers into the suppliers table: (1, \u0027Acme Quinoa Inc.\u0027, \u0027Colorado, USA\u0027, 4.2), (2, \u0027Boulder Salt Co.\u0027, \u0027Utah, USA\u0027, 4.5), (3, \u0027Green Olive Oil Ltd.\u0027, \u0027Italy\u0027, 4.8)", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name TEXT, location TEXT, sustainability_rating REAL);", + "sql": "INSERT INTO suppliers (id, name, location, sustainability_rating) VALUES (1, \u0027Acme Quinoa Inc.\u0027, \u0027Colorado, USA\u0027, 4.2), (2, \u0027Boulder Salt Co.\u0027, \u0027Utah, USA\u0027, 4.5), (3, \u0027Green Olive Oil Ltd.\u0027, \u0027Italy\u0027, 4.8);", + "sql_explanation": "The SQL query inserts 3 new records into the suppliers table with specific values. This represents adding suppliers to the database with their corresponding details." 
+}, { + "id": "1275", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all suppliers from India with more than 3 delivery incidents in the last 6 months.", + "sql_context": "CREATE TABLE FoodSuppliers (supplier_id INTEGER, supplier_name TEXT, country TEXT, delivery_incidents INTEGER, last_delivery_date DATETIME); INSERT INTO FoodSuppliers (supplier_id, supplier_name, country, delivery_incidents, last_delivery_date) VALUES (1, \u0027Supplier A\u0027, \u0027India\u0027, 4, \u00272022-01-15 12:00:00\u0027);", + "sql": "SELECT supplier_name, country FROM FoodSuppliers WHERE country \u003d \u0027India\u0027 AND delivery_incidents \u003e 3 AND last_delivery_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", + "sql_explanation": "This SQL query lists all suppliers from India with more than 3 delivery incidents in the last 6 months by selecting the supplier name and country from the FoodSuppliers table where the country is \u0027India\u0027, the delivery incidents are more than 3, and the last delivery date is within the last 6 months using the DATE_SUB function." 
+}, { + "id": "1957", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of organic fruits imported from Brazil to the USA?", + "sql_context": "CREATE TABLE FruitImport(id INT, name TEXT, weight FLOAT, is_organic BOOLEAN, export_country TEXT, import_country TEXT); INSERT INTO FruitImport(id, name, weight, is_organic, export_country, import_country) VALUES (1, \u0027Pineapple\u0027, 500.5, TRUE, \u0027Brazil\u0027, \u0027USA\u0027), (2, \u0027Orange\u0027, 350.2, FALSE, \u0027Brazil\u0027, \u0027USA\u0027);", + "sql": "SELECT SUM(weight) FROM FruitImport WHERE name \u003d \u0027Pineapple\u0027 AND is_organic \u003d TRUE AND export_country \u003d \u0027Brazil\u0027 AND import_country \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the total weight of organic fruits imported from Brazil to the USA by using the SUM function on the weight column. It filters the data for rows where the name is \u0027Pineapple\u0027, is_organic is TRUE, export_country is \u0027Brazil\u0027, and import_country is \u0027USA\u0027." 
+}, { + "id": "2321", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show me the total quantity of organic fruits and vegetables imported from India in 2018.", + "sql_context": "CREATE TABLE OrganicFruitsVeggies (id INT, category VARCHAR(50), country VARCHAR(50), year INT, quantity INT); INSERT INTO OrganicFruitsVeggies (id, category, country, year, quantity) VALUES (1, \u0027Fruits\u0027, \u0027India\u0027, 2017, 500), (2, \u0027Fruits\u0027, \u0027India\u0027, 2018, 700), (3, \u0027Vegetables\u0027, \u0027India\u0027, 2017, 800), (4, \u0027Vegetables\u0027, \u0027India\u0027, 2018, 1000);", + "sql": "SELECT SUM(quantity) FROM OrganicFruitsVeggies WHERE category IN (\u0027Fruits\u0027, \u0027Vegetables\u0027) AND country \u003d \u0027India\u0027 AND year \u003d 2018;", + "sql_explanation": "This SQL query calculates the sum of the \u0027quantity\u0027 column in the \u0027OrganicFruitsVeggies\u0027 table for records where the \u0027category\u0027 column is either \u0027Fruits\u0027 or \u0027Vegetables\u0027, the \u0027country\u0027 column is \u0027India\u0027, and the \u0027year\u0027 column is 2018." 
+}, { + "id": "2525", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record to the suppliers table", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(100), address VARCHAR(255), city VARCHAR(100), country VARCHAR(100));", + "sql": "INSERT INTO suppliers (id, name, address, city, country) VALUES (1, \u0027Green Garden\u0027, \u0027123 Main St\u0027, \u0027Springfield\u0027, \u0027USA\u0027);", + "sql_explanation": "This query inserts a new record into the suppliers table by using the INSERT INTO statement, specifying the table name and column names, and providing the values for each column in the VALUES clause." +}, { + "id": "3163", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum caloric content of dishes in the \u0027Japanese\u0027 and \u0027Korean\u0027 cuisine types?", + "sql_context": "CREATE TABLE CuisineTypes (CuisineTypeID INT, CuisineType VARCHAR(50));CREATE TABLE Dishes (DishID INT, DishName VARCHAR(50), CuisineTypeID INT, CaloricContent INT); INSERT INTO CuisineTypes VALUES (1, \u0027Italian\u0027), (2, \u0027Chinese\u0027), (3, \u0027Japanese\u0027), (4, \u0027Korean\u0027); INSERT INTO Dishes VALUES (1, \u0027Pizza Margherita\u0027, 1, 500), (2, \u0027Spaghetti Bolognese\u0027, 1, 700), (3, 
\u0027Ramen\u0027, 3, 600), (4, \u0027Bibimbap\u0027, 4, 400), (5, \u0027Sushi\u0027, 3, 300);", + "sql": "SELECT MIN(CaloricContent) as MinCaloricContent FROM Dishes WHERE CuisineType IN (\u0027Japanese\u0027, \u0027Korean\u0027);", + "sql_explanation": "This query calculates the minimum caloric content for dishes in the Japanese and Korean cuisine types by filtering the Dishes table for those cuisine types and then calculating the minimum caloric content." +}, { + "id": "3193", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of fish imported from Japan with sustainable fishing certifications?", + "sql_context": "CREATE TABLE FishImport(id INT, name TEXT, weight FLOAT, is_sustainable BOOLEAN, country TEXT); INSERT INTO FishImport(id, name, weight, is_sustainable, country) VALUES (1, \u0027Tuna\u0027, 250.5, TRUE, \u0027Japan\u0027), (2, \u0027Salmon\u0027, 320.3, FALSE, \u0027Norway\u0027);", + "sql": "SELECT SUM(weight) FROM FishImport WHERE name \u003d \u0027Tuna\u0027 AND is_sustainable \u003d TRUE AND country \u003d \u0027Japan\u0027;", + "sql_explanation": "This SQL query calculates the total weight of fish imported from Japan with sustainable fishing certifications by using the SUM function on the weight column. It filters the data for rows where the name is \u0027Tuna\u0027, is_sustainable is TRUE, and country is \u0027Japan\u0027." 
+}, { + "id": "3220", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of all milk sold last month?", + "sql_context": "CREATE TABLE sales (id INT, product TEXT, quantity INT, date DATE); INSERT INTO sales (id, product, quantity, date) VALUES (1, \u0027eggs\u0027, 12, \u00272021-01-01\u0027), (2, \u0027milk\u0027, 24, \u00272021-01-05\u0027), (3, \u0027eggs\u0027, 18, \u00272021-01-07\u0027), (4, \u0027flour\u0027, 36, \u00272021-01-10\u0027);", + "sql": "SELECT SUM(quantity) FROM sales WHERE product \u003d \u0027milk\u0027 AND date BETWEEN \u00272021-01-01\u0027 AND \u00272021-01-31\u0027;", + "sql_explanation": "This query calculates the total (SUM) quantity of the \u0027milk\u0027 product sold in January 2021 using SUM and a WHERE clause with BETWEEN for date range." 
+}, { + "id": "3777", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the calorie count of the dish \u0027PÃŖo de Queijo\u0027 in Brazil to 350.", + "sql_context": "CREATE TABLE dishes (id INT, name TEXT, cuisine TEXT, calorie_count INT, country TEXT); INSERT INTO dishes (id, name, cuisine, calorie_count, country) VALUES (1, \u0027PÃŖo de Queijo\u0027, \u0027Brazilian\u0027, 300, \u0027Brazil\u0027); INSERT INTO dishes (id, name, cuisine, calorie_count, country) VALUES (2, \u0027Feijoada\u0027, \u0027Brazilian\u0027, 800, \u0027Brazil\u0027);", + "sql": "UPDATE dishes SET calorie_count \u003d 350 WHERE name \u003d \u0027PÃŖo de Queijo\u0027 AND country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query updates the calorie count of the dish \u0027PÃŖo de Queijo\u0027 in Brazil by using the UPDATE statement and specifying the conditions with the WHERE clause." 
+}, { + "id": "4145", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum sodium content in vegan appetizers?", + "sql_context": "CREATE TABLE MenuItems (id INT, is_vegan BOOLEAN, category VARCHAR(20), sodium INT); INSERT INTO MenuItems (id, is_vegan, category, sodium) VALUES (1, true, \u0027appetizer\u0027, 200), (2, false, \u0027appetizer\u0027, 400), (3, true, \u0027entree\u0027, 300);", + "sql": "SELECT MAX(sodium) FROM MenuItems WHERE is_vegan \u003d true AND category \u003d \u0027appetizer\u0027;", + "sql_explanation": "The SQL query finds the maximum sodium content in vegan appetizers by using the MAX function on the sodium column, filtered by the true value in the is_vegan column and the appetizer value in the category column." 
+}, { + "id": "4164", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of \u0027apples\u0027 in the \u0027inventory\u0027 table?", + "sql_context": "CREATE TABLE inventory (product VARCHAR(255), weight FLOAT); INSERT INTO inventory (product, weight) VALUES (\u0027Apples\u0027, 500.0), (\u0027Bananas\u0027, 300.0), (\u0027Apples\u0027, 700.0);", + "sql": "SELECT SUM(weight) as total_apples_weight FROM inventory WHERE product \u003d \u0027Apples\u0027;", + "sql_explanation": "The SQL query calculates the total weight of apples in the inventory table using the SUM function and a filter condition on the product column." 
+}, { + "id": "4275", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of organic and vegan items in the inventory?", + "sql_context": "CREATE TABLE Inventory(item_id INT, item_name VARCHAR(50), is_organic BOOLEAN, is_vegan BOOLEAN, quantity INT); INSERT INTO Inventory VALUES(1,\u0027Apples\u0027,TRUE,TRUE,100),(2,\u0027Bananas\u0027,TRUE,TRUE,200),(3,\u0027Carrots\u0027,TRUE,FALSE,150);", + "sql": "SELECT SUM(quantity) FROM Inventory WHERE is_organic \u003d TRUE AND is_vegan \u003d TRUE;", + "sql_explanation": "Filter records where both is_organic and is_vegan are TRUE, then calculate the total quantity." +}, { + "id": "4299", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete expired shipments from the supply_chain table.", + "sql_context": "CREATE TABLE supply_chain (id INTEGER, product_id VARCHAR(10), shipped_date DATE, expiration_date DATE);", + "sql": "DELETE FROM supply_chain WHERE shipped_date + INTERVAL \u002730 days\u0027 \u003c CURRENT_DATE;", + "sql_explanation": "This query deletes records from the supply_chain table where the shipped date is more than 30 days before the current date." 
+}, { + "id": "4343", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many nutrition facts are missing in the nutrition_facts table?", + "sql_context": "CREATE TABLE nutrition_facts (fact_id INT, meal_id INT, calories INT, protein INT, vitamins VARCHAR(50)); INSERT INTO nutrition_facts (fact_id, meal_id, calories, protein, vitamins) VALUES (1, 1, NULL, 15, \u0027A, C, D\u0027), (2, 2, 220, NULL, \u0027B12, E\u0027), (3, 3, 400, 12, \u0027B6, K\u0027), (4, 5, 300, 20, NULL);", + "sql": "SELECT COUNT(*) FROM nutrition_facts WHERE calories IS NULL OR protein IS NULL;", + "sql_explanation": "* The query counts the number of rows where calories or protein is NULL * This is done using the COUNT function with * as the argument * The IS NULL condition filters the data" +}, { + "id": "4508", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of genetically modified crops grown in India?", + "sql_context": "CREATE TABLE crops (id INT, type VARCHAR(20), country VARCHAR(20), quantity INT); INSERT INTO crops (id, type, country, quantity) VALUES (1, \u0027cotton\u0027, \u0027India\u0027, 12000000), (2, \u0027soybean\u0027, \u0027Brazil\u0027, 8000000);", + "sql": "SELECT SUM(quantity) FROM crops WHERE type \u003d \u0027cotton\u0027 AND 
country \u003d \u0027India\u0027;", + "sql_explanation": "Calculate the total quantity of genetically modified cotton crops grown in India by summing the quantity column, filtering for records where the type is cotton and the country is India." +}, { + "id": "4552", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of all \u0027Certified Organic\u0027 products in \u0027NatureMarket\u0027?", + "sql_context": "CREATE TABLE NatureMarket (product_id INT, product_name VARCHAR(50), weight FLOAT, eco_label VARCHAR(50)); INSERT INTO NatureMarket (product_id, product_name, weight, eco_label) VALUES (1, \u0027Apples\u0027, 2.5, \u0027Certified Organic\u0027), (2, \u0027Bananas\u0027, 3.0, \u0027Fair Trade\u0027), (3, \u0027Carrots\u0027, 1.5, \u0027Certified Organic\u0027), (4, \u0027Dates\u0027, 1.0, \u0027Fair Trade\u0027);", + "sql": "SELECT SUM(weight) FROM NatureMarket WHERE eco_label \u003d \u0027Certified Organic\u0027;", + "sql_explanation": "This query calculates the total weight of all \u0027Certified Organic\u0027 products in \u0027NatureMarket\u0027 by summing the weight column for rows with the eco_label \u0027Certified Organic\u0027." 
+}, { + "id": "4615", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of GMO-free cereals produced in the USA in 2019.", + "sql_context": "CREATE TABLE GMOFreeCereals (id INT, country VARCHAR(50), year INT, quantity INT); INSERT INTO GMOFreeCereals (id, country, year, quantity) VALUES (1, \u0027USA\u0027, 2018, 300), (2, \u0027USA\u0027, 2019, 400), (3, \u0027Canada\u0027, 2018, 250), (4, \u0027Canada\u0027, 2019, 275);", + "sql": "SELECT COUNT(*) FROM GMOFreeCereals WHERE country \u003d \u0027USA\u0027 AND year \u003d 2019;", + "sql_explanation": "This SQL query counts the number of records in the \u0027GMOFreeCereals\u0027 table that meet the specified conditions: the \u0027country\u0027 column is \u0027USA\u0027 and the \u0027year\u0027 column is 2019." 
+}, { + "id": "4619", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total revenue for \u0027Gluten-Free\u0027 products in the \u0027Sales\u0027 table", + "sql_context": "CREATE TABLE Sales (id INT PRIMARY KEY, product VARCHAR(255), price DECIMAL(5,2), quantity INT); INSERT INTO Sales (id, product, price, quantity) VALUES (1, \u0027Bread\u0027, 2.50, 10), (2, \u0027Gluten-Free Bread\u0027, 3.50, 15), (3, \u0027Pasta\u0027, 1.99, 20);", + "sql": "SELECT SUM(price * quantity) FROM Sales WHERE product LIKE \u0027Gluten-Free%\u0027;", + "sql_explanation": "This query shows the total revenue for \u0027Gluten-Free\u0027 products in the \u0027Sales\u0027 table by filtering the table for \u0027Gluten-Free\u0027 products (WHERE product LIKE \u0027Gluten-Free%\u0027) and then calculating the sum of the revenue (SUM(price * quantity))." 
+}, { + "id": "4749", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all organic produce with a price per unit less than $1.50.", + "sql_context": "CREATE TABLE produce (id INT, name VARCHAR(255), organic BOOLEAN, price_per_unit DECIMAL(5,2)); INSERT INTO produce (id, name, organic, price_per_unit) VALUES (1, \u0027Apples\u0027, TRUE, 1.25), (2, \u0027Bananas\u0027, TRUE, 0.99), (3, \u0027Carrots\u0027, FALSE, 1.75);", + "sql": "SELECT name FROM produce WHERE organic \u003d TRUE AND price_per_unit \u003c 1.50;", + "sql_explanation": "This query lists all organic produce with a price per unit less than $1.50 by filtering for rows with the organic column set to TRUE and the price_per_unit column less than 1.50." 
+}, { + "id": "5175", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average rating of restaurants in \u0027California\u0027", + "sql_context": "CREATE TABLE restaurants (restaurant_id INT PRIMARY KEY, name VARCHAR(255), rating INT);", + "sql": "SELECT AVG(rating) FROM restaurants WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "This command calculates the average rating of restaurants located in \u0027California\u0027 by filtering the \u0027restaurants\u0027 table with the state column set to \u0027California\u0027 and using the AVG function." +}, { + "id": "5300", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum quantity of \u0027Quinoa\u0027 in the \u0027Warehouse\u0027 table", + "sql_context": "CREATE TABLE Warehouse (id INT PRIMARY KEY, product VARCHAR(255), quantity INT); INSERT INTO Warehouse (id, product, quantity) VALUES (1, \u0027Quinoa\u0027, 100), (2, \u0027Rice\u0027, 75), (3, \u0027Quinoa\u0027, 125);", + "sql": "SELECT MAX(quantity) FROM Warehouse WHERE product \u003d \u0027Quinoa\u0027;", + "sql_explanation": "This query finds the maximum quantity of \u0027Quinoa\u0027 in the \u0027Warehouse\u0027 table. 
It does this by filtering the table for \u0027Quinoa\u0027 (WHERE product \u003d \u0027Quinoa\u0027) and then calculating the maximum quantity of those products (MAX(quantity))." +}, { + "id": "5379", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average calorie count for vegetarian dishes in our restaurant chain?", + "sql_context": "CREATE TABLE dishes (id INT, name TEXT, type TEXT, calories INT); INSERT INTO dishes (id, name, type, calories) VALUES (1, \u0027Quinoa Salad\u0027, \u0027vegetarian\u0027, 350), (2, \u0027Pizza Margherita\u0027, \u0027non_vegetarian\u0027, 800);", + "sql": "SELECT AVG(calories) FROM dishes WHERE type \u003d \u0027vegetarian\u0027;", + "sql_explanation": "This SQL query calculates the average calorie count for vegetarian dishes. It does this by using the AVG function on the calories column, filtering the data where the type is vegetarian." 
+}, { + "id": "5381", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total calorie count for dishes that contain both meat and dairy products?", + "sql_context": "CREATE TABLE Dishes (DishID INT, DishName VARCHAR(50), Type VARCHAR(20), Calories INT); INSERT INTO Dishes (DishID, DishName, Type, Calories) VALUES (1, \u0027Beef Lasagna\u0027, \u0027Meat-dairy\u0027, 800), (2, \u0027Cheese Pizza\u0027, \u0027Dairy\u0027, 600), (3, \u0027Chicken Caesar Salad\u0027, \u0027Meat-dairy\u0027, 500), (4, \u0027Veggie Pizza\u0027, \u0027Dairy\u0027, 700);", + "sql": "SELECT SUM(Calories) FROM Dishes WHERE Type \u003d \u0027Meat-dairy\u0027;", + "sql_explanation": "The SQL query calculates the total calorie count for dishes that contain both meat and dairy products. It uses the WHERE clause to filter the Dishes table for rows where the Type column is \u0027Meat-dairy\u0027, then calculates the sum of the Calories column for the filtered rows." 
+}, { + "id": "5443", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all suppliers from \u0027California\u0027 in the \u0027Suppliers\u0027 table", + "sql_context": "CREATE TABLE Suppliers (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255)); INSERT INTO Suppliers (id, name, location) VALUES (1, \u0027Supplier A\u0027, \u0027California\u0027), (2, \u0027Supplier B\u0027, \u0027New York\u0027), (3, \u0027Supplier C\u0027, \u0027Texas\u0027);", + "sql": "SELECT name FROM Suppliers WHERE location \u003d \u0027California\u0027;", + "sql_explanation": "This query lists all suppliers from \u0027California\u0027 in the \u0027Suppliers\u0027 table. It does this by filtering the table for suppliers from \u0027California\u0027 (WHERE location \u003d \u0027California\u0027) and then returning the list of names (SELECT name)." 
+}, { + "id": "5460", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average weight of local fruits in the inventory.", + "sql_context": "CREATE TABLE Inventory (fruit text, weight integer, is_local boolean); INSERT INTO Inventory (fruit, weight, is_local) VALUES (\u0027Bananas\u0027, 150, false), (\u0027Apples\u0027, 200, true), (\u0027Oranges\u0027, 120, false), (\u0027Strawberries\u0027, 80, true);", + "sql": "SELECT AVG(weight) FROM Inventory WHERE is_local \u003d true;", + "sql_explanation": "This query calculates the average weight of local fruits in the inventory by filtering the Inventory table for local fruits and calculating the average of the weight column." +}, { + "id": "5497", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the supplier from \u0027USA\u0027 with id 4", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(100), address VARCHAR(255), city VARCHAR(100), country VARCHAR(100));", + "sql": "DELETE FROM suppliers WHERE id \u003d 4 AND country \u003d \u0027USA\u0027;", + "sql_explanation": "This query removes the supplier with id 4 from \u0027USA\u0027 in the suppliers table by using the DELETE statement and specifying the condition in the WHERE clause." 
+}, { + "id": "5504", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average price (in USD) of organic products?", + "sql_context": "CREATE TABLE Products (ProductID INT, ProductName VARCHAR(50), IsOrganic BOOLEAN, Price INT); INSERT INTO Products (ProductID, ProductName, IsOrganic, Price) VALUES (1, \u0027Apple\u0027, true, 100), (2, \u0027Carrot\u0027, false, 80), (3, \u0027Banana\u0027, true, 120), (4, \u0027Potato\u0027, false, 90);", + "sql": "SELECT AVG(Price) FROM Products WHERE IsOrganic \u003d true;", + "sql_explanation": "This query calculates the average price of organic products. It does this by filtering the Products table for organic products and then calculating the average price using the AVG function." 
+}, { + "id": "5585", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average price of vegan dishes", + "sql_context": "CREATE TABLE dishes (id INT, name VARCHAR(50), is_vegan BOOLEAN, price INT); INSERT INTO dishes (id, name, is_vegan, price) VALUES (1, \u0027Veggie Burger\u0027, TRUE, 7), (2, \u0027Steak\u0027, FALSE, 20), (3, \u0027Tofu Stir Fry\u0027, TRUE, 12);", + "sql": "SELECT AVG(price) FROM dishes WHERE is_vegan \u003d TRUE;", + "sql_explanation": "We calculate the average price of vegan dishes by selecting the average value of the \u0027price\u0027 column, filtering rows where the \u0027is_vegan\u0027 column equals TRUE." +}, { + "id": "5640", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records for products with a calorie count greater than 500 in the nutrition_facts table.", + "sql_context": "CREATE TABLE nutrition_facts (product_id VARCHAR(255), calories INT, protein INT, fat INT);", + "sql": "DELETE FROM nutrition_facts WHERE calories \u003e 500;", + "sql_explanation": "This query deletes all records for products with a calorie count greater than 500 in the nutrition_facts table. It uses the DELETE keyword to remove the rows with a calories value greater than 500." 
+}, { + "id": "849", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the menu_items table for a non-vegan dish, \u0027chicken parmesan\u0027, priced at $18.99", + "sql_context": "CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(50), description TEXT, price DECIMAL(5,2), category VARCHAR(20), is_vegan BOOLEAN);", + "sql": "INSERT INTO menu_items (name, description, price, category, is_vegan) VALUES (\u0027chicken parmesan\u0027, \u0027Breaded chicken breast with marinara sauce and melted mozzarella\u0027, 18.99, \u0027entree\u0027, FALSE);", + "sql_explanation": "This query inserts a new record into the menu_items table with the following values: \u0027chicken parmesan\u0027 for the name column, a description for the description column, 18.99 for the price column, \u0027entree\u0027 for the category column, and FALSE for the is_vegan column." 
+}, { + "id": "1440", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which menu items have higher sales in the West coast compared to the East coast?", + "sql_context": "CREATE TABLE menu_items (item_id INT, item_name VARCHAR(50), west_coast_sales INT, east_coast_sales INT); INSERT INTO menu_items (item_id, item_name, west_coast_sales, east_coast_sales) VALUES (1, \u0027Cheeseburger\u0027, 300, 200), (2, \u0027Fried Chicken\u0027, 250, 270), (3, \u0027Veggie Burger\u0027, 180, 350);", + "sql": "SELECT item_name, west_coast_sales, east_coast_sales, (west_coast_sales - east_coast_sales) as sales_difference FROM menu_items ORDER BY sales_difference DESC;", + "sql_explanation": "The SQL query calculates the sales difference between the West coast and East coast for each menu item by subtracting the east_coast_sales from the west_coast_sales column. It then orders the results by the sales difference in descending order." 
+}, { + "id": "2082", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of meat products that will expire in the next 7 days?", + "sql_context": "CREATE TABLE Inventory (item_id INT, name VARCHAR(50), is_meat BOOLEAN, expiration_date DATE); INSERT INTO Inventory (item_id, name, is_meat, expiration_date) VALUES (1, \u0027Chicken Breast\u0027, true, \u00272023-04-15\u0027), (2, \u0027Potatoes\u0027, false, \u00272023-04-20\u0027), (3, \u0027Ground Beef\u0027, true, \u00272023-04-22\u0027);", + "sql": "SELECT SUM(quantity) FROM Inventory WHERE is_meat \u003d true AND expiration_date BETWEEN CURDATE() AND DATE_ADD(CURDATE(), INTERVAL 7 DAY);", + "sql_explanation": "The SQL query calculates the total quantity of meat products expiring in the next 7 days by filtering records based on the expiration_date and is_meat columns." 
+}, { + "id": "2481", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of \u0027Local Greens\u0027 sold per day in the Southeast region?", + "sql_context": "CREATE TABLE Daily_Region_Sales(Date DATE, Region VARCHAR(20), Menu_Item VARCHAR(30), Quantity INT); INSERT INTO Daily_Region_Sales(Date, Region, Menu_Item, Quantity) VALUES(\u00272022-01-01\u0027, \u0027Southeast\u0027, \u0027Local Greens\u0027, 10), (\u00272022-01-02\u0027, \u0027Southeast\u0027, \u0027Local Greens\u0027, 15);", + "sql": "SELECT AVG(Quantity) as Average_Quantity FROM Daily_Region_Sales WHERE Menu_Item \u003d \u0027Local Greens\u0027 AND Region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This query calculates the average quantity of \u0027Local Greens\u0027 sold per day in the Southeast region. It filters the Daily_Region_Sales table for \u0027Local Greens\u0027 and \u0027Southeast\u0027 and calculates the average of Quantity for the filtered table." 
+}, { + "id": "2521", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue from gluten-free dishes in the past month?", + "sql_context": "CREATE TABLE Restaurant (id INT, dish_type VARCHAR(10), revenue DECIMAL(10,2)); INSERT INTO Restaurant (id, dish_type, revenue) VALUES (1, \u0027gluten-free\u0027, 300.00), (2, \u0027regular\u0027, 800.00);", + "sql": "SELECT SUM(revenue) FROM Restaurant WHERE dish_type \u003d \u0027gluten-free\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "This SQL query calculates the total revenue for gluten-free dishes in the past month. It uses the SUM() function to add up the revenue values in the Restaurant table where the dish_type is \u0027gluten-free\u0027 and the date is within the last month." 
+}, { + "id": "2680", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new \u0027Sustainable Seafood\u0027 option in the \u0027Seafood\u0027 section with \u0027Grilled Sustainable Tuna\u0027 priced at $21.99 and quantity 15.", + "sql_context": "CREATE TABLE Menu (item VARCHAR(20), type VARCHAR(20), price DECIMAL(5,2), quantity INT);", + "sql": "INSERT INTO Menu (item, type, price, quantity) VALUES (\u0027Grilled Sustainable Tuna\u0027, \u0027Sustainable Seafood\u0027, 21.99, 15);", + "sql_explanation": "The query inserts a new \u0027Sustainable Seafood\u0027 option with \u0027Grilled Sustainable Tuna\u0027 in the \u0027Seafood\u0027 section by inserting a new record into the Menu table with the specified values." 
+}, { + "id": "2930", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update customer orders with a total cost under $25 and order date over a year ago, setting the status to \u0027Inactive\u0027", + "sql_context": "CREATE TABLE customer_orders (order_id INT PRIMARY KEY, customer_id INT, total_cost DECIMAL(5,2), order_date TIMESTAMP, status VARCHAR(255));", + "sql": "UPDATE customer_orders SET status \u003d \u0027Inactive\u0027 WHERE total_cost \u003c 25 AND order_date \u003c NOW() - INTERVAL 1 YEAR;", + "sql_explanation": "This SQL query updates the status column of records in the customer_orders table where the total_cost is less than $25 and the order_date is over 1 year old from the current date (NOW()). The status is set to \u0027Inactive\u0027." 
+}, { + "id": "3224", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all customers who have a food allergy?", + "sql_context": "CREATE TABLE customers (customer_id INT, first_name VARCHAR(50), last_name VARCHAR(50), food_allergy VARCHAR(50)); INSERT INTO customers (customer_id, first_name, last_name, food_allergy) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Nuts\u0027), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Seafood\u0027), (3, \u0027Bob\u0027, \u0027Smith\u0027, \u0027Eggs\u0027);", + "sql": "SELECT customer_id, first_name, last_name, food_allergy FROM customers WHERE food_allergy IS NOT NULL;", + "sql_explanation": "This SQL query retrieves all customers who have a food allergy by selecting the relevant columns from the customers table where the food_allergy column is not null." 
+}, { + "id": "3286", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from vegetarian dishes in the last month?", + "sql_context": "CREATE TABLE orders (order_date DATE, dish VARCHAR(255), vegetarian BOOLEAN, price FLOAT); INSERT INTO orders (order_date, dish, vegetarian, price) VALUES (\u00272022-01-01\u0027, \u0027Pizza\u0027, FALSE, 9.99), (\u00272022-01-02\u0027, \u0027Pasta\u0027, TRUE, 7.99), (\u00272022-01-03\u0027, \u0027Burger\u0027, FALSE, 11.99), (\u00272022-01-04\u0027, \u0027Salad\u0027, TRUE, 8.99), (\u00272022-01-05\u0027, \u0027Pizza\u0027, FALSE, 9.99);", + "sql": "SELECT SUM(price) FROM orders WHERE vegetarian \u003d TRUE AND order_date \u003e\u003d DATEADD(day, -30, GETDATE());", + "sql_explanation": "The SQL query calculates the total revenue generated from vegetarian dishes in the last month by summing the price of all vegetarian orders with an order date within the last 30 days." 
+}, { + "id": "3340", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the \u0027Quinoa Salad\u0027 on the \u0027Lunch\u0027 menu to \u0027Quinoa Bowl\u0027", + "sql_context": "CREATE TABLE Menu (menu_name VARCHAR(20), item_name VARCHAR(30), price DECIMAL(5,2)); INSERT INTO Menu (menu_name, item_name, price) VALUES (\u0027Lunch\u0027, \u0027Chicken Sandwich\u0027, 9.99), (\u0027Lunch\u0027, \u0027Steak Wrap\u0027, 12.49), (\u0027Lunch\u0027, \u0027Quinoa Salad\u0027, 14.50);", + "sql": "UPDATE Menu SET item_name \u003d \u0027Quinoa Bowl\u0027 WHERE menu_name \u003d \u0027Lunch\u0027 AND item_name \u003d \u0027Quinoa Salad\u0027;", + "sql_explanation": "This query updates the name of the \u0027Quinoa Salad\u0027 on the \u0027Lunch\u0027 menu to \u0027Quinoa Bowl\u0027. It does this by using the UPDATE statement to change the \u0027item_name\u0027 of the record where the \u0027menu_name\u0027 is \u0027Lunch\u0027 and the \u0027item_name\u0027 is \u0027Quinoa Salad\u0027." 
+}, { + "id": "4147", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of vegan dishes in the San Francisco region?", + "sql_context": "CREATE TABLE menu (item_id INT, dish_type VARCHAR(10), price DECIMAL(5,2), region VARCHAR(20)); INSERT INTO menu (item_id, dish_type, price, region) VALUES (1, \u0027vegan\u0027, 12.99, \u0027San Francisco\u0027), (2, \u0027vegetarian\u0027, 9.99, \u0027Los Angeles\u0027);", + "sql": "SELECT AVG(price) FROM menu WHERE dish_type \u003d \u0027vegan\u0027 AND region \u003d \u0027San Francisco\u0027;", + "sql_explanation": "The SQL query calculates the average price of vegan dishes in the San Francisco region by selecting the price from the menu table, which is then filtered based on the dish_type and region columns to only include vegan dishes and orders in the San Francisco region. The AVG function is then used to calculate the average price of vegan dishes in the San Francisco region." 
+}, { + "id": "4153", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of dairy-free menu items sold in the month of March 2022?", + "sql_context": "CREATE TABLE menu (menu_id INT, menu_name VARCHAR(50), category VARCHAR(50), quantity_sold INT, price DECIMAL(5,2), month_sold INT, is_dairy_free BOOLEAN); INSERT INTO menu (menu_id, menu_name, category, quantity_sold, price, month_sold, is_dairy_free) VALUES (21, \u0027Quinoa Salad\u0027, \u0027Salads\u0027, 12, 8.99, 3, true);", + "sql": "SELECT SUM(quantity_sold) FROM menu WHERE is_dairy_free \u003d true AND month_sold \u003d 3;", + "sql_explanation": "This query calculates the total quantity of dairy-free menu items sold in the month of March (month_sold \u003d 3) by summing the quantity_sold column and filtering for the is_dairy_free column set to true and month_sold set to 3." 
+}, { + "id": "4405", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the email of the customer with customer_id 1 to \"john.doe_new@example.com\"", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), email VARCHAR(50), loyalty_points INT);", + "sql": "UPDATE customers SET email \u003d \u0027john.doe_new@example.com\u0027 WHERE customer_id \u003d 1;", + "sql_explanation": "This query updates the email of the customer with a customer_id of 1 to \"john.doe_new@example.com\"." +}, { + "id": "4484", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for today?", + "sql_context": "CREATE TABLE orders (order_id INT, order_date DATE, total DECIMAL(5,2)); INSERT INTO orders (order_id, order_date, total) VALUES (1, \u00272023-03-22\u0027, 25.00), (2, \u00272023-03-22\u0027, 18.50), (3, \u00272023-03-23\u0027, 32.75), (4, \u00272023-03-23\u0027, 11.25), (5, \u00272023-03-24\u0027, 41.00); CREATE TABLE location (location_id INT, name VARCHAR(255));", + "sql": "SELECT SUM(total) as total_revenue FROM orders WHERE order_date \u003d CURDATE();", + "sql_explanation": "This query calculates the total revenue for today by summing the total column in the orders table where the order_date matches the current date." 
+}, { + "id": "4698", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of vegan dishes in the Italian cuisine category?", + "sql_context": "CREATE TABLE Menu (menu_item VARCHAR(50), cuisine VARCHAR(20), type VARCHAR(20), price DECIMAL(5,2)); INSERT INTO Menu (menu_item, cuisine, type, price) VALUES (\u0027Vegan Pizza\u0027, \u0027Italian\u0027, \u0027Vegan\u0027, 12.99), (\u0027Eggplant Parmesan\u0027, \u0027Italian\u0027, \u0027Vegetarian\u0027, 13.99), (\u0027Spaghetti Bolognese\u0027, \u0027Italian\u0027, \u0027Non-vegetarian\u0027, 14.99), (\u0027Tofu Alfredo\u0027, \u0027Italian\u0027, \u0027Vegan\u0027, 15.99);", + "sql": "SELECT AVG(price) FROM Menu WHERE cuisine \u003d \u0027Italian\u0027 AND type \u003d \u0027Vegan\u0027;", + "sql_explanation": "This query calculates the average price of vegan dishes in the Italian cuisine category by averaging the \u0027price\u0027 column where \u0027cuisine\u0027 is \u0027Italian\u0027 and \u0027type\u0027 is \u0027Vegan\u0027." 
+}, { + "id": "4973", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of beef used in the dinner menu?", + "sql_context": "CREATE TABLE DinnerMenu(menu_item VARCHAR(50), ingredients TEXT, weight DECIMAL(5,2)); INSERT INTO DinnerMenu VALUES(\u0027Steak\u0027, \u0027beef 500g, potatoes 200g\u0027, 500), (\u0027Spaghetti Bolognese\u0027, \u0027beef 300g, tomatoes 400g\u0027, 300), (\u0027Vegetable Lasagna\u0027, \u0027eggplant 400g, zucchini 400g\u0027, 0);", + "sql": "SELECT SUM(weight) FROM DinnerMenu WHERE ingredients LIKE \u0027%beef%\u0027;", + "sql_explanation": "This query calculates the total weight of beef used in the dinner menu. It does this by summing the weight column for all rows where the ingredients column contains the word \u0027beef\u0027." +}, { + "id": "5030", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new menu item \u0027Steak\u0027 with a price of 25.50 dollars", + "sql_context": "CREATE TABLE menu_items (item_id INT, item_name TEXT, price DECIMAL(5,2));", + "sql": "INSERT INTO menu_items (item_name, price) VALUES (\u0027Steak\u0027, 25.50);", + "sql_explanation": "This query inserts a new record into the menu_items table with the item_name \u0027Steak\u0027 and a price of 25.50 dollars." 
+}, { + "id": "5058", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the sustainability_initiatives table where the end_date column is before the current date", + "sql_context": "CREATE TABLE sustainability_initiatives (sustainability_initiative_id INT, name VARCHAR(50), description TEXT, start_date DATE, end_date DATE);", + "sql": "DELETE FROM sustainability_initiatives WHERE end_date \u003c CURDATE();", + "sql_explanation": "This query deletes all records from the sustainability_initiatives table where the end_date column has a value before the current date." +}, { + "id": "5134", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of menu items with a sustainability_rating of 5", + "sql_context": "CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(255), description TEXT, price DECIMAL(5,2), category VARCHAR(255), sustainability_rating INT);", + "sql": "SELECT COUNT(*) FROM menu_items WHERE sustainability_rating \u003d 5;", + "sql_explanation": "This query selects the total number of records from the menu_items table where the sustainability_rating is 5." 
+}, { + "id": "5152", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with zero quantity sold in the \u0027appetizers\u0027 category from the menu.", + "sql_context": "CREATE TABLE menu (id INT, category VARCHAR(255), item VARCHAR(255), qty_sold INT); INSERT INTO menu (id, category, item, qty_sold) VALUES (1, \u0027appetizers\u0027, \u0027Bruschetta\u0027, 0), (2, \u0027appetizers\u0027, \u0027Spring Rolls\u0027, 10), (3, \u0027desserts\u0027, \u0027Chocolate Cake\u0027, 15);", + "sql": "DELETE FROM menu WHERE category \u003d \u0027appetizers\u0027 AND qty_sold \u003d 0;", + "sql_explanation": "This query removes all records with zero quantity sold in the \u0027appetizers\u0027 category from the menu table by deleting the corresponding records where the category is \u0027appetizers\u0027 and the qty_sold is 0." 
+}, { + "id": "5492", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with a sustainability_rating of 1 from the menu_items table", + "sql_context": "CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(255), description TEXT, price DECIMAL(5,2), category VARCHAR(255), sustainability_rating INT);", + "sql": "DELETE FROM menu_items WHERE sustainability_rating \u003d 1;", + "sql_explanation": "This query deletes all records from the menu_items table where the sustainability_rating is 1." +}, { + "id": "5495", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of vegetarian menu items?", + "sql_context": "CREATE TABLE menus (menu_id INT, menu_name VARCHAR(255), type VARCHAR(255), price DECIMAL(5,2)); INSERT INTO menus (menu_id, menu_name, type, price) VALUES (1, \u0027Quinoa Salad\u0027, \u0027Vegetarian\u0027, 12.99), (2, \u0027Margherita Pizza\u0027, \u0027Non-Vegetarian\u0027, 9.99), (3, \u0027Falafel Wrap\u0027, \u0027Vegetarian\u0027, 8.99), (4, \u0027Steak Sandwich\u0027, \u0027Non-Vegetarian\u0027, 11.99), (5, \u0027Vegan Burger\u0027, \u0027Vegan\u0027, 10.99), (8, \u0027Vegetable Curry\u0027, \u0027Vegetarian\u0027, 13.99);", + "sql": "SELECT MAX(price) FROM menus WHERE type \u003d \u0027Vegetarian\u0027;", + 
"sql_explanation": "This query calculates the maximum price of vegetarian menu items by selecting the maximum price of all rows with a type of \u0027Vegetarian\u0027." +}, { + "id": "5509", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the quantity of \u0027Beef\u0027 to 75 in the inventory table", + "sql_context": "CREATE TABLE inventory (inventory_id INT, item VARCHAR(50), quantity INT); INSERT INTO inventory (inventory_id, item, quantity) VALUES (1, \u0027Beef\u0027, 100), (2, \u0027Pork\u0027, 75);", + "sql": "UPDATE inventory SET quantity \u003d 75 WHERE item \u003d \u0027Beef\u0027;", + "sql_explanation": "This query updates the quantity of \u0027Beef\u0027 to 75 in the inventory table." +}, { + "id": "5816", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the name and price of the least expensive menu item", + "sql_context": "CREATE TABLE Menu (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2));", + "sql": "SELECT name, MIN(price) FROM Menu;", + "sql_explanation": "The SELECT statement is used to retrieve the name and minimum price of all menu items in the Menu table." 
+}, { + "id": "1530", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the circular_economy_initiatives table for a \u0027Remanufacturing Program\u0027 by \u0027ManufacturerE\u0027.", + "sql_context": "CREATE TABLE circular_economy_initiatives (initiative_id INT, manufacturer_name TEXT, initiative_description TEXT);", + "sql": "INSERT INTO circular_economy_initiatives (initiative_id, manufacturer_name, initiative_description) VALUES (5, \u0027ManufacturerE\u0027, \u0027Remanufacturing Program\u0027);", + "sql_explanation": "This query inserts a new record into the circular_economy_initiatives table for a \u0027Remanufacturing Program\u0027 by \u0027ManufacturerE\u0027." 
+}, { + "id": "2620", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the suppliers in Germany with a sustainability rating above 4.5?", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), sustainability_rating FLOAT); INSERT INTO suppliers (id, name, location, sustainability_rating) VALUES (1, \u0027Green Supplies\u0027, \u0027Germany\u0027, 4.8), (2, \u0027Eco-Friendly Imports\u0027, \u0027Germany\u0027, 4.7);", + "sql": "SELECT s.name, s.sustainability_rating FROM suppliers s WHERE s.location \u003d \u0027Germany\u0027 AND s.sustainability_rating \u003e 4.5;", + "sql_explanation": "This query selects the name and sustainability_rating from the suppliers table where the location is \u0027Germany\u0027 and the sustainability_rating is greater than 4.5." 
+}, { + "id": "2813", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027machines\u0027 table with id 301, model \u0027CNC Milling Machine\u0027, manufacturing_country \u0027Japan\u0027, and year 2018", + "sql_context": "CREATE TABLE machines (id INT, model VARCHAR(255), manufacturing_country VARCHAR(255), year INT);", + "sql": "INSERT INTO machines (id, model, manufacturing_country, year) VALUES (301, \u0027CNC Milling Machine\u0027, \u0027Japan\u0027, 2018);", + "sql_explanation": "{1. The INSERT INTO statement is used to add new records, 2. VALUES keyword is used to specify the new records to be added, 3. Columns and their respective values are enclosed in parentheses and separated by commas}" +}, { + "id": "3069", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of machines in the \u0027ethical_manufacturing\u0027 category that were produced before 2010?", + "sql_context": "CREATE TABLE machines (id INT, name VARCHAR(255), category VARCHAR(255), production_date DATE); INSERT INTO machines (id, name, category, production_date) VALUES (1, \u0027ABC Machine\u0027, \u0027ethical_manufacturing\u0027, \u00272008-01-01\u0027), (2, \u0027XYZ Machine\u0027, \u0027ethical_manufacturing\u0027, 
\u00272012-01-01\u0027), (3, \u0027DEF Machine\u0027, \u0027ethical_manufacturing\u0027, \u00272009-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM machines WHERE category \u003d \u0027ethical_manufacturing\u0027 AND production_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "This SQL query finds the total number of machines in the \u0027ethical_manufacturing\u0027 category that were produced before 2010 by using the WHERE clause to filter the machines table to only show machines in the \u0027ethical_manufacturing\u0027 category that were produced before 2010 and then using the COUNT(*) function to count the number of rows that meet the criteria." +}, { + "id": "3096", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Which countries have the highest plastic waste production in 2021? Update the waste_production table with the latest data.", + "sql_context": "CREATE TABLE waste_production (country VARCHAR(50), year INT, plastic_waste_kg_per_capita FLOAT);", + "sql": "UPDATE waste_production SET plastic_waste_kg_per_capita \u003d 120.5 WHERE country \u003d \u0027Brazil\u0027 AND year \u003d 2021;", + "sql_explanation": "This query updates the plastic_waste_kg_per_capita field for Brazil in the waste_production table to 120.5 kg for the year 2021." 
+}, { + "id": "3283", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add new ethical manufacturing record with ID 3, name \u0027Ethical Manufacturer C\u0027, region \u0027Central\u0027", + "sql_context": "CREATE SCHEMA manufacturing;CREATE TABLE ethical_manufacturers (id INT PRIMARY KEY, name TEXT, region TEXT);INSERT INTO ethical_manufacturers (id, name, region) VALUES (1, \u0027Ethical Manufacturer A\u0027, \u0027North\u0027); INSERT INTO ethical_manufacturers (id, name, region) VALUES (2, \u0027Ethical Manufacturer B\u0027, \u0027South\u0027);", + "sql": "INSERT INTO ethical_manufacturers (id, name, region) VALUES (3, \u0027Ethical Manufacturer C\u0027, \u0027Central\u0027);", + "sql_explanation": "Inserts a new record into the \u0027ethical_manufacturers\u0027 table with \u0027id\u0027 3, \u0027name\u0027 \u0027Ethical Manufacturer C\u0027, and \u0027region\u0027 \u0027Central\u0027." 
+}, { + "id": "3322", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Retrieve the maximum and minimum last maintenance dates for all machines", + "sql_context": "CREATE TABLE machines (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), status VARCHAR(255), last_maintenance_date DATE); INSERT INTO machines (id, name, type, status, last_maintenance_date) VALUES (1, \u0027Machine A\u0027, \u0027CNC\u0027, \u0027Operational\u0027, \u00272021-01-01\u0027), (2, \u0027Machine B\u0027, \u0027Robotic Arm\u0027, \u0027Under Maintenance\u0027, \u00272022-05-15\u0027), (3, \u0027Machine C\u0027, \u0027CNC\u0027, \u0027Operational\u0027, \u00272021-10-05\u0027), (4, \u0027Machine D\u0027, \u0027Robotic Arm\u0027, \u0027Operational\u0027, \u00272022-02-03\u0027), (5, \u0027Machine E\u0027, \u0027Conveyor Belt\u0027, \u0027Operational\u0027, \u00272021-06-12\u0027);", + "sql": "SELECT MAX(last_maintenance_date) AS max_date, MIN(last_maintenance_date) AS min_date FROM machines;", + "sql_explanation": "This SQL query retrieves the maximum and minimum values in the \u0027last_maintenance_date\u0027 column from the \u0027machines\u0027 table. The query uses the MAX and MIN aggregate functions to retrieve the maximum and minimum values, respectively. The results are aliased as \u0027max_date\u0027 and \u0027min_date\u0027 using the AS keyword." 
+}, { + "id": "3441", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of engineers working in the \u0027manufacturing\u0027 schema?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), title VARCHAR(50), salary FLOAT, department VARCHAR(50)); INSERT INTO employees (id, name, title, salary, department) VALUES (1, \u0027John Doe\u0027, \u0027Senior Engineer\u0027, 80000.0, \u0027manufacturing\u0027); INSERT INTO employees (id, name, title, salary, department) VALUES (2, \u0027Jane Smith\u0027, \u0027Engineer\u0027, 60000.0, \u0027manufacturing\u0027);", + "sql": "SELECT AVG(salary) FROM employees WHERE department \u003d \u0027manufacturing\u0027 AND title LIKE \u0027%Engineer%\u0027;", + "sql_explanation": "This SQL query calculates the average salary of engineers in the \u0027manufacturing\u0027 schema. It does so by filtering the \u0027employees\u0027 table for rows where the \u0027department\u0027 is \u0027manufacturing\u0027 and the \u0027title\u0027 contains \u0027Engineer\u0027. Then, it computes the average \u0027salary\u0027 from the filtered rows." 
+}, { + "id": "3807", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many \u0027renewable\u0027 energy sources are used in the \u0027manufacturing\u0027 plant?", + "sql_context": "CREATE TABLE plants (id INT, name TEXT, energy_source TEXT); INSERT INTO plants (id, name, energy_source) VALUES (1, \u0027manufacturing\u0027, \u0027renewable\u0027), (2, \u0027assembly\u0027, \u0027non-renewable\u0027);", + "sql": "SELECT COUNT(*) FROM plants WHERE name \u003d \u0027manufacturing\u0027 AND energy_source \u003d \u0027renewable\u0027;", + "sql_explanation": "This SQL query counts the number of \u0027renewable\u0027 energy sources in the \u0027manufacturing\u0027 plant by using the COUNT function with no specified column (which counts all rows), and filtering for rows where the \u0027name\u0027 column is \u0027manufacturing\u0027 and the \u0027energy_source\u0027 column is \u0027renewable\u0027." 
+}, { + "id": "3925", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average experience of employees in \u0027quality_control\u0027 skill?", + "sql_context": "CREATE TABLE employee_skills (employee_id INT, skill_name VARCHAR(50), experience_years INT); INSERT INTO employee_skills (employee_id, skill_name, experience_years) VALUES (1, \u0027sustainable_manufacturing\u0027, 3), (2, \u0027quality_control\u0027, 1), (3, \u0027sustainable_manufacturing\u0027, 5);", + "sql": "SELECT AVG(experience_years) FROM employee_skills WHERE skill_name \u003d \u0027quality_control\u0027;", + "sql_explanation": "This query calculates the average experience_years for employees with the \u0027quality_control\u0027 skill in the employee_skills table." 
+}, { + "id": "3946", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste produced by the textile industry in Asia?", + "sql_context": "CREATE TABLE waste (factory_id INT, industry VARCHAR(50), region VARCHAR(50), waste_generated INT);", + "sql": "SELECT SUM(waste_generated) FROM waste WHERE industry \u003d \u0027textile\u0027 AND region \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query filters the records based on the conditions specified in the WHERE clause and calculates the total waste generated by the textile industry in Asia." +}, { + "id": "4088", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the salaries for workers in the \u0027TextileWorkers\u0027 table who have completed an apprenticeship program by 5%", + "sql_context": "CREATE TABLE TextileWorkers (WorkerID INT, Salary DECIMAL(5,2), ApprenticeshipProgram BOOLEAN);", + "sql": "UPDATE TextileWorkers SET Salary \u003d Salary * 1.05 WHERE ApprenticeshipProgram \u003d TRUE;", + "sql_explanation": "The UPDATE statement increases the \u0027Salary\u0027 column values by 5% for rows in the \u0027TextileWorkers\u0027 table where the \u0027ApprenticeshipProgram\u0027 column is true (completed). 
It uses the multiplication operator (*) to multiply the \u0027Salary\u0027 column values by 1.05." +}, { + "id": "4092", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of employees in companies with circular economy initiatives in France.", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, country TEXT, circular_economy BOOLEAN); INSERT INTO companies (id, name, country, circular_economy) VALUES (1, \u0027ABC Corp\u0027, \u0027France\u0027, TRUE), (2, \u0027DEF Corp\u0027, \u0027Germany\u0027, FALSE), (3, \u0027GHI Corp\u0027, \u0027France\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM companies WHERE country \u003d \u0027France\u0027 AND circular_economy \u003d TRUE;", + "sql_explanation": "This query finds the total number of employees in companies with circular economy initiatives in France. It does this by counting the number of records in the companies table where the country column is \u0027France\u0027 and the circular_economy column is TRUE." 
+}, { + "id": "4132", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the suppliers located in \u0027Paris\u0027 with a sustainability rating greater than 85?", + "sql_context": "CREATE TABLE suppliers (id INT, name VARCHAR(255), location VARCHAR(255), sustainability_rating INT); INSERT INTO suppliers (id, name, location, sustainability_rating) VALUES (1, \u0027Supplier A\u0027, \u0027Paris\u0027, 86);", + "sql": "SELECT name FROM suppliers WHERE location \u003d \u0027Paris\u0027 AND sustainability_rating \u003e 85;", + "sql_explanation": "Select the names of suppliers located in \u0027Paris\u0027 and have a sustainability rating greater than 85." +}, { + "id": "4429", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of manufacturing plants in the United Kingdom that have implemented ethical labor practices?", + "sql_context": "CREATE TABLE plants (id INT, name VARCHAR(50), country VARCHAR(50), ethical INT);", + "sql": "SELECT COUNT(*) FROM plants WHERE country \u003d \u0027United Kingdom\u0027 AND ethical \u003d 1;", + "sql_explanation": "The query filters the data for manufacturing plants in the United Kingdom that have implemented ethical labor practices. 
Then it counts the number of rows in the filtered dataset." +}, { + "id": "4570", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of machines in the \u0027machining\u0027 and \u0027assembly\u0027 areas?", + "sql_context": "CREATE TABLE machines (id INT, name VARCHAR(50), area VARCHAR(20), quantity INT); INSERT INTO machines (id, name, area, quantity) VALUES (1, \u0027CNC Mill\u0027, \u0027machining\u0027, 10), (2, \u0027Lathe\u0027, \u0027machining\u0027, 5), (3, \u0027Assembly Robot\u0027, \u0027assembly\u0027, 8), (4, \u0027Inspection Gauge\u0027, \u0027quality control\u0027, 3);", + "sql": "SELECT SUM(quantity) FROM machines WHERE area IN (\u0027machining\u0027, \u0027assembly\u0027);", + "sql_explanation": "This SQL query calculates the total number of machines in the \u0027machining\u0027 and \u0027assembly\u0027 areas by summing the \u0027quantity\u0027 column in the \u0027machines\u0027 table where the \u0027area\u0027 is either \u0027machining\u0027 or \u0027assembly\u0027." 
+}, { + "id": "4582", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production output of plants located in \u0027CityX\u0027 or \u0027CityY\u0027?", + "sql_context": "CREATE TABLE plants (plant_id INT, plant_name VARCHAR(50), city VARCHAR(50), production_output INT); INSERT INTO plants (plant_id, plant_name, city, production_output) VALUES (1, \u0027PlantA\u0027, \u0027CityX\u0027, 500), (2, \u0027PlantB\u0027, \u0027CityY\u0027, 700), (3, \u0027PlantC\u0027, \u0027CityX\u0027, 600), (4, \u0027PlantD\u0027, \u0027CityZ\u0027, 800), (5, \u0027PlantE\u0027, \u0027CityY\u0027, 900);", + "sql": "SELECT SUM(production_output) FROM plants WHERE city IN (\u0027CityX\u0027, \u0027CityY\u0027);", + "sql_explanation": "This query calculates the total production output of plants located in \u0027CityX\u0027 or \u0027CityY\u0027. It sums up the production_output of the \u0027plants\u0027 table where the city is either \u0027CityX\u0027 or \u0027CityY\u0027." 
+}, { + "id": "4700", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"machines\" table with the following details: name \"MachineA\", type \"CNC\", and year 2018", + "sql_context": "CREATE TABLE machines (id INT PRIMARY KEY, name VARCHAR(100), type VARCHAR(50), year INT);", + "sql": "INSERT INTO machines (name, type, year) VALUES (\u0027MachineA\u0027, \u0027CNC\u0027, 2018);", + "sql_explanation": "{1. Inserts a new record into the \"machines\" table, 2. Specifies the values for the \"name\", \"type\", and \"year\" columns}" +}, { + "id": "4798", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average power consumption (in kWh) by machines in the \u0027molding\u0027 category?", + "sql_context": "CREATE TABLE machines (machine_id INT, category VARCHAR(10), power_consumption FLOAT); INSERT INTO machines (machine_id, category, power_consumption) VALUES (1, \u0027molding\u0027, 5.6), (2, \u0027tooling\u0027, 7.3), (3, \u0027molding\u0027, 6.2), (4, \u0027molding\u0027, 4.8);", + "sql": "SELECT AVG(power_consumption) FROM machines WHERE category \u003d \u0027molding\u0027;", + "sql_explanation": "This query calculates the average power consumption from the \u0027machines\u0027 table for those records where the 
category is \u0027molding\u0027." +}, { + "id": "4892", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 suppliers with the highest ethical certification scores?", + "sql_context": "CREATE TABLE suppliers (id INT, name TEXT, certification_score INT); INSERT INTO suppliers (id, name, certification_score) VALUES (1, \u0027Supplier A\u0027, 95), (2, \u0027Supplier B\u0027, 90), (3, \u0027Supplier C\u0027, 88), (4, \u0027Supplier D\u0027, 85), (5, \u0027Supplier E\u0027, 80), (6, \u0027Supplier F\u0027, 75);", + "sql": "SELECT name FROM suppliers ORDER BY certification_score DESC LIMIT 5;", + "sql_explanation": "The SQL query lists the top 5 suppliers with the highest ethical certification scores by ordering the certification_score column in descending order and limiting the results to 5 rows." 
+}, { + "id": "4893", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the \u0027manufacturing\u0027 department?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), salary FLOAT); INSERT INTO employees (id, name, department, salary) VALUES (1, \u0027John Doe\u0027, \u0027manufacturing\u0027, 60000), (2, \u0027Jane Smith\u0027, \u0027marketing\u0027, 70000);", + "sql": "SELECT AVG(salary) FROM employees WHERE department \u003d \u0027manufacturing\u0027;", + "sql_explanation": "This query calculates the average salary for employees in the \u0027manufacturing\u0027 department. It does this by selecting the \u0027salary\u0027 column for all rows where the \u0027department\u0027 is \u0027manufacturing\u0027, and then calculating the average of those salaries." 
+}, { + "id": "4968", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary in the Engineering department?", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), department VARCHAR(50), salary DECIMAL(10,2));", + "sql": "SELECT MIN(salary) FROM Employees WHERE department \u003d \u0027Engineering\u0027;", + "sql_explanation": "The SQL query calculates the minimum salary for employees in the Engineering department by filtering the Employees table based on department, then calculating the minimum salary using the MIN function." +}, { + "id": "4995", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generated by factory 3?", + "sql_context": "CREATE TABLE factories (factory_id INT, waste_generated_kg INT); INSERT INTO factories VALUES (1, 500), (2, 300), (3, 700), (4, 400), (5, 600);", + "sql": "SELECT SUM(waste_generated_kg) FROM factories WHERE factory_id \u003d 3;", + "sql_explanation": "The SQL query calculates the total waste generated by factory 3 by summing the \u0027waste_generated_kg\u0027 column where the \u0027factory_id\u0027 is 3." 
+}, { + "id": "5146", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027workforce_development\u0027 table where the \u0027end_date\u0027 is before \u00272022-06-01\u0027", + "sql_context": "CREATE TABLE workforce_development (id INT PRIMARY KEY, program_name VARCHAR(100), country VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "DELETE FROM workforce_development WHERE end_date \u003c \u00272022-06-01\u0027;", + "sql_explanation": "This query deletes all records in the \u0027workforce_development\u0027 table where the \u0027end_date\u0027 is before \u00272022-06-01\u0027." +}, { + "id": "5182", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the \u0027automation\u0027 department from the database.", + "sql_context": "CREATE TABLE company_departments (dept_name TEXT, avg_salary NUMERIC); INSERT INTO company_departments (dept_name, avg_salary) VALUES (\u0027automation\u0027, 42000.00);", + "sql": "DELETE FROM company_departments WHERE dept_name \u003d \u0027automation\u0027;", + "sql_explanation": "1. Delete records from the \u0027company_departments\u0027 table. 2. Remove the records with a department name of \u0027automation\u0027." 
+}, { + "id": "5196", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the \u0027manager\u0027 of the \u0027quality control\u0027 department?", + "sql_context": "CREATE TABLE departments (name VARCHAR(50), manager VARCHAR(100)); INSERT INTO departments (name, manager) VALUES (\u0027quality control\u0027, \u0027Alex Johnson\u0027), (\u0027production\u0027, \u0027Mike Brown\u0027);", + "sql": "SELECT manager FROM departments WHERE name \u003d \u0027quality control\u0027;", + "sql_explanation": "This SQL query retrieves the manager of the \u0027quality control\u0027 department by filtering for rows with \u0027quality control\u0027 in the \u0027name\u0027 column." 
+}, { + "id": "5261", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of workforce development training hours received by workers in the \u0027Food\u0027 industry?", + "sql_context": "CREATE TABLE training_hours (id INT, industry VARCHAR(255), hours INT);", + "sql": "SELECT AVG(hours) FROM training_hours WHERE industry \u003d \u0027Food\u0027;", + "sql_explanation": "This query calculates the average number of workforce development training hours received by workers in the \u0027Food\u0027 industry by finding the average of the \u0027hours\u0027 column in the \u0027training_hours\u0027 table where the \u0027industry\u0027 column is equal to \u0027Food\u0027." 
+}, { + "id": "5301", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and locations of all factories that have implemented Industry 4.0 technologies?", + "sql_context": "CREATE TABLE factories (factory_id INT, name TEXT, location TEXT, industry40 BOOLEAN); INSERT INTO factories (factory_id, name, location, industry40) VALUES (1, \u0027Flex Factory\u0027, \u0027USA\u0027, TRUE), (2, \u0027GreenTech Plant\u0027, \u0027Germany\u0027, FALSE), (3, \u0027Smart Manufacturing Co.\u0027, \u0027Japan\u0027, TRUE);", + "sql": "SELECT name, location FROM factories WHERE industry40 \u003d TRUE;", + "sql_explanation": "This query filters the factories table to only include rows where the industry40 column is TRUE. It then selects the name and location columns from the filtered table, returning the names and locations of all factories that have implemented Industry 4.0 technologies." 
+}, { + "id": "5350", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the location of the inventory with id 1 in the \"inventory\" table to \"Warehouse 2\"", + "sql_context": "CREATE TABLE inventory (id INT PRIMARY KEY, product_id INT, quantity INT, location VARCHAR(50)); INSERT INTO inventory (id, product_id, quantity, location) VALUES (1, 1, 100, \u0027Warehouse 1\u0027), (2, 2, 50, \u0027Warehouse 2\u0027);", + "sql": "UPDATE inventory SET location \u003d \u0027Warehouse 2\u0027 WHERE id \u003d 1;", + "sql_explanation": "{1. Selects the record with id 1 from the \"inventory\" table, 2. Updates the \"location\" column with the new value \"Warehouse 2\"}" +}, { + "id": "5355", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the type of circular economy with ID 2 to \u0027Redesign\u0027", + "sql_context": "CREATE SCHEMA manufacturing;CREATE TABLE circular_economy (id INT PRIMARY KEY, name TEXT, type TEXT);INSERT INTO circular_economy (id, name, type) VALUES (1, \u0027Circular Economy A\u0027, \u0027Reuse\u0027); INSERT INTO circular_economy (id, name, type) VALUES (2, \u0027Circular Economy B\u0027, \u0027Reduce\u0027); INSERT INTO circular_economy (id, name, type) VALUES (3, \u0027Circular Economy C\u0027, \u0027Repurpose\u0027);", + "sql": "UPDATE 
circular_economy SET type \u003d \u0027Redesign\u0027 WHERE id \u003d 2;", + "sql_explanation": "Updates the \u0027type\u0027 field of the record with \u0027id\u0027 2 to \u0027Redesign\u0027 in the \u0027circular_economy\u0027 table." +}, { + "id": "5517", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all plants that have machines in the \u0027tooling\u0027 category.", + "sql_context": "CREATE TABLE machines (machine_id INT, plant VARCHAR(10), category VARCHAR(10)); INSERT INTO machines (machine_id, plant, category) VALUES (1, \u0027plant1\u0027, \u0027molding\u0027), (2, \u0027plant2\u0027, \u0027tooling\u0027), (3, \u0027plant1\u0027, \u0027tooling\u0027), (4, \u0027plant3\u0027, \u0027molding\u0027);", + "sql": "SELECT plant FROM machines WHERE category \u003d \u0027tooling\u0027;", + "sql_explanation": "This query selects all records from the \u0027machines\u0027 table where the category is \u0027tooling\u0027, returning the plants that have machines in this category." 
+}, { + "id": "5702", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all Industry 4.0 projects at the \u0027Smart Manufacturing\u0027 plant.", + "sql_context": "CREATE TABLE Projects (id INT, name VARCHAR(255), plant_id INT); INSERT INTO Projects (id, name, plant_id) VALUES (1, \u0027Automation\u0027, 5), (2, \u0027AI Implementation\u0027, 5), (3, \u0027Data Analytics\u0027, 5); CREATE TABLE Plants (id INT, name VARCHAR(255)); INSERT INTO Plants (id, name) VALUES (5, \u0027Smart Manufacturing\u0027);", + "sql": "SELECT name FROM Projects WHERE plant_id \u003d 5;", + "sql_explanation": "This SQL query retrieves the names of all Industry 4.0 projects at the Smart Manufacturing plant by selecting the name field from the Projects table where the plant_id is 5." +}, { + "id": "5805", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all records from the \u0027ethical_manufacturing\u0027 table", + "sql_context": "CREATE TABLE ethical_manufacturing (id INT AUTO_INCREMENT, company_name VARCHAR(50), location VARCHAR(50), ethical_certification VARCHAR(50), PRIMARY KEY(id));", + "sql": "SELECT * FROM ethical_manufacturing;", + "sql_explanation": "This query retrieves all records from the \u0027ethical_manufacturing\u0027 table." 
+}, { + "id": "5810", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Retrieve all records from the \u0027operational_machines\u0027 view", + "sql_context": "CREATE TABLE machines (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), status VARCHAR(255)); CREATE VIEW operational_machines AS SELECT * FROM machines WHERE status \u003d \u0027Operational\u0027;", + "sql": "SELECT * FROM operational_machines;", + "sql_explanation": "This SQL query retrieves all records from the \u0027operational_machines\u0027 view. The query uses the SELECT statement to retrieve all columns (*) from the view." +}, { + "id": "507", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average ages of patients diagnosed with \u0027Depression\u0027 and \u0027Bipolar Disorder\u0027 in \u0027clinic_l\u0027?", + "sql_context": "CREATE TABLE patient_diagnosis_l (patient_id INT, diagnosis VARCHAR(50), age INT, treatment_center VARCHAR(50)); INSERT INTO patient_diagnosis_l (patient_id, diagnosis, age, treatment_center) VALUES (12, \u0027Depression\u0027, 30, \u0027clinic_l\u0027), (13, \u0027Bipolar Disorder\u0027, 40, \u0027clinic_l\u0027);", + "sql": "SELECT AVG(age) FROM patient_diagnosis_l WHERE diagnosis \u003d \u0027Depression\u0027 AND treatment_center 
\u003d \u0027clinic_l\u0027; SELECT AVG(age) FROM patient_diagnosis_l WHERE diagnosis \u003d \u0027Bipolar Disorder\u0027 AND treatment_center \u003d \u0027clinic_l\u0027;", + "sql_explanation": "This query calculates the average ages of patients diagnosed with \u0027Depression\u0027 and \u0027Bipolar Disorder\u0027 in \u0027clinic_l\u0027." +}, { + "id": "2339", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of patients who received \u0027Dialectical Behavior Therapy\u0027 in \u0027clinic_k\u0027?", + "sql_context": "CREATE TABLE patient_treatment (patient_id INT, treatment_name VARCHAR(50), treatment_center VARCHAR(50)); INSERT INTO patient_treatment (patient_id, treatment_name, treatment_center) VALUES (11, \u0027Dialectical Behavior Therapy\u0027, \u0027clinic_k\u0027);", + "sql": "SELECT COUNT(*) FROM patient_treatment WHERE treatment_name \u003d \u0027Dialectical Behavior Therapy\u0027 AND treatment_center \u003d \u0027clinic_k\u0027;", + "sql_explanation": "This query counts the number of patients who received \u0027Dialectical Behavior Therapy\u0027 in \u0027clinic_k\u0027." 
+}, { + "id": "2914", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients with PTSD were treated in Germany in the last 6 months?", + "sql_context": "CREATE TABLE patients (patient_id INT, has_ptsd BOOLEAN, treatment_date DATE, country VARCHAR(50)); INSERT INTO patients (patient_id, has_ptsd, treatment_date, country) VALUES (1, TRUE, \u00272022-01-01\u0027, \u0027Germany\u0027), (2, FALSE, \u00272021-12-25\u0027, \u0027Germany\u0027), (3, TRUE, \u00272022-03-15\u0027, \u0027Canada\u0027);", + "sql": "SELECT COUNT(*) FROM patients WHERE has_ptsd \u003d TRUE AND treatment_date \u003e\u003d \u00272021-07-01\u0027 AND country \u003d \u0027Germany\u0027;", + "sql_explanation": "This query counts the number of patients with PTSD who were treated in Germany in the last 6 months. It does so by using the COUNT function on the entire table, filtering the data for patients with PTSD who had a treatment date in the last 6 months and are from Germany." 
+}, { + "id": "3000", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and number of campaigns in the public_awareness_campaigns table, ordered by the number of campaigns in descending order.", + "sql_context": "CREATE TABLE public_awareness_campaigns (campaign_id INT, campaign_name VARCHAR(255), number_of_campaigns INT);", + "sql": "SELECT campaign_name, number_of_campaigns FROM public_awareness_campaigns ORDER BY number_of_campaigns DESC;", + "sql_explanation": "The SQL query lists the names and number of campaigns in the public_awareness_campaigns table, ordered by the number of campaigns in descending order. It does this by using the ORDER BY clause on the number_of_campaigns column in descending order, and selecting the campaign_name column." 
+}, { + "id": "3509", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique mental health treatment centers in Canada and Australia.", + "sql_context": "CREATE TABLE treatment_centers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO treatment_centers (id, name, country) VALUES (1, \u0027Sunshine Mental Health\u0027, \u0027Canada\u0027); INSERT INTO treatment_centers (id, name, country) VALUES (2, \u0027Oceanic Mental Health\u0027, \u0027Australia\u0027); INSERT INTO treatment_centers (id, name, country) VALUES (3, \u0027Peak Mental Health\u0027, \u0027Canada\u0027);", + "sql": "SELECT COUNT(DISTINCT country) FROM treatment_centers WHERE country IN (\u0027Canada\u0027, \u0027Australia\u0027);", + "sql_explanation": "This query counts the number of unique countries in the treatment_centers table, filtering the data where country is either \u0027Canada\u0027 or \u0027Australia\u0027." 
+}, { + "id": "3768", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients who received talk therapy or group therapy in the \u0027treatments\u0027 schema?", + "sql_context": "CREATE TABLE treatments (treatment_id INT, patient_id INT, treatment_type VARCHAR(255)); INSERT INTO treatments (treatment_id, patient_id, treatment_type) VALUES (1, 1, \u0027Individual Therapy\u0027), (2, 2, \u0027Group Therapy\u0027), (3, 3, \u0027Medication\u0027), (4, 4, \u0027CBT\u0027), (5, 5, \u0027DBT\u0027);", + "sql": "SELECT COUNT(*) FROM treatments WHERE treatment_type IN (\u0027Talk Therapy\u0027, \u0027Group Therapy\u0027);", + "sql_explanation": "This query counts the number of patients who received talk therapy or group therapy in the \u0027treatments\u0027 schema by filtering the records where the treatment_type is either \u0027Talk Therapy\u0027 or \u0027Group Therapy\u0027 and then calculating the count of the filtered records." 
+}, { + "id": "3972", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who received therapy in Argentina?", + "sql_context": "CREATE TABLE patients (id INT, country VARCHAR(255), age INT, therapy_received BOOLEAN); INSERT INTO patients (id, country, age, therapy_received) VALUES (1, \u0027Argentina\u0027, 30, true), (2, \u0027Argentina\u0027, 25, false);", + "sql": "SELECT AVG(age) FROM patients WHERE country \u003d \u0027Argentina\u0027 AND therapy_received \u003d true;", + "sql_explanation": "This query calculates the average age of patients in Argentina who received therapy by filtering the patients table for Argentine patients who received therapy, then calculating the average age of the resulting set." 
+}, { + "id": "4049", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients diagnosed with depression in Canada?", + "sql_context": "CREATE TABLE patients (id INT, age INT, diagnosis TEXT, country TEXT); INSERT INTO patients (id, age, diagnosis, country) VALUES (1, 35, \u0027Depression\u0027, \u0027Canada\u0027); INSERT INTO patients (id, age, diagnosis, country) VALUES (2, 42, \u0027Anxiety\u0027, \u0027USA\u0027);", + "sql": "SELECT AVG(age) FROM patients WHERE diagnosis \u003d \u0027Depression\u0027 AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "Filter the patients table for rows where the diagnosis is \u0027Depression\u0027 and the country is \u0027Canada\u0027. Then, calculate the average age of the filtered rows." 
+}, { + "id": "4658", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of sessions attended by a patient in Spain for any therapy?", + "sql_context": "CREATE TABLE therapy_attendance (id INT, patient_id INT, session_name TEXT, num_sessions INT, country TEXT);", + "sql": "SELECT MAX(num_sessions) FROM therapy_attendance WHERE country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query calculates the maximum number of sessions attended by a patient in Spain for any therapy. We use the MAX function to find the greatest num_sessions value for patients in Spain." +}, { + "id": "4750", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who received psychodynamic therapy?", + "sql_context": "CREATE TABLE patients (patient_id INT, age INT, treatment VARCHAR(20)); INSERT INTO patients (patient_id, age, treatment) VALUES (1, 32, \u0027psychodynamic therapy\u0027), (2, 45, \u0027psychodynamic therapy\u0027), (3, 50, \u0027CBT\u0027);", + "sql": "SELECT AVG(age) FROM patients WHERE treatment \u003d \u0027psychodynamic therapy\u0027;", + "sql_explanation": "The SQL query calculates the average age of patients who received 
psychodynamic therapy by using the AVG function on the age column, filtering for rows where the treatment is psychodynamic therapy." +}, { + "id": "5063", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients have been treated with CBT or DBT?", + "sql_context": "CREATE TABLE treatments (patient_id INT, treatment VARCHAR(20)); INSERT INTO treatments (patient_id, treatment) VALUES (1, \u0027CBT\u0027), (2, \u0027DBT\u0027), (3, \u0027Medication\u0027), (4, \u0027CBT\u0027), (5, \u0027DBT\u0027);", + "sql": "SELECT COUNT(*) FROM treatments WHERE treatment IN (\u0027CBT\u0027, \u0027DBT\u0027);", + "sql_explanation": "This query counts the number of patients who have received Cognitive Behavioral Therapy (CBT) or Dialectical Behavior Therapy (DBT) by filtering the treatments table where the treatment column is equal to \u0027CBT\u0027 or \u0027DBT\u0027 and then counting the number of rows that match this criteria." 
+}, { + "id": "5199", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mental health campaigns were launched in \u0027Southwest\u0027 region in 2021?", + "sql_context": "CREATE TABLE campaigns_2021 (campaign_id INT, name VARCHAR(50), budget INT, region VARCHAR(50)); INSERT INTO campaigns_2021 (campaign_id, name, budget, region) VALUES (1, \u0027Mental Health Matters\u0027, 15000, \u0027Northeast\u0027), (2, \u0027Break the Stigma\u0027, 20000, \u0027Southwest\u0027), (3, \u0027Healing Hearts\u0027, 12000, \u0027Midwest\u0027);", + "sql": "SELECT COUNT(*) FROM campaigns_2021 WHERE region \u003d \u0027Southwest\u0027;", + "sql_explanation": "The SQL query counts the number of mental health campaigns launched in the \u0027Southwest\u0027 region in 2021 using the COUNT() function. It filters the records where region is \u0027Southwest\u0027 and returns the number of matching records." 
+}, { + "id": "5303", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients who received cognitive behavioral therapy (CBT) or dialectical behavior therapy (DBT) in the United States?", + "sql_context": "CREATE TABLE patients (patient_id INT, therapy VARCHAR(10)); INSERT INTO patients (patient_id, therapy) VALUES (1, \u0027CBT\u0027), (2, \u0027DBT\u0027), (3, \u0027CBT\u0027), (4, \u0027NA\u0027);", + "sql": "SELECT SUM(therapy \u003d \u0027CBT\u0027 OR therapy \u003d \u0027DBT\u0027) FROM patients;", + "sql_explanation": "This SQL query calculates the total number of patients who received CBT or DBT by counting the number of rows where the therapy column is either \u0027CBT\u0027 or \u0027DBT\u0027. It uses a MySQL-specific syntax for conditional counting." 
+}, { + "id": "5441", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the treatment duration for the patient with ID 4 to 18 sessions.", + "sql_context": "CREATE TABLE patients (id INT PRIMARY KEY, name VARCHAR(255), age INT); INSERT INTO patients (id, name, age) VALUES (4, \u0027Alex Sam\u0027, 27); CREATE TABLE treatments (id INT PRIMARY KEY, patient_id INT, name VARCHAR(255), duration INT); INSERT INTO treatments (id, patient_id, name, duration) VALUES (2, 4, \u0027Cognitive Behavioral Therapy (CBT)\u0027, 15);", + "sql": "UPDATE treatments SET duration \u003d 18 WHERE patient_id \u003d 4;", + "sql_explanation": "This query updates the duration column value to 18 for the row with patient_id 4 in the treatments table." 
+}, { + "id": "5477", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average age of patients who received therapy in 2022?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, therapy TEXT, therapy_year INT); INSERT INTO patients (id, name, age, therapy, therapy_year) VALUES (1, \u0027Alice\u0027, 30, \u0027CBT\u0027, 2022), (2, \u0027Bob\u0027, 45, \u0027DBT\u0027, 2021), (3, \u0027Charlie\u0027, 60, \u0027CBT\u0027, 2018), (4, \u0027David\u0027, 50, \u0027CBT\u0027, 2022), (5, \u0027Eve\u0027, 55, \u0027DBT\u0027, 2019);", + "sql": "SELECT AVG(age) FROM patients WHERE therapy_year \u003d 2022;", + "sql_explanation": "This SQL query calculates the average age of patients who received therapy in 2022 by using the AVG function on the age column, filtering the rows where therapy_year is 2022." 
+}, { + "id": "5521", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who improved after therapy?", + "sql_context": "CREATE TABLE patients (patient_id INT, age INT, improvement CHAR(1)); INSERT INTO patients (patient_id, age, improvement) VALUES (1, 30, \u0027Y\u0027), (2, 25, \u0027N\u0027), (3, 45, \u0027Y\u0027);", + "sql": "SELECT AVG(age) FROM patients WHERE improvement \u003d \u0027Y\u0027;", + "sql_explanation": "The SQL query calculates the average age of patients who have shown improvement (\u0027Y\u0027) in their condition after therapy by using the AVG function." 
+}, { + "id": "5523", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average age of patients who received CBT?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, treatment TEXT); INSERT INTO patients (id, name, age, treatment) VALUES (1, \u0027Alice\u0027, 30, \u0027CBT\u0027), (2, \u0027Bob\u0027, 45, \u0027DBT\u0027), (3, \u0027Charlie\u0027, 60, \u0027CBT\u0027);", + "sql": "SELECT AVG(age) FROM patients WHERE treatment \u003d \u0027CBT\u0027;", + "sql_explanation": "This SQL query calculates the average age of patients who received Cognitive Behavioral Therapy (CBT) by using the AVG function on the age column, filtering the rows where treatment is \u0027CBT\u0027." 
+}, { + "id": "5655", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in budgets between the most expensive campaign and the least expensive campaign?", + "sql_context": "CREATE TABLE campaigns (id INT, name VARCHAR(50), location VARCHAR(50), budget INT);", + "sql": "SELECT MAX(budget) - MIN(budget) FROM campaigns;", + "sql_explanation": "This query calculates the difference between the maximum and minimum budgets of all campaigns, providing the difference in spending between the most expensive and least expensive campaigns." +}, { + "id": "1495", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people have been vaccinated against measles in the African region in the last 5 years?", + "sql_context": "CREATE TABLE vaccinations (vaccination_id INT, patient_id INT, vaccine VARCHAR(20), date DATE); INSERT INTO vaccinations (vaccination_id, patient_id, vaccine, date) VALUES (1, 3, \u0027Measles\u0027, \u00272018-01-01\u0027); INSERT INTO vaccinations (vaccination_id, patient_id, vaccine, date) VALUES (2, 4, \u0027Influenza\u0027, \u00272020-02-01\u0027);", + "sql": "SELECT COUNT(*) FROM vaccinations WHERE vaccine \u003d \u0027Measles\u0027 AND date BETWEEN 
DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) AND CURRENT_DATE AND region \u003d \u0027African\u0027", + "sql_explanation": "We count the number of vaccinations from the vaccinations table where the vaccine is measles, the date is within the last 5 years, and the region is African." +}, { + "id": "1943", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of the population that has access to clean water in Southeast Asia?", + "sql_context": "CREATE TABLE WaterData (Country VARCHAR(50), Population INT, CleanWaterPopulation INT); INSERT INTO WaterData (Country, Population, CleanWaterPopulation) VALUES (\u0027Indonesia\u0027, 273523615, 221523615), (\u0027Philippines\u0027, 113523615, 81523615);", + "sql": "SELECT Country, (CleanWaterPopulation / Population) * 100 AS PercentCleanWater FROM WaterData WHERE Country IN (\u0027Indonesia\u0027, \u0027Philippines\u0027);", + "sql_explanation": "This query calculates the percentage of the population that has access to clean water in Southeast Asia by dividing the number of people with access to clean water by the total population and multiplying by 100. It filters the data to only include Indonesia and the Philippines, which are both in Southeast Asia." 
+}, { + "id": "2146", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients with asthma in Illinois have visited a hospital in the last month?", + "sql_context": "CREATE TABLE Patients (PatientID INT, Asthma TEXT, LastHospitalVisit DATE, State TEXT); INSERT INTO Patients (PatientID, Asthma, LastHospitalVisit, State) VALUES (1, \u0027Yes\u0027, \u00272021-12-01\u0027, \u0027Illinois\u0027);", + "sql": "SELECT COUNT(*) FROM Patients WHERE Asthma IS NOT NULL AND LastHospitalVisit \u003e\u003d DATEADD(month, -1, GETDATE()) AND State \u003d \u0027Illinois\u0027;", + "sql_explanation": "This query counts the number of patients with asthma in Illinois who have visited a hospital in the last month. It does this by selecting the COUNT function on all records, and filtering the data by Asthma, LastHospitalVisit and State. It uses the DATEADD function to subtract one month from the current date." 
+}, { + "id": "2322", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many males in Texas have accessed healthcare services in the last month?", + "sql_context": "CREATE TABLE HealthcareAccess (ID INT, Gender VARCHAR(10), AccessDate DATE); INSERT INTO HealthcareAccess (ID, Gender, AccessDate) VALUES (1, \u0027Male\u0027, \u00272022-01-15\u0027);", + "sql": "SELECT COUNT(*) FROM HealthcareAccess WHERE Gender \u003d \u0027Male\u0027 AND AccessDate \u003e\u003d DATEADD(MONTH, -1, GETDATE()) AND State \u003d \u0027Texas\u0027;", + "sql_explanation": "This query determines the number of males in Texas who have accessed healthcare services in the last month by filtering the HealthcareAccess table based on Gender, AccessDate, and State columns using the COUNT() function to count the number of rows." 
+}, { + "id": "2524", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients in South Africa were diagnosed with Hepatitis B in 2019?", + "sql_context": "CREATE TABLE Patients (ID INT, Gender VARCHAR(10), Disease VARCHAR(20), Country VARCHAR(30), Diagnosis_Date DATE); INSERT INTO Patients (ID, Gender, Disease, Country, Diagnosis_Date) VALUES (1, \u0027Male\u0027, \u0027Hepatitis B\u0027, \u0027South Africa\u0027, \u00272019-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM Patients WHERE Disease \u003d \u0027Hepatitis B\u0027 AND Country \u003d \u0027South Africa\u0027 AND YEAR(Diagnosis_Date) \u003d 2019;", + "sql_explanation": "Count the number of patients diagnosed with Hepatitis B in South Africa in 2019." 
+}, { + "id": "2586", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of unvaccinated patients against measles in Florida?", + "sql_context": "CREATE TABLE Patients (PatientID INT, Age INT, Gender TEXT, VaccinationStatus TEXT, State TEXT); INSERT INTO Patients (PatientID, Age, Gender, VaccinationStatus, State) VALUES (1, 5, \u0027Male\u0027, \u0027Not Vaccinated\u0027, \u0027Florida\u0027);", + "sql": "SELECT MIN(Age) FROM Patients WHERE VaccinationStatus \u003d \u0027Not Vaccinated\u0027 AND State \u003d \u0027Florida\u0027 AND Disease \u003d \u0027Measles\u0027;", + "sql_explanation": "This query calculates the minimum age of unvaccinated patients against measles in Florida. It does this by selecting the MIN function on the Age column, and filtering the data by VaccinationStatus, State and Disease." 
+}, { + "id": "2732", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients diagnosed with Dengue Fever in India in 2021?", + "sql_context": "CREATE TABLE Patients (ID INT, Gender VARCHAR(10), Disease VARCHAR(20), Country VARCHAR(30), Diagnosis_Date DATE); INSERT INTO Patients (ID, Gender, Disease, Country, Diagnosis_Date) VALUES (1, \u0027Male\u0027, \u0027Dengue Fever\u0027, \u0027India\u0027, \u00272021-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM Patients WHERE Disease \u003d \u0027Dengue Fever\u0027 AND Country \u003d \u0027India\u0027 AND YEAR(Diagnosis_Date) \u003d 2021;", + "sql_explanation": "Count the total number of patients diagnosed with Dengue Fever in India in 2021." 
+}, { + "id": "2765", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average life expectancy for African American communities?", + "sql_context": "CREATE TABLE countries (id INT, name VARCHAR(100), continent VARCHAR(50), life_expectancy INT, community VARCHAR(20));", + "sql": "SELECT continent, AVG(life_expectancy) as avg_life_expectancy FROM countries WHERE community \u003d \u0027African American\u0027;", + "sql_explanation": "This query calculates the average life expectancy for African American communities by filtering for only the African American community and then calculating the average of the life_expectancy column." 
+}, { + "id": "3107", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases of tuberculosis were reported in Chicago in the past 6 months?", + "sql_context": "CREATE TABLE tb_cases (id INT, case_number INT, report_date DATE, city TEXT); INSERT INTO tb_cases (id, case_number, report_date, city) VALUES (1, 123, \u00272022-01-01\u0027, \u0027Chicago\u0027); INSERT INTO tb_cases (id, case_number, report_date, city) VALUES (2, 456, \u00272022-06-15\u0027, \u0027Chicago\u0027);", + "sql": "SELECT COUNT(*) FROM tb_cases WHERE report_date \u003e\u003d DATEADD(month, -6, CURRENT_DATE) AND city \u003d \u0027Chicago\u0027;", + "sql_explanation": "This query counts the number of tuberculosis cases reported in Chicago in the past 6 months by using the COUNT function and filtering the data for records where the report date is within the past 6 months and the city is Chicago." 
+}, { + "id": "3148", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new public health policy analysis data for a specific policy", + "sql_context": "CREATE TABLE public_health_policy_analysis_v2 (id INT, policy_name VARCHAR(30), impact_score INT);", + "sql": "INSERT INTO public_health_policy_analysis_v2 (id, policy_name, impact_score) VALUES (4, \u0027Policy B\u0027, 87);", + "sql_explanation": "This query inserts a new record into the public_health_policy_analysis_v2 table with id 4, policy_name as \u0027Policy B\u0027, and impact_score as 87." +}, { + "id": "3336", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mental health facilities are there in Seattle in 2021?", + "sql_context": "CREATE TABLE Mental_Health (ID INT, Name VARCHAR(50), City VARCHAR(50), State VARCHAR(20), Year INT); INSERT INTO Mental_Health (ID, Name, City, State, Year) VALUES (1, \u0027Seattle Mental Health\u0027, \u0027Seattle\u0027, \u0027Washington\u0027, 2021); INSERT INTO Mental_Health (ID, Name, City, State, Year) VALUES (2, \u0027Seattle Counseling Center\u0027, \u0027Seattle\u0027, \u0027Washington\u0027, 2021);", + "sql": "SELECT COUNT(*) FROM Mental_Health WHERE City \u003d \u0027Seattle\u0027 AND State \u003d 
\u0027Washington\u0027 AND Year \u003d 2021;", + "sql_explanation": "This query calculates the number of mental health facilities in Seattle in 2021. It does so by using the COUNT function to find the number of records from the \u0027Mental_Health\u0027 table, filtered by \u0027Seattle\u0027 city, \u0027Washington\u0027 state, and 2021 year." +}, { + "id": "3463", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many confirmed COVID-19 cases were reported in \u0027clinic_data\u0027 for the month of January 2021?", + "sql_context": "CREATE SCHEMA clinic_data; CREATE TABLE covid_cases (id INT, clinic_id INT, date DATE, cases INT); INSERT INTO clinic_data.covid_cases (id, clinic_id, date, cases) VALUES (1, 1001, \u00272021-01-01\u0027, 5), (2, 1001, \u00272021-01-02\u0027, 7), (3, 1002, \u00272021-01-01\u0027, 3), (4, 1002, \u00272021-01-03\u0027, 8);", + "sql": "SELECT SUM(cases) FROM clinic_data.covid_cases WHERE date BETWEEN \u00272021-01-01\u0027 AND \u00272021-01-31\u0027;", + "sql_explanation": "This query sums the \u0027cases\u0027 column for all rows in the \u0027covid_cases\u0027 table in the \u0027clinic_data\u0027 schema where the date falls within January 2021, returning the total number of confirmed COVID-19 cases." 
+}, { + "id": "3696", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the infection rate of Tuberculosis in India?", + "sql_context": "CREATE TABLE Infections (Country VARCHAR(50), Disease VARCHAR(50), Infection_Rate FLOAT); INSERT INTO Infections (Country, Disease, Infection_Rate) VALUES (\u0027India\u0027, \u0027Tuberculosis\u0027, 0.27);", + "sql": "SELECT Infection_Rate FROM Infections WHERE Country \u003d \u0027India\u0027 AND Disease \u003d \u0027Tuberculosis\u0027;", + "sql_explanation": "This query retrieves the Infection_Rate value for India and Tuberculosis in the Infections table, representing the infection rate of Tuberculosis in India." 
+}, { + "id": "3918", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the prevalence of diabetes in South Africa?", + "sql_context": "CREATE TABLE Disease (ID INT, Country VARCHAR(100), Disease VARCHAR(50), Prevalence FLOAT); INSERT INTO Disease (ID, Country, Disease, Prevalence) VALUES (1, \u0027South Africa\u0027, \u0027Diabetes\u0027, 10.3);", + "sql": "SELECT Prevalence FROM Disease WHERE Country \u003d \u0027South Africa\u0027 AND Disease \u003d \u0027Diabetes\u0027;", + "sql_explanation": "The SQL query retrieves the prevalence of diabetes in South Africa by using the SELECT statement with the Prevalence column. It filters the data for South Africa and diabetes by using the WHERE clause with the Country and Disease columns." 
+}, { + "id": "3975", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people have been vaccinated against measles in New York?", + "sql_context": "CREATE TABLE vaccinations (id INT, vaccine TEXT, date TEXT, location TEXT); INSERT INTO vaccinations (id, vaccine, date, location) VALUES (1, \u0027measles\u0027, \u00272021-01-01\u0027, \u0027New York\u0027); INSERT INTO vaccinations (id, vaccine, date, location) VALUES (2, \u0027measles\u0027, \u00272021-01-02\u0027, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM vaccinations WHERE vaccine \u003d \u0027measles\u0027 AND location \u003d \u0027New York\u0027;", + "sql_explanation": "The SQL query filters the vaccinations table for records with the vaccine measles and location New York, and then counts the number of rows that meet this criteria." 
+}, { + "id": "4169", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals are there in North America?", + "sql_context": "CREATE TABLE Country (name VARCHAR(50), hospital_count INT); INSERT INTO Country (name, hospital_count) VALUES (\u0027Canada\u0027, 1357), (\u0027United States\u0027, 6146);", + "sql": "SELECT SUM(hospital_count) FROM Country WHERE name IN (\u0027Canada\u0027, \u0027United States\u0027);", + "sql_explanation": "The SQL query sums up the number of hospitals in Canada and the United States, which are countries in North America." +}, { + "id": "4180", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people have access to clean water in Oceania?", + "sql_context": "CREATE TABLE Continent (name VARCHAR(50), clean_water INT); INSERT INTO Continent (name, clean_water) VALUES (\u0027Australia\u0027, 21000000), (\u0027New Zealand\u0027, 4500000);", + "sql": "SELECT SUM(clean_water) FROM Continent WHERE name IN (\u0027Australia\u0027, \u0027New Zealand\u0027);", + "sql_explanation": "The SQL query sums up the number of people with access to clean water in Australia and New Zealand, which are countries in Oceania." 
+}, { + "id": "4204", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of infectious diseases reported in New York in 2019?", + "sql_context": "CREATE TABLE InfectiousDiseases (DiseaseID INT, State VARCHAR(20), Year INT, Disease VARCHAR(50)); INSERT INTO InfectiousDiseases (DiseaseID, State, Year, Disease) VALUES (1, \u0027New York\u0027, 2019, \u0027COVID-19\u0027); INSERT INTO InfectiousDiseases (DiseaseID, State, Year, Disease) VALUES (2, \u0027New York\u0027, 2018, \u0027Influenza\u0027);", + "sql": "SELECT COUNT(*) FROM InfectiousDiseases WHERE State \u003d \u0027New York\u0027 AND Year \u003d 2019;", + "sql_explanation": "Calculates the total number of infectious diseases reported in New York in 2019 by using the COUNT function and filtering the rows by \u0027New York\u0027 state and \u00272019\u0027 year." 
+}, { + "id": "4232", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of providers in Wisconsin that do not offer mental health services.", + "sql_context": "CREATE TABLE providers (id INT, name TEXT, service TEXT, location TEXT); INSERT INTO providers (id, name, service, location) VALUES (1, \u0027Healthcare One\u0027, \u0027Primary Care\u0027, \u0027Wisconsin\u0027); INSERT INTO providers (id, name, service, location) VALUES (2, \u0027Care Central\u0027, \u0027Mental Health\u0027, \u0027Wisconsin\u0027); INSERT INTO providers (id, name, service, location) VALUES (3, \u0027Provider Plus\u0027, \u0027Primary Care\u0027, \u0027Wisconsin\u0027);", + "sql": "DELETE FROM providers WHERE location \u003d \u0027Wisconsin\u0027 AND service !\u003d \u0027Mental Health\u0027", + "sql_explanation": "This SQL query deletes records of providers in Wisconsin that do not offer mental health services by using the DELETE statement, filtered by the specified service and location." 
+}, { + "id": "4311", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of hospitals in New York state that offer free COVID-19 testing?", + "sql_context": "CREATE TABLE Hospitals (HospitalID INT, Name VARCHAR(50), State VARCHAR(20), FreeTesting BOOLEAN); INSERT INTO Hospitals (HospitalID, Name, State, FreeTesting) VALUES (1, \u0027Mount Sinai\u0027, \u0027New York\u0027, TRUE); INSERT INTO Hospitals (HospitalID, Name, State, FreeTesting) VALUES (2, \u0027NYU Langone\u0027, \u0027New York\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM Hospitals WHERE State \u003d \u0027New York\u0027 AND FreeTesting \u003d TRUE;", + "sql_explanation": "This query counts the number of hospitals in New York state that offer free COVID-19 testing. It uses the COUNT function to count the number of records and WHERE clause to filter the hospitals that offer free testing." 
+}, { + "id": "4366", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many infectious disease outbreaks were reported in the state of New York in the year 2021?", + "sql_context": "CREATE TABLE public.outbreaks (id SERIAL PRIMARY KEY, state TEXT, year INTEGER, disease TEXT); INSERT INTO public.outbreaks (state, year, disease) VALUES (\u0027New York\u0027, 2021, \u0027COVID-19\u0027), (\u0027California\u0027, 2021, \u0027Monkeypox\u0027), (\u0027Florida\u0027, 2021, \u0027Dengue\u0027);", + "sql": "SELECT COUNT(*) FROM public.outbreaks WHERE state \u003d \u0027New York\u0027 AND year \u003d 2021;", + "sql_explanation": "The SQL query counts the number of rows in the outbreaks table where the state column is equal to \u0027New York\u0027 and the year column is equal to 2021 using the COUNT function and WHERE clause." +}, { + "id": "4374", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the location of a healthcare provider with the name Dr. Johnson.", + "sql_context": "CREATE TABLE HealthcareProviders (Id INT, Name TEXT, Location TEXT, Specialty TEXT); INSERT INTO HealthcareProviders (Id, Name, Location, Specialty) VALUES (1, \u0027Dr. 
Smith\u0027, \u0027City X\u0027, \u0027Family Medicine\u0027); INSERT INTO HealthcareProviders (Id, Name, Location, Specialty) VALUES (2, \u0027Dr. Johnson\u0027, \u0027City X\u0027, \u0027Cardiology\u0027);", + "sql": "UPDATE HealthcareProviders SET Location \u003d \u0027City Y\u0027 WHERE Name \u003d \u0027Dr. Johnson\u0027;", + "sql_explanation": "The query updates the location of Dr. Johnson from City X to City Y." +}, { + "id": "4393", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the suicide rate in New Zealand?", + "sql_context": "CREATE TABLE Suicide (ID INT, Country VARCHAR(100), Year INT, SuicideRate FLOAT); INSERT INTO Suicide (ID, Country, Year, SuicideRate) VALUES (1, \u0027New Zealand\u0027, 2020, 12);", + "sql": "SELECT SuicideRate FROM Suicide WHERE Country \u003d \u0027New Zealand\u0027 AND Year \u003d 2020;", + "sql_explanation": "The SQL query retrieves the suicide rate in New Zealand in the year 2020 by using the SELECT statement with the SuicideRate column. It filters the data for New Zealand and the year 2020 by using the WHERE clause with the Country and Year columns." 
+}, { + "id": "4534", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Select the total number of vaccinations for \u0027California\u0027", + "sql_context": "CREATE TABLE vaccination_stats (id INT PRIMARY KEY, state VARCHAR(50), total_vaccinations INT); INSERT INTO vaccination_stats (id, state, total_vaccinations) VALUES (1, \u0027California\u0027, 25000000);", + "sql": "SELECT total_vaccinations FROM vaccination_stats WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "1. This command selects the \u0027total_vaccinations\u0027 column from the \u0027vaccination_stats\u0027 table. 2. It filters the records to only include those where the \u0027state\u0027 column is \u0027California\u0027." 
+}, { + "id": "4553", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the obesity rate among children in East Africa in 2020?", + "sql_context": "CREATE TABLE obesity (country VARCHAR(255), region VARCHAR(255), year INT, rate DECIMAL(5,2)); INSERT INTO obesity (country, region, year, rate) VALUES (\u0027Country C\u0027, \u0027East Africa\u0027, 2020, 0.05), (\u0027Country D\u0027, \u0027East Africa\u0027, 2020, 0.06);", + "sql": "SELECT AVG(rate) FROM obesity WHERE region \u003d \u0027East Africa\u0027 AND year \u003d 2020;", + "sql_explanation": "Calculates the average obesity rate among children in East Africa in 2020 by averaging the rate column for rows where the region is \u0027East Africa\u0027 and year is 2020." 
+}, { + "id": "4832", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and locations of all female healthcare workers.", + "sql_context": "CREATE TABLE healthcare_workers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), location VARCHAR(50)); INSERT INTO healthcare_workers (id, name, age, gender, location) VALUES (1, \u0027John Doe\u0027, 35, \u0027Male\u0027, \u0027New York\u0027); INSERT INTO healthcare_workers (id, name, age, gender, location) VALUES (2, \u0027Jane Smith\u0027, 32, \u0027Female\u0027, \u0027California\u0027);", + "sql": "SELECT name, location FROM healthcare_workers WHERE gender \u003d \u0027Female\u0027;", + "sql_explanation": "This SQL query lists the names and locations of all female healthcare workers by selecting the \"name\" and \"location\" columns from the \"healthcare_workers\" table where the \"gender\" column is equal to \u0027Female\u0027." 
+}, { + "id": "4845", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients diagnosed with diabetes in the state of Texas?", + "sql_context": "CREATE TABLE public.patient_data (id SERIAL PRIMARY KEY, age INTEGER, diagnosis TEXT); INSERT INTO public.patient_data (age, diagnosis) VALUES (45, \u0027Diabetes\u0027), (60, \u0027Hypertension\u0027), (55, \u0027Diabetes\u0027);", + "sql": "SELECT AVG(age) FROM public.patient_data WHERE diagnosis \u003d \u0027Diabetes\u0027;", + "sql_explanation": "The SQL query calculates the average age of patients in the patient_data table where the diagnosis column is equal to \u0027Diabetes\u0027 using the AVG function." 
+}, { + "id": "4890", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 provinces in Canada with the highest number of COVID-19 cases.", + "sql_context": "CREATE TABLE canada_covid (province VARCHAR(50), cases INT); INSERT INTO canada_covid (province, cases) VALUES (\u0027Ontario\u0027, 120000), (\u0027Quebec\u0027, 150000), (\u0027British Columbia\u0027, 80000);", + "sql": "SELECT province, cases FROM canada_covid ORDER BY cases DESC LIMIT 3;", + "sql_explanation": "This SQL query orders the canada_covid table by the cases column in descending order and limits the results to the top 3 rows using the LIMIT clause." 
+}, { + "id": "4927", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population of \u0027clinics\u0027 in the \u0027health_facilities\u0027 table?", + "sql_context": "CREATE TABLE health_facilities (facility_id INT, name VARCHAR(50), type VARCHAR(50), population INT, city VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT AVG(population) FROM health_facilities WHERE type \u003d \u0027clinic\u0027;", + "sql_explanation": "This query calculates the average \u0027population\u0027 of rows in the \u0027health_facilities\u0027 table where the \u0027type\u0027 is \u0027clinic\u0027, giving the average population served by clinics." 
+}, { + "id": "4934", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the public health policies for California and Texas.", + "sql_context": "CREATE TABLE HealthPolicies (id INT, name VARCHAR(50), state VARCHAR(50), description TEXT); INSERT INTO HealthPolicies VALUES (1, \u0027Policy A\u0027, \u0027California\u0027, \u0027Description A\u0027); INSERT INTO HealthPolicies VALUES (2, \u0027Policy B\u0027, \u0027California\u0027, \u0027Description B\u0027); INSERT INTO HealthPolicies VALUES (3, \u0027Policy C\u0027, \u0027Texas\u0027, \u0027Description C\u0027);", + "sql": "SELECT * FROM HealthPolicies WHERE state IN (\u0027California\u0027, \u0027Texas\u0027);", + "sql_explanation": "The SQL query selects all the rows from the HealthPolicies table where the state column value is either California or Texas." 
+}, { + "id": "5240", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique types of community health programs in \u0027northwest\u0027 regions.", + "sql_context": "CREATE TABLE programs (id INT, name TEXT, region TEXT, type TEXT); INSERT INTO programs (id, name, region, type) VALUES (1, \u0027Program A\u0027, \u0027northwest\u0027, \u0027prevention\u0027); INSERT INTO programs (id, name, region, type) VALUES (2, \u0027Program B\u0027, \u0027southwest\u0027, \u0027treatment\u0027); INSERT INTO programs (id, name, region, type) VALUES (3, \u0027Program C\u0027, \u0027northwest\u0027, \u0027outreach\u0027);", + "sql": "SELECT DISTINCT type FROM programs WHERE region \u003d \u0027northwest\u0027;", + "sql_explanation": "The SQL query selects distinct \u0027type\u0027 values from the \u0027programs\u0027 table, filtering for records in the \u0027northwest\u0027 region. This results in a list of unique types of community health programs in the specified region." 
+}, { + "id": "5554", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals are there in the state of Texas?", + "sql_context": "CREATE TABLE Hospitals (ID INT, Name VARCHAR(100), State VARCHAR(50)); INSERT INTO Hospitals (ID, Name, State) VALUES (1, \u0027MD Anderson Cancer Center\u0027, \u0027Texas\u0027), (2, \u0027Baylor University Medical Center\u0027, \u0027Texas\u0027);", + "sql": "SELECT COUNT(*) FROM Hospitals WHERE State \u003d \u0027Texas\u0027;", + "sql_explanation": "This SQL query counts the number of hospitals in the state of Texas by using the COUNT function on all rows (*) and filtering the data using the WHERE clause to only include hospitals from Texas." 
+}, { + "id": "5691", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of HIV diagnoses for each race in the hiv_diagnoses table?", + "sql_context": "CREATE TABLE hiv_diagnoses (race TEXT, num_diagnoses INT); INSERT INTO hiv_diagnoses (race, num_diagnoses) VALUES (\u0027White\u0027, 5000), (\u0027Black\u0027, 8000), (\u0027Hispanic\u0027, 6000), (\u0027Asian\u0027, 3000), (\u0027Other\u0027, 2000);", + "sql": "SELECT race, num_diagnoses FROM hiv_diagnoses;", + "sql_explanation": "This query returns the number of HIV diagnoses for each race in the hiv_diagnoses table. It selects the race and num_diagnoses columns from the hiv_diagnoses table and returns the number of diagnoses for each race." 
+}, { + "id": "5763", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals are there in each state of the USA?", + "sql_context": "CREATE TABLE States (State VARCHAR(50), HospitalCount INT); INSERT INTO States (State, HospitalCount) VALUES (\u0027California\u0027, 401), (\u0027Texas\u0027, 417), (\u0027New York\u0027, 212), (\u0027Florida\u0027, 214);", + "sql": "SELECT State, HospitalCount FROM States;", + "sql_explanation": "This SQL query retrieves the number of hospitals in each state of the USA by simply selecting the State and HospitalCount columns from the States table." +}, { + "id": "776", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new emergency calls in \u0027Seattle\u0027 for October 2021", + "sql_context": "CREATE TABLE emergency_calls(id INT, location VARCHAR(20), month_year DATE, emergency_type VARCHAR(20));", + "sql": "INSERT INTO emergency_calls(id, location, month_year, emergency_type) VALUES (1, \u0027Seattle\u0027, \u00272021-10-01\u0027, \u0027medical\u0027), (2, \u0027Seattle\u0027, \u00272021-10-02\u0027, \u0027fire\u0027), (3, \u0027Seattle\u0027, \u00272021-10-03\u0027, \u0027police\u0027);", + "sql_explanation": "*Insert three new records with the specified values for 
\u0027Seattle\u0027 and October 2021*" +}, { + "id": "1592", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum response time for any emergency in Miami-Dade County in 2021?", + "sql_context": "CREATE TABLE emergency_responses (id INT, incident_id INT, response_time INT, city VARCHAR(255), state VARCHAR(255), county VARCHAR(255)); INSERT INTO emergency_responses (id, incident_id, response_time, city, state, county) VALUES (1, 1, 15, \u0027Miami\u0027, \u0027Florida\u0027, \u0027Miami-Dade County\u0027); INSERT INTO emergency_responses (id, incident_id, response_time, city, state, county) VALUES (2, 2, 8, \u0027Miami Beach\u0027, \u0027Florida\u0027, \u0027Miami-Dade County\u0027);", + "sql": "SELECT MAX(response_time) FROM emergency_responses WHERE county \u003d \u0027Miami-Dade County\u0027 AND reported_date \u003e\u003d \u00272021-01-01\u0027 AND reported_date \u003c \u00272022-01-01\u0027;", + "sql_explanation": "This query calculates the maximum response time for any emergency in Miami-Dade County in 2021 by filtering on the county and reported date within the specified range and then calculating the maximum response time." 
+}, { + "id": "1690", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new disaster preparedness event with id 2001, name \u0027Earthquake Drill\u0027, date 2023-09-15, and location \u0027Los Angeles\u0027.", + "sql_context": "CREATE TABLE disaster_preparedness_events (event_id INT, event_name VARCHAR(255), event_date DATE, location VARCHAR(255)); INSERT INTO disaster_preparedness_events (event_id, event_name, event_date, location) VALUES (1001, \u0027National Night Out\u0027, \u00272023-08-01\u0027, \u0027Central Park\u0027); INSERT INTO disaster_preparedness_events (event_id, event_name, event_date, location) VALUES (1002, \u0027First Aid Training\u0027, \u00272023-08-10\u0027, \u0027New York Public Library\u0027);", + "sql": "INSERT INTO disaster_preparedness_events (event_id, event_name, event_date, location) VALUES (2001, \u0027Earthquake Drill\u0027, \u00272023-09-15\u0027, \u0027Los Angeles\u0027);", + "sql_explanation": "This query inserts a new row into the disaster_preparedness_events table with the specified values for the event_id, event_name, event_date, and location columns." 
+}, { + "id": "1962", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the maximum response time for medical emergencies in Miami in 2022?", + "sql_context": "CREATE TABLE medical_emergencies (id INT, incident_time TIMESTAMP, response_time INT); INSERT INTO medical_emergencies (id, incident_time, response_time) VALUES (1, \u00272022-02-01 14:30:00\u0027, 15), (2, \u00272022-02-03 09:15:00\u0027, 20);", + "sql": "SELECT MAX(response_time) FROM medical_emergencies WHERE incident_time \u003e\u003d \u00272022-01-01\u0027 AND incident_time \u003c \u00272022-02-01\u0027 AND city \u003d \u0027Miami\u0027;", + "sql_explanation": "The SQL query calculates the maximum response time for medical emergencies in Miami in 2022. It filters the medical_emergencies table by the given date range and city and calculates the maximum response time using the MAX function." 
+}, { + "id": "2030", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of community policing events held in \u0027northside\u0027 and \u0027southside\u0027 in Q3 of 2019?", + "sql_context": "CREATE TABLE community_policing (id INT, event_type VARCHAR(20), location VARCHAR(20), event_date DATE); INSERT INTO community_policing (id, event_type, location, event_date) VALUES (1, \u0027meeting\u0027, \u0027northside\u0027, \u00272019-07-01\u0027);", + "sql": "SELECT SUM(*) FROM community_policing WHERE location IN (\u0027northside\u0027, \u0027southside\u0027) AND event_date BETWEEN \u00272019-07-01\u0027 AND \u00272019-09-30\u0027;", + "sql_explanation": "The SQL query calculates the total number of community policing events held in \u0027northside\u0027 and \u0027southside\u0027 during Q3 of 2019 by filtering the \u0027community_policing\u0027 table by location and date range and then summing the number of records using the SUM() function." 
+}, { + "id": "2277", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum response time for emergency calls in \u0027South End\u0027 and \u0027North Pointe\u0027 last month?", + "sql_context": "CREATE TABLE emergency_calls (id INT, region VARCHAR(20), response_time INT, month INT);", + "sql": "SELECT MIN(response_time) FROM emergency_calls WHERE region IN (\u0027South End\u0027, \u0027North Pointe\u0027) AND month \u003d MONTH(CURRENT_DATE) - 1;", + "sql_explanation": "This SQL query calculates the minimum response time for emergency calls in \u0027South End\u0027 and \u0027North Pointe\u0027 last month by using the MIN function on the response_time column, filtered by the regions \u0027South End\u0027 and \u0027North Pointe\u0027 and the last month." 
+}, { + "id": "2359", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency calls in the \u0027county\u0027 schema in Q2 2022?", + "sql_context": "CREATE SCHEMA if not exists county; CREATE TABLE if not exists county.emergency_responses (id INT, response_time TIME, call_date DATE); INSERT INTO county.emergency_responses (id, response_time, call_date) VALUES (1, \u002701:34:00\u0027, \u00272022-04-25\u0027), (2, \u002702:15:00\u0027, \u00272022-06-12\u0027), (3, \u002701:52:00\u0027, \u00272022-07-03\u0027);", + "sql": "SELECT AVG(TIME_TO_SEC(response_time)) FROM county.emergency_responses WHERE QUARTER(call_date) \u003d 2 AND YEAR(call_date) \u003d 2022;", + "sql_explanation": "This SQL query calculates the average response time for emergency calls in the \u0027county\u0027 schema in Q2 2022. It uses the AVG() function to find the mean value of the response_time column (converted to seconds using TIME_TO_SEC()) from the emergency_responses table in the \u0027county\u0027 schema where the call_date is in Q2 2022." 
+}, { + "id": "3255", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for fire incidents in the \u0027Northside\u0027 precinct?", + "sql_context": "CREATE TABLE precinct (id INT, name VARCHAR(50)); INSERT INTO precinct (id, name) VALUES (1, \u0027Northside\u0027); CREATE TABLE incident (id INT, precinct_id INT, type VARCHAR(50), timestamp TIMESTAMP, response_time INT);", + "sql": "SELECT AVG(response_time) as avg_response_time FROM incident WHERE precinct_id \u003d 1 AND type \u003d \u0027fire\u0027;", + "sql_explanation": "This query calculates the average response time for fire incidents in the Northside precinct by filtering on precinct_id and type and then calculating the average of the response_time column." 
+}, { + "id": "3502", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for fire calls in 2021 and 2022?", + "sql_context": "CREATE TABLE fire_calls (call_id INT, call_date DATE, response_time INT); INSERT INTO fire_calls (call_id, call_date, response_time) VALUES (1, \u00272021-01-01\u0027, 15), (2, \u00272022-02-03\u0027, 12);", + "sql": "SELECT AVG(response_time) FROM fire_calls WHERE call_date BETWEEN \u00272021-01-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "The query calculates the average response time for fire calls that occurred between Jan 1, 2021 and Dec 31, 2022 by summing up the response_time column values and dividing by the count of calls during that time period." 
+}, { + "id": "3745", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many traffic accidents occurred in the state of New York in the year 2020?", + "sql_context": "CREATE TABLE TrafficAccidents (id INT, state VARCHAR(20), year INT, accident_count INT);", + "sql": "SELECT SUM(accident_count) FROM TrafficAccidents WHERE state \u003d \u0027New York\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the total number of traffic accidents in the state of New York in the year 2020 by summing the accident_count column where the state is \u0027New York\u0027 and the year is 2020." +}, { + "id": "3805", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of police officers on patrol in the \u0027downtown\u0027 area during night shifts?", + "sql_context": "CREATE TABLE police_patrols (id INT, area VARCHAR(20), shift VARCHAR(10), num_officers INT); INSERT INTO police_patrols (id, area, shift, num_officers) VALUES (1, \u0027downtown\u0027, \u0027night\u0027, 5), (2, \u0027uptown\u0027, \u0027night\u0027, 3), (3, \u0027downtown\u0027, \u0027day\u0027, 7);", + "sql": "SELECT AVG(num_officers) FROM police_patrols WHERE area \u003d \u0027downtown\u0027 AND shift \u003d \u0027night\u0027;", + 
"sql_explanation": "The SQL query calculates the average number of police officers on patrol in the \u0027downtown\u0027 area during night shifts by using the AVG() function on the num_officers column, and filtering the data by the area and shift columns with the WHERE clause." +}, { + "id": "3810", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the call types and dates for all calls in \u0027downtown_police\u0027 that occurred after \u00272022-01-02 12:00:00\u0027?", + "sql_context": "CREATE TABLE downtown_police (id INT, call_type VARCHAR(20), call_date TIMESTAMP); INSERT INTO downtown_police VALUES (1, \u0027theft\u0027, \u00272022-01-03 14:00:00\u0027);", + "sql": "SELECT call_type, call_date FROM downtown_police WHERE call_date \u003e \u00272022-01-02 12:00:00\u0027;", + "sql_explanation": "We create the \u0027downtown_police\u0027 table and insert a record. The \u0027sql\u0027 query selects call types and dates for calls after \u00272022-01-02 12:00:00\u0027." 
+}, { + "id": "3859", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027community_policing\u0027 table to reflect the current number of community meetings held this year", + "sql_context": "CREATE TABLE community_policing (cp_id INT, did INT, meetings_this_year INT); INSERT INTO community_policing (cp_id, did, meetings_this_year) VALUES (1, 1, 5), (2, 2, 3), (3, 3, 7);", + "sql": "UPDATE community_policing SET meetings_this_year \u003d meetings_this_year + 2 WHERE did \u003d 1;", + "sql_explanation": "Update the community_policing table to increment the number of community meetings held this year by 2 for the district with ID 1." +}, { + "id": "3922", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community policing events were held in 2020 and 2021?", + "sql_context": "CREATE TABLE events (event_id INT, year INT, type VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM events WHERE year IN (2020, 2021) AND type \u003d \u0027Community Policing\u0027;", + "sql_explanation": "Filters events table for 2020 and 2021 and community policing events, then counts the number of rows." 
+}, { + "id": "4104", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many emergency calls were received in the \u0027east\u0027 region with a response time greater than 180 minutes?", + "sql_context": "CREATE TABLE emergency_calls (id INT, region VARCHAR(10), response_time INT); INSERT INTO emergency_calls (id, region, response_time) VALUES (1, \u0027west\u0027, 120), (2, \u0027west\u0027, 150), (3, \u0027east\u0027, 195), (4, \u0027north\u0027, 105);", + "sql": "SELECT COUNT(*) FROM emergency_calls WHERE region \u003d \u0027east\u0027 AND response_time \u003e 180;", + "sql_explanation": "The SQL query counts the number of emergency calls received in the \u0027east\u0027 region with a response time greater than 180 minutes by using the COUNT() function with a WHERE clause to filter the data by the region and response_time columns." 
+}, { + "id": "4149", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a disaster with ID 11, name \u0027Tornado\u0027, and start date 2022-03-01 into the \"disasters\" table", + "sql_context": "CREATE TABLE disasters (id INT PRIMARY KEY, name TEXT, start_date DATE);", + "sql": "INSERT INTO disasters (id, name, start_date) VALUES (11, \u0027Tornado\u0027, \u00272022-03-01\u0027);", + "sql_explanation": "The INSERT statement adds a new record to the \"disasters\" table with the specified ID, name, and start date." +}, { + "id": "4451", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"disaster_preparedness\" table to set the \"preparedness_score\" to 80 for the \"region_id\" 2", + "sql_context": "CREATE TABLE disaster_preparedness (region_id INT, preparedness_score INT); INSERT INTO disaster_preparedness (region_id, preparedness_score) VALUES (1, 70), (2, 60), (3, 90);", + "sql": "UPDATE disaster_preparedness SET preparedness_score \u003d 80 WHERE region_id \u003d 2;", + "sql_explanation": "The UPDATE statement modifies the \"preparedness_score\" field to 80 for the record with \"region_id\" of 2 in the \"disaster_preparedness\" table." 
+}, { + "id": "4685", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency calls in the \u0027Mountain\u0027 region?", + "sql_context": "CREATE TABLE emergency_calls (id INT, region VARCHAR(20), response_time INT); INSERT INTO emergency_calls (id, region, response_time) VALUES (1, \u0027Mountain\u0027, 100), (2, \u0027Mountain\u0027, 120), (3, \u0027Mountain\u0027, 85);", + "sql": "SELECT AVG(response_time) FROM emergency_calls WHERE region \u003d \u0027Mountain\u0027;", + "sql_explanation": "This SQL query calculates the average response time for emergency calls in the \u0027Mountain\u0027 region by selecting the average value of the \u0027response_time\u0027 column, where the \u0027region\u0027 column is equal to \u0027Mountain\u0027." 
+}, { + "id": "4688", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many police officers are employed in the city of Chicago?", + "sql_context": "CREATE TABLE public.police_department (id SERIAL PRIMARY KEY, city VARCHAR(255), num_officers INTEGER); INSERT INTO public.police_department (city, num_officers) VALUES (\u0027Chicago\u0027, 12000), (\u0027New York\u0027, 35000), (\u0027Los Angeles\u0027, 10000);", + "sql": "SELECT num_officers FROM public.police_department WHERE city \u003d \u0027Chicago\u0027;", + "sql_explanation": "This query retrieves the number of police officers employed in the city of Chicago by selecting the value of the num_officers column for the row where the city is \u0027Chicago\u0027." 
+}, { + "id": "4723", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total response time for emergency calls in the \u0027Western\u0027 region?", + "sql_context": "CREATE TABLE emergency_calls (id INT, region VARCHAR(20), response_time INT); INSERT INTO emergency_calls (id, region, response_time) VALUES (1, \u0027Western\u0027, 120);", + "sql": "SELECT SUM(response_time) FROM emergency_calls WHERE region \u003d \u0027Western\u0027;", + "sql_explanation": "This SQL query calculates the total response time for emergency calls in the \u0027Western\u0027 region by selecting the sum of the \u0027response_time\u0027 column, where the \u0027region\u0027 column is equal to \u0027Western\u0027." 
+}, { + "id": "4834", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of crimes committed in a single day in New York City?", + "sql_context": "CREATE TABLE Crimes (id INT, city VARCHAR(20), date DATE, number_of_crimes INT);", + "sql": "SELECT MAX(number_of_crimes) FROM Crimes WHERE city \u003d \u0027New York City\u0027;", + "sql_explanation": "This SQL query calculates the maximum number of crimes committed in a single day in New York City by selecting MAX function on the number_of_crimes column and filtering the data where the city is New York City." +}, { + "id": "4899", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the response time for medical emergencies in the city of Boston to 7 minutes.", + "sql_context": "CREATE TABLE boston_medical_emergencies (id INT, response_time INT); INSERT INTO boston_medical_emergencies (id, response_time) VALUES (1, 8);", + "sql": "UPDATE boston_medical_emergencies SET response_time \u003d 7 WHERE id \u003d 1;", + "sql_explanation": "The SQL query updates the response time for medical emergencies in the city of Boston to 7 minutes by specifying an UPDATE statement with the appropriate WHERE clause to filter records based on the id." 
+}, { + "id": "5260", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency calls in the state of New York?", + "sql_context": "CREATE TABLE emergency_calls (id INT, state VARCHAR(20), response_time FLOAT); INSERT INTO emergency_calls (id, state, response_time) VALUES (1, \u0027New York\u0027, 5.2), (2, \u0027New York\u0027, 6.1), (3, \u0027California\u0027, 4.9);", + "sql": "SELECT COUNT(*) FROM emergency_calls WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query counts the number of rows in the emergency_calls table where the state is \u0027New York\u0027." +}, { + "id": "5499", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all incidents recorded after 2022-12-31 from the \u0027incidents\u0027 table", + "sql_context": "CREATE TABLE incidents (id INT, incident_type VARCHAR(255), location VARCHAR(255), occurred_on DATE);", + "sql": "DELETE FROM incidents WHERE occurred_on \u003e \u00272022-12-31\u0027;", + "sql_explanation": "This query deletes all records from the \u0027incidents\u0027 table where the \u0027occurred_on\u0027 date is after 2022-12-31." 
+}, { + "id": "487", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average communication budget for organizations focusing on climate education in 2020?", + "sql_context": "CREATE TABLE climate_education_budget (org_name VARCHAR(50), year INT, budget FLOAT); INSERT INTO climate_education_budget (org_name, year, budget) VALUES (\u0027Eco-Schools\u0027, 2020, 750000), (\u0027Teachers for Future\u0027, 2020, 850000), (\u0027Green Schools Initiative\u0027, 2020, 950000), (\u0027Climate Education Project\u0027, 2020, 1050000), (\u0027Climate Change Connection\u0027, 2020, 1150000);", + "sql": "SELECT AVG(budget) as avg_budget FROM climate_education_budget WHERE org_name IN (\u0027Eco-Schools\u0027, \u0027Teachers for Future\u0027, \u0027Green Schools Initiative\u0027, \u0027Climate Education Project\u0027, \u0027Climate Change Connection\u0027) AND year \u003d 2020;", + "sql_explanation": "The SQL query calculates the average of the budget for the specified organizations in 2020." 
+}, { + "id": "1243", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance provided by organization X and organization W in the climate finance sector for 2020?", + "sql_context": "CREATE TABLE climate_finance (organization TEXT, contribution DECIMAL(10, 2), year INT, sector TEXT); INSERT INTO climate_finance (organization, contribution, year, sector) VALUES (\u0027Organization X\u0027, 850000, 2020, \u0027climate finance\u0027), (\u0027Organization X\u0027, 900000, 2021, \u0027climate finance\u0027), (\u0027Organization X\u0027, 950000, 2022, \u0027climate finance\u0027), (\u0027Organization W\u0027, 750000, 2020, \u0027climate finance\u0027), (\u0027Organization W\u0027, 800000, 2021, \u0027climate finance\u0027), (\u0027Organization W\u0027, 850000, 2022, \u0027climate finance\u0027);", + "sql": "SELECT SUM(contribution) FROM climate_finance WHERE (organization \u003d \u0027Organization X\u0027 OR organization \u003d \u0027Organization W\u0027) AND sector \u003d \u0027climate finance\u0027 AND year \u003d 2020;", + "sql_explanation": "This SQL query calculates the total climate finance provided by organization X and organization W in the climate finance sector for 2020 by summing up the \u0027contribution\u0027 column where the \u0027organization\u0027 is either \u0027Organization X\u0027 or \u0027Organization W\u0027, \u0027sector\u0027 is \u0027climate finance\u0027, and \u0027year\u0027 is 2020." 
+}, { + "id": "1399", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Average investment in climate adaptation in Asia", + "sql_context": "CREATE TABLE climate_adaptation_investment (id INT, project_name VARCHAR(255), sector VARCHAR(255), country VARCHAR(255), year INT, investment FLOAT);", + "sql": "SELECT AVG(investment) FROM climate_adaptation_investment WHERE country IN (\u0027China\u0027, \u0027India\u0027, \u0027Indonesia\u0027, \u0027Japan\u0027, \u0027Vietnam\u0027) AND sector \u003d \u0027Climate adaptation\u0027;", + "sql_explanation": "Calculate the average investment in climate adaptation projects in Asia." 
+}, { + "id": "1581", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries with the most funding for climate communication programs in 2020 and their respective funding amounts.", + "sql_context": "CREATE TABLE climate_funding (country VARCHAR(255), year INT, funding_amount FLOAT);", + "sql": "SELECT country, funding_amount FROM climate_funding WHERE year \u003d 2020 AND funding_program \u003d \u0027climate communication\u0027 ORDER BY funding_amount DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 countries with the most funding for climate communication programs in 2020. It filters the climate_funding table to only include records from 2020 and where the funding_program is \u0027climate communication\u0027. The query uses the ORDER BY clause to sort the results in descending order based on the funding_amount, and the LIMIT clause to only return the top 3 records." 
+}, { + "id": "1612", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance provided by organization Y and organization Z in the climate finance sector?", + "sql_context": "CREATE TABLE climate_finance (organization TEXT, contribution DECIMAL(10, 2), year INT, sector TEXT); INSERT INTO climate_finance (organization, contribution, year, sector) VALUES (\u0027Organization Y\u0027, 700000, 2018, \u0027climate finance\u0027), (\u0027Organization Y\u0027, 750000, 2019, \u0027climate finance\u0027), (\u0027Organization Y\u0027, 800000, 2020, \u0027climate finance\u0027), (\u0027Organization Z\u0027, 650000, 2018, \u0027climate finance\u0027), (\u0027Organization Z\u0027, 700000, 2019, \u0027climate finance\u0027), (\u0027Organization Z\u0027, 750000, 2020, \u0027climate finance\u0027);", + "sql": "SELECT SUM(contribution) FROM climate_finance WHERE (organization \u003d \u0027Organization Y\u0027 OR organization \u003d \u0027Organization Z\u0027) AND sector \u003d \u0027climate finance\u0027;", + "sql_explanation": "This SQL query calculates the total climate finance provided by organization Y and organization Z in the climate finance sector by summing up the \u0027contribution\u0027 column where the \u0027organization\u0027 is either \u0027Organization Y\u0027 or \u0027Organization Z\u0027 and \u0027sector\u0027 is \u0027climate finance\u0027." 
+}, { + "id": "1774", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all climate mitigation projects in the Middle East and North Africa that started after 2017.", + "sql_context": "CREATE TABLE climate_mitigation (project_id INT, project_name VARCHAR(100), start_year INT, region VARCHAR(50), status VARCHAR(50));", + "sql": "SELECT project_id, project_name FROM climate_mitigation WHERE region \u003d \u0027Middle East and North Africa\u0027 AND start_year \u003e 2017 AND status \u003d \u0027active\u0027;", + "sql_explanation": "This query retrieves all climate mitigation projects in the Middle East and North Africa that started after 2017 by selecting the \u0027project_id\u0027 and \u0027project_name\u0027 columns where the \u0027region\u0027 is \u0027Middle East and North Africa\u0027, the \u0027start_year\u0027 is greater than 2017, and the \u0027status\u0027 is \u0027active\u0027." 
+}, { + "id": "1837", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of climate finance invested in renewable energy projects in the Latin America and Caribbean region in 2022?", + "sql_context": "CREATE TABLE climate_finance (country VARCHAR(255), sector VARCHAR(255), investment_amount NUMERIC, region VARCHAR(255), year INT);", + "sql": "SELECT SUM(investment_amount) FROM climate_finance WHERE sector \u003d \u0027renewable energy\u0027 AND region \u003d \u0027Latin America and Caribbean\u0027 AND year \u003d 2022;", + "sql_explanation": "The SQL query calculates the total amount of climate finance invested in renewable energy projects in the Latin America and Caribbean region in 2022. It does this by summing the investment_amount for each row where the sector is \u0027renewable energy\u0027, the region is \u0027Latin America and Caribbean\u0027 and the year is 2022." 
+}, { + "id": "2061", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of climate finance provided to projects in Africa for renewable energy by the Climate and Clean Air Coalition?", + "sql_context": "CREATE TABLE climate_and_clean_air_coalition (fund_id INT, project_name VARCHAR(100), country VARCHAR(50), sector VARCHAR(50), amount FLOAT, renewable_energy_flag BOOLEAN); INSERT INTO climate_and_clean_air_coalition (fund_id, project_name, country, sector, amount, renewable_energy_flag) VALUES (1, \u0027Solar Power Africa\u0027, \u0027Africa\u0027, \u0027Energy\u0027, 5000000, TRUE);", + "sql": "SELECT AVG(amount) FROM climate_and_clean_air_coalition WHERE country \u003d \u0027Africa\u0027 AND sector \u003d \u0027Energy\u0027 AND renewable_energy_flag \u003d TRUE;", + "sql_explanation": "This query calculates the average amount of climate finance (using the AVG function) provided to projects in Africa (specified in the \u0027country\u0027 column) for renewable energy (filtered by sector \u003d \u0027Energy\u0027 and renewable_energy_flag \u003d TRUE) by the Climate and Clean Air Coalition by specifying the appropriate filters." 
+}, { + "id": "2140", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate change adaptation budget for African countries in 2024?", + "sql_context": "CREATE TABLE climate_adaptation (country VARCHAR(50), year INT, budget FLOAT); INSERT INTO climate_adaptation (country, year, budget) VALUES (\u0027Kenya\u0027, 2024, 8000000), (\u0027Nigeria\u0027, 2024, 10000000), (\u0027South Africa\u0027, 2024, 12000000), (\u0027Egypt\u0027, 2024, 9000000), (\u0027Algeria\u0027, 2024, 7000000);", + "sql": "SELECT SUM(budget) FROM climate_adaptation WHERE country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027, \u0027South Africa\u0027, \u0027Egypt\u0027, \u0027Algeria\u0027) AND year \u003d 2024;", + "sql_explanation": "This query calculates the total climate change adaptation budget for African countries in 2024 by summing the \u0027budget\u0027 column where the country is in the provided list and the year is 2024." 
+}, { + "id": "2187", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate mitigation initiatives were implemented in Arctic in 2021?", + "sql_context": "CREATE TABLE Initiatives (Year INT, Region VARCHAR(20), Status VARCHAR(20), Type VARCHAR(20)); INSERT INTO Initiatives (Year, Region, Status, Type) VALUES (2021, \u0027Arctic\u0027, \u0027Implemented\u0027, \u0027Climate Mitigation\u0027);", + "sql": "SELECT COUNT(*) FROM Initiatives WHERE Year \u003d 2021 AND Region \u003d \u0027Arctic\u0027 AND Type \u003d \u0027Climate Mitigation\u0027 AND Status \u003d \u0027Implemented\u0027;", + "sql_explanation": "This query counts the number of climate mitigation initiatives implemented in Arctic in 2021 by counting the rows where the \u0027Year\u0027 is 2021, \u0027Region\u0027 is \u0027Arctic\u0027, \u0027Type\u0027 is \u0027Climate Mitigation\u0027, and \u0027Status\u0027 is \u0027Implemented\u0027." 
+}, { + "id": "2287", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the \u0027renewable_energy_investment\u0027 table for \u0027Solar\u0027 and \u0027Wind\u0027 with amounts 10000000 and 15000000, respectively, and year 2022", + "sql_context": "CREATE TABLE renewable_energy_investment (energy_source VARCHAR(255), amount INT, year INT);", + "sql": "INSERT INTO renewable_energy_investment (energy_source, amount, year) VALUES (\u0027Solar\u0027, 10000000, 2022), (\u0027Wind\u0027, 15000000, 2022);", + "sql_explanation": "The query adds new records into the renewable_energy_investment table, specifying the energy_source, amount, and year columns for \u0027Solar\u0027 and \u0027Wind\u0027 investments." 
+}, { + "id": "2382", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all climate communication projects in Europe that started after 2012.", + "sql_context": "CREATE TABLE climate_projects (project_id INT, project_name TEXT, location TEXT, project_type TEXT, start_year INT); INSERT INTO climate_projects (project_id, project_name, location, project_type, start_year) VALUES (1, \u0027Communication 1\u0027, \u0027France\u0027, \u0027climate communication\u0027, 2013), (2, \u0027Mitigation 1\u0027, \u0027Germany\u0027, \u0027climate mitigation\u0027, 2015), (3, \u0027Adaptation 1\u0027, \u0027Spain\u0027, \u0027climate adaptation\u0027, 2010);", + "sql": "SELECT * FROM climate_projects WHERE project_type \u003d \u0027climate communication\u0027 AND location LIKE \u0027Europe%\u0027 AND start_year \u003e 2012;", + "sql_explanation": "The SQL query lists all climate communication projects in Europe that started after 2012 by selecting all columns from the climate_projects table where project_type is \u0027climate communication\u0027, location starts with \u0027Europe\u0027, and start_year is greater than 2012." 
+}, { + "id": "2556", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance for women-led projects in Africa in the energy sector?", + "sql_context": "CREATE TABLE climate_finance_for_women (fund_id INT, project_name VARCHAR(100), country VARCHAR(50), sector VARCHAR(50), amount FLOAT, gender_flag BOOLEAN); INSERT INTO climate_finance_for_women (fund_id, project_name, country, sector, amount, gender_flag) VALUES (1, \u0027Solar Power for Women\u0027, \u0027Kenya\u0027, \u0027Energy\u0027, 3000000, TRUE);", + "sql": "SELECT SUM(amount) FROM climate_finance_for_women WHERE country \u003d \u0027Africa\u0027 AND sector \u003d \u0027Energy\u0027 AND gender_flag \u003d TRUE;", + "sql_explanation": "This query calculates the total climate finance provided to women-led projects (filtered by gender_flag \u003d TRUE) in Africa (specified in the \u0027country\u0027 column) in the energy sector (filtered by sector \u003d \u0027Energy\u0027) by using the SUM function." 
+}, { + "id": "2574", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which climate adaptation projects were completed in Africa in 2018?", + "sql_context": "CREATE TABLE Projects (Year INT, Region VARCHAR(20), Status VARCHAR(20), Type VARCHAR(20)); INSERT INTO Projects (Year, Region, Status, Type) VALUES (2018, \u0027Africa\u0027, \u0027Completed\u0027, \u0027Climate Adaptation\u0027);", + "sql": "SELECT * FROM Projects WHERE Year \u003d 2018 AND Region \u003d \u0027Africa\u0027 AND Type \u003d \u0027Climate Adaptation\u0027 AND Status \u003d \u0027Completed\u0027;", + "sql_explanation": "This query selects all completed climate adaptation projects in Africa in 2018 by selecting the rows where the \u0027Year\u0027 is 2018, \u0027Region\u0027 is \u0027Africa\u0027, \u0027Type\u0027 is \u0027Climate Adaptation\u0027, and \u0027Status\u0027 is \u0027Completed\u0027." 
+}, { + "id": "2577", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the combined climate finance allocation for adaptation projects in Africa and South America?", + "sql_context": "CREATE TABLE climate_finance (region VARCHAR(50), amount FLOAT, sector VARCHAR(50)); INSERT INTO climate_finance (region, amount, sector) VALUES (\u0027Asia\u0027, 6000000, \u0027Mitigation\u0027), (\u0027Africa\u0027, 4000000, \u0027Mitigation\u0027), (\u0027South America\u0027, 5000000, \u0027Adaptation\u0027);", + "sql": "SELECT SUM(amount) FROM climate_finance WHERE (region \u003d \u0027Africa\u0027 OR region \u003d \u0027South America\u0027) AND sector \u003d \u0027Adaptation\u0027;", + "sql_explanation": "The SQL query sums the amount column from the climate_finance table, filtering for rows where the region is either Africa or South America and the sector is Adaptation, representing the combined climate finance allocation for adaptation projects in Africa and South America." 
+}, { + "id": "2900", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total climate finance provided by the World Bank for all climate initiatives in Africa since 2010.", + "sql_context": "CREATE TABLE climate_finance (id INT, provider VARCHAR(100), initiative VARCHAR(100), amount FLOAT, year INT, location VARCHAR(100)); INSERT INTO climate_finance (id, provider, initiative, amount, year, location) VALUES (1, \u0027World Bank\u0027, \u0027Climate Communication\u0027, 10000000, 2015, \u0027Africa\u0027), (2, \u0027UNDP\u0027, \u0027Climate Adaptation\u0027, 15000000, 2016, \u0027Asia\u0027);", + "sql": "SELECT SUM(amount) FROM climate_finance WHERE provider \u003d \u0027World Bank\u0027 AND location \u003d \u0027Africa\u0027 AND year \u003e\u003d 2010;", + "sql_explanation": "This query calculates the sum of the \u0027amount\u0027 column for all records where the \u0027provider\u0027 is \u0027World Bank\u0027, the \u0027location\u0027 is \u0027Africa\u0027, and the \u0027year\u0027 is greater than or equal to 2010." 
+}, { + "id": "2937", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which climate adaptation measures resulted in over 90% success rate in South America?", + "sql_context": "CREATE TABLE adaptation_measures (measure VARCHAR(50), location VARCHAR(50), success_rate NUMERIC); INSERT INTO adaptation_measures (measure, location, success_rate) VALUES (\u0027Construction of flood barriers\u0027, \u0027South America\u0027, 0.92), (\u0027Improved irrigation systems\u0027, \u0027South America\u0027, 0.95);", + "sql": "SELECT measure, success_rate FROM adaptation_measures WHERE location \u003d \u0027South America\u0027 AND success_rate \u003e 0.9;", + "sql_explanation": "The SQL query identifies the adaptation measures with a success rate over 90% in South America. It filters the records based on the location \u0027South America\u0027 and success rate greater than 0.9." 
+}, { + "id": "3066", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature in \u0027Sydney\u0027 for July?", + "sql_context": "CREATE TABLE weather (city VARCHAR(255), temperature FLOAT, date DATE); INSERT INTO weather (city, temperature, date) VALUES (\u0027Sydney\u0027, 60, \u00272022-07-01\u0027), (\u0027Sydney\u0027, 65, \u00272022-07-02\u0027), (\u0027Sydney\u0027, 62, \u00272022-07-03\u0027);", + "sql": "SELECT AVG(temperature) FROM weather WHERE city \u003d \u0027Sydney\u0027 AND date BETWEEN \u00272022-07-01\u0027 AND \u00272022-07-31\u0027;", + "sql_explanation": "The SQL query calculates the average temperature for Sydney in July by selecting the temperature values for Sydney in July and using the AVG function to find the average." 
+}, { + "id": "3168", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of climate communication campaigns launched in the European Union since 2015?", + "sql_context": "CREATE TABLE climate_communication (id INT, campaign VARCHAR(255), location VARCHAR(255), launch_year INT);", + "sql": "SELECT SUM(*) FROM climate_communication WHERE location LIKE \u0027%European Union%\u0027 AND launch_year \u003e\u003d 2015;", + "sql_explanation": "This query calculates the total number of climate communication campaigns launched in the European Union since 2015. It does this by using the SUM function and filtering the data where location contains \u0027European Union\u0027 and launch_year is greater than or equal to 2015." 
+}, { + "id": "3244", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate adaptation projects were initiated in Asia in Q1 2021?", + "sql_context": "CREATE TABLE climate_projects (region VARCHAR(50), quarter INT, year INT, project_count INT); INSERT INTO climate_projects VALUES (\u0027Asia\u0027, 1, 2021, 120);", + "sql": "SELECT SUM(project_count) FROM climate_projects WHERE region \u003d \u0027Asia\u0027 AND quarter \u003d 1 AND year \u003d 2021;", + "sql_explanation": "This query calculates the number of climate adaptation projects initiated in Asia during Q1 2021 by summing the \u0027project_count\u0027 column where \u0027region\u0027 is \u0027Asia\u0027, \u0027quarter\u0027 is 1, and \u0027year\u0027 is 2021." 
+}, { + "id": "3253", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget of climate communication projects in North America that started after 2018?", + "sql_context": "CREATE TABLE climate_communication (id INT, project_name TEXT, budget INT, start_year INT, location TEXT); INSERT INTO climate_communication (id, project_name, budget, start_year, location) VALUES (1, \u0027Climate Education\u0027, 30000, 2019, \u0027North America\u0027); INSERT INTO climate_communication (id, project_name, budget, start_year, location) VALUES (2, \u0027Climate Awareness\u0027, 40000, 2018, \u0027Asia\u0027);", + "sql": "SELECT SUM(budget) FROM climate_communication WHERE location \u003d \u0027North America\u0027 AND start_year \u003e 2018;", + "sql_explanation": "The SQL query calculates the total budget of records in the climate_communication table with the location \u0027North America\u0027 and start_year greater than 2018." 
+}, { + "id": "3364", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate communication initiatives were conducted in South Asia in 2020?", + "sql_context": "CREATE TABLE climate_communication (initiative_id INT, initiative_name VARCHAR(100), year INT, region VARCHAR(50));", + "sql": "SELECT COUNT(initiative_id) FROM climate_communication WHERE year \u003d 2020 AND region \u003d \u0027South Asia\u0027;", + "sql_explanation": "This SQL query calculates the number of climate communication initiatives conducted in South Asia in 2020 by counting the \u0027initiative_id\u0027 column where the \u0027year\u0027 is 2020 and the \u0027region\u0027 is \u0027South Asia\u0027." 
+}, { + "id": "3426", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance allocated for adaptation projects in South America?", + "sql_context": "CREATE TABLE climate_finance (region VARCHAR(50), amount FLOAT, sector VARCHAR(50)); INSERT INTO climate_finance (region, amount, sector) VALUES (\u0027Asia\u0027, 6000000, \u0027Mitigation\u0027), (\u0027Africa\u0027, 4000000, \u0027Mitigation\u0027), (\u0027South America\u0027, 5000000, \u0027Adaptation\u0027);", + "sql": "SELECT SUM(amount) FROM climate_finance WHERE region \u003d \u0027South America\u0027 AND sector \u003d \u0027Adaptation\u0027;", + "sql_explanation": "The SQL query sums the amount column from the climate_finance table, filtering for rows where the region is South America and the sector is Adaptation, representing the total climate finance allocated for adaptation projects in South America." 
+}, { + "id": "3455", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest greenhouse gas emissions in 2018?", + "sql_context": "CREATE TABLE emissions_data (id INT, country VARCHAR(50), year INT, emissions DECIMAL); INSERT INTO emissions_data (id, country, year, emissions) VALUES (1, \u0027USA\u0027, 2018, 5134.5); INSERT INTO emissions_data (id, country, year, emissions) VALUES (2, \u0027China\u0027, 2018, 13136.4);", + "sql": "SELECT country, emissions FROM emissions_data WHERE year \u003d 2018 ORDER BY emissions DESC LIMIT 2;", + "sql_explanation": "The SQL query retrieves the country and emissions column values where the year is equal to 2018, orders the results in descending order based on emissions, and returns the top 2 rows using the LIMIT clause." 
+}, { + "id": "3489", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate finance initiatives have been launched in North America and Oceania?", + "sql_context": "CREATE TABLE climate_finance_initiatives (initiative_name VARCHAR(255), location VARCHAR(255)); INSERT INTO climate_finance_initiatives (initiative_name, location) VALUES (\u0027Initiative A\u0027, \u0027North America\u0027), (\u0027Initiative B\u0027, \u0027Oceania\u0027);", + "sql": "SELECT COUNT(*) FROM climate_finance_initiatives WHERE location IN (\u0027North America\u0027, \u0027Oceania\u0027);", + "sql_explanation": "This query counts the number of climate finance initiatives in North America and Oceania by selecting count(*) from the climate_finance_initiatives table where the location is either North America or Oceania." 
+}, { + "id": "3522", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the communication lead for \u0027climate_mitigation\u0027?", + "sql_context": "CREATE TABLE staff (id INT PRIMARY KEY, name VARCHAR(255), role VARCHAR(255), department VARCHAR(255)); INSERT INTO staff (id, name, role, department) VALUES (1, \u0027Alice\u0027, \u0027Communication Lead\u0027, \u0027climate_mitigation\u0027), (2, \u0027Bob\u0027, \u0027Project Manager\u0027, \u0027climate_adaptation\u0027), (3, \u0027Charlie\u0027, \u0027Finance Analyst\u0027, \u0027climate_finance\u0027), (4, \u0027David\u0027, \u0027Data Scientist\u0027, \u0027climate_communication\u0027);", + "sql": "SELECT name FROM staff WHERE role \u003d \u0027Communication Lead\u0027 AND department \u003d \u0027climate_mitigation\u0027;", + "sql_explanation": "This query selects the \u0027name\u0027 of the staff member who is the \u0027Communication Lead\u0027 for the \u0027climate_mitigation\u0027 department." 
+}, { + "id": "3621", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for climate projects in Oceania that were started after 2016?", + "sql_context": "CREATE TABLE climate_projects (project_name VARCHAR(50), location VARCHAR(50), start_year INT, budget INT, sector VARCHAR(50)); INSERT INTO climate_projects (project_name, location, start_year, budget, sector) VALUES (\u0027Solar Farm A\u0027, \u0027Australia\u0027, 2018, 1000000, \u0027Solar\u0027), (\u0027Wind Farm B\u0027, \u0027New Zealand\u0027, 2019, 1500000, \u0027Wind\u0027);", + "sql": "SELECT AVG(budget) FROM climate_projects WHERE location IN (\u0027Oceania\u0027) AND start_year \u003e 2016;", + "sql_explanation": "This query calculates the average budget for climate projects in Oceania that were started after 2016 by filtering the climate_projects table for the relevant location and start year, and then calculating the average of the budget column." 
+}, { + "id": "3689", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all climate finance initiatives in Africa that were successful.", + "sql_context": "CREATE TABLE climate_finance (region VARCHAR(255), initiative_status VARCHAR(255)); INSERT INTO climate_finance VALUES (\u0027Africa\u0027, \u0027successful\u0027);", + "sql": "SELECT * FROM climate_finance WHERE region \u003d \u0027Africa\u0027 AND initiative_status \u003d \u0027successful\u0027;", + "sql_explanation": "This query lists all climate finance initiatives in Africa that were successful by selecting all records from the climate_finance table where the region is Africa and the initiative_status is successful." 
+}, { + "id": "3778", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects have been implemented in Africa since 2010?", + "sql_context": "CREATE TABLE RenewableEnergyProjects (ID INT, ProjectName VARCHAR(255), Country VARCHAR(255), Year INT); INSERT INTO RenewableEnergyProjects (ID, ProjectName, Country, Year) VALUES (1, \u0027Solar Farm 1\u0027, \u0027Egypt\u0027, 2015), (2, \u0027Wind Farm 1\u0027, \u0027South Africa\u0027, 2012), (3, \u0027Hydro Plant 1\u0027, \u0027Nigeria\u0027, 2018), (4, \u0027Geothermal Plant 1\u0027, \u0027Kenya\u0027, 2013), (5, \u0027Biomass Plant 1\u0027, \u0027Tanzania\u0027, 2017), (6, \u0027Solar Farm 2\u0027, \u0027Algeria\u0027, 2016);", + "sql": "SELECT COUNT(*) FROM RenewableEnergyProjects WHERE Country IN (\u0027Africa\u0027) AND Year \u003e\u003d 2010;", + "sql_explanation": "The SQL query counts the number of renewable energy projects implemented in Africa since 2010 by filtering the records based on Country and Year, and then applying the COUNT function to the entire set." 
+}, { + "id": "3944", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public charging stations are there in Germany?", + "sql_context": "CREATE TABLE Charging_Stations (Id INT, Type VARCHAR(50), Location VARCHAR(50)); INSERT INTO Charging_Stations (Id, Type, Location) VALUES (1, \u0027Public\u0027, \u0027Germany\u0027), (2, \u0027Private\u0027, \u0027Germany\u0027), (3, \u0027Public\u0027, \u0027France\u0027);", + "sql": "SELECT COUNT(*) FROM Charging_Stations WHERE Type \u003d \u0027Public\u0027 AND Location \u003d \u0027Germany\u0027;", + "sql_explanation": "This query counts the number of public charging stations in Germany by using the COUNT function with no specific column name (which defaults to counting all rows). It considers only the rows where the Type is \u0027Public\u0027 and the Location is \u0027Germany\u0027." 
+}, { + "id": "4031", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view with all adaptation projects\u0027 names and budgets", + "sql_context": "CREATE TABLE adaptation_projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), budget FLOAT, start_date DATE, end_date DATE); INSERT INTO adaptation_projects (id, name, location, budget, start_date, end_date) VALUES (1, \u0027Seawall Construction\u0027, \u0027New York City, USA\u0027, 2000000, \u00272022-01-01\u0027, \u00272023-12-31\u0027), (2, \u0027Drought Resistant Crops\u0027, \u0027Cape Town, South Africa\u0027, 800000, \u00272023-05-15\u0027, \u00272024-04-30\u0027), (3, \u0027Flood Early Warning System\u0027, \u0027Dhaka, Bangladesh\u0027, 1000000, \u00272023-07-01\u0027, \u00272025-06-30\u0027); CREATE VIEW adaptation_projects_view AS SELECT name, budget FROM adaptation_projects;", + "sql": "CREATE VIEW adaptation_projects_view AS SELECT name, budget FROM adaptation_projects;", + "sql_explanation": "A view named \u0027adaptation_projects_view\u0027 is created with the \u0027name\u0027 and \u0027budget\u0027 columns from the \u0027adaptation_projects\u0027 table." 
+}, { + "id": "4133", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique communication methods used by organizations with size \u0027small\u0027 and \u0027large\u0027?", + "sql_context": "CREATE TABLE org_communication (org_size VARCHAR(20), method VARCHAR(20)); INSERT INTO org_communication (org_size, method) VALUES (\u0027small\u0027, \u0027email\u0027), (\u0027medium\u0027, \u0027phone\u0027), (\u0027large\u0027, \u0027video_conference\u0027), (\u0027extra_large\u0027, \u0027virtual_reality\u0027);", + "sql": "SELECT DISTINCT method FROM org_communication WHERE org_size IN (\u0027small\u0027, \u0027large\u0027);", + "sql_explanation": "This query selects the unique communication methods used by \u0027small\u0027 and \u0027large\u0027 organizations from the org_communication table." 
+}, { + "id": "4177", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum amount of CO2 sequestered by forest conservation projects in North America?", + "sql_context": "CREATE TABLE CO2Sequestration (Id INT, Project VARCHAR(50), CO2Sequestered DECIMAL(10,2), Location VARCHAR(50));", + "sql": "SELECT MIN(CO2Sequestered) FROM CO2Sequestration WHERE Location \u003d \u0027North America\u0027;", + "sql_explanation": "This SQL query finds the minimum amount of CO2 sequestered by forest conservation projects in North America. It filters for North America. Then, it finds the minimum CO2 sequestered in the filtered data." 
+}, { + "id": "4276", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in renewable energy projects in the African region?", + "sql_context": "CREATE TABLE renewable_energy_projects (id INT, project VARCHAR(50), location VARCHAR(50), investment FLOAT); INSERT INTO renewable_energy_projects (id, project, location, investment) VALUES (1, \u0027Solar Farm\u0027, \u0027Africa\u0027, 5000000), (2, \u0027Wind Farm\u0027, \u0027Asia\u0027, 7000000), (3, \u0027Geothermal Plant\u0027, \u0027Africa\u0027, 6000000);", + "sql": "SELECT SUM(investment) FROM renewable_energy_projects WHERE location \u003d \u0027Africa\u0027;", + "sql_explanation": "Sum up the investment amounts from the renewable_energy_projects table for all projects located in Africa." 
+}, { + "id": "4347", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget of all climate mitigation projects in the US?", + "sql_context": "CREATE TABLE projects (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), type VARCHAR(20), budget FLOAT); INSERT INTO projects (id, name, location, type, budget) VALUES (1, \u0027Carbon Capture\u0027, \u0027US\u0027, \u0027Mitigation\u0027, 7000000.0); INSERT INTO projects (id, name, location, type, budget) VALUES (2, \u0027Green Roofs\u0027, \u0027New York\u0027, \u0027Adaptation\u0027, 3000000.0);", + "sql": "SELECT SUM(budget) FROM projects WHERE location \u003d \u0027US\u0027 AND type \u003d \u0027Mitigation\u0027;", + "sql_explanation": "The SQL query calculates the sum of the budget column for all climate mitigation projects in the US. This corresponds to the prompt\u0027s request to list the total budget of all climate mitigation projects in the US." 
+}, { + "id": "4425", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature in Brazil in 2020?", + "sql_context": "CREATE TABLE Weather (id INT, country VARCHAR(50), year INT, avg_temperature DECIMAL(5,2));", + "sql": "SELECT avg_temperature FROM Weather WHERE country \u003d \u0027Brazil\u0027 AND year \u003d 2020;", + "sql_explanation": "This query selects the average temperature from the Weather table where the country is Brazil and the year is 2020." +}, { + "id": "4461", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in renewable energy projects in small island developing states (SIDS) since 2015?", + "sql_context": "CREATE TABLE renewable_energy_projects (id INT PRIMARY KEY, project_type VARCHAR(50), country VARCHAR(50), year INT, budget DECIMAL(10,2));CREATE VIEW v_sids_renewable_energy_projects AS SELECT rep.project_type, rep.country, rep.budget FROM renewable_energy_projects rep WHERE rep.country LIKE \u0027Small Island Developing State%\u0027 AND rep.year \u003e\u003d 2015;", + "sql": "SELECT SUM(budget) AS total_investment FROM v_sids_renewable_energy_projects;", + "sql_explanation": "This query calculates the total investment in renewable energy projects in 
small island developing states (SIDS) since 2015. It creates a view called v_sids_renewable_energy_projects that filters the renewable_energy_projects table to only include projects in SIDS starting from 2015, then calculates the sum of their budgets." +}, { + "id": "4563", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What climate mitigation projects have been funded by the private sector?", + "sql_context": "CREATE TABLE climate_projects (id INT PRIMARY KEY, title VARCHAR(255), description TEXT, start_date DATE, end_date DATE, funding_source VARCHAR(255)); INSERT INTO climate_projects (id, title, description, start_date, end_date, funding_source) VALUES (1, \u0027Solar Farm Construction\u0027, \u0027Construction of a 100 MW solar farm.\u0027, \u00272022-01-01\u0027, \u00272022-12-31\u0027, \u0027Government Grant\u0027), (2, \u0027Wind Turbine Installation\u0027, \u0027Installation of wind turbines for a community.\u0027, \u00272022-04-01\u0027, \u00272023-03-31\u0027, \u0027Private Investment\u0027);", + "sql": "SELECT * FROM climate_projects WHERE funding_source \u003d \u0027Private Investment\u0027;", + "sql_explanation": "This query selects all records from the \"climate_projects\" table where the funding_source is \u0027Private Investment\u0027." 
+}, { + "id": "4580", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance committed to projects in the Asia-Pacific region?", + "sql_context": "CREATE TABLE climate_finance_ap (id INT, project VARCHAR(50), location VARCHAR(50), amount FLOAT); INSERT INTO climate_finance_ap (id, project, location, amount) VALUES (1, \u0027Adaptation Project\u0027, \u0027Asia-Pacific\u0027, 6000000.0); INSERT INTO climate_finance_ap (id, project, location, amount) VALUES (2, \u0027Mitigation Project\u0027, \u0027Asia-Pacific\u0027, 8000000.0); INSERT INTO climate_finance_ap (id, project, location, amount) VALUES (3, \u0027Communication Project\u0027, \u0027Europe\u0027, 4000000.0);", + "sql": "SELECT SUM(amount) FROM climate_finance_ap WHERE location \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query calculates the total climate finance committed to projects in the Asia-Pacific region by filtering the records based on location and then summing the amount." 
+}, { + "id": "4754", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate communication projects are there?", + "sql_context": "CREATE TABLE projects (id INT PRIMARY KEY, name VARCHAR(255), leader_gender VARCHAR(10), sector VARCHAR(255), region VARCHAR(255), category VARCHAR(255)); INSERT INTO projects (id, name, leader_gender, sector, region, category) VALUES (1, \u0027Wind Power\u0027, \u0027Female\u0027, \u0027Renewable Energy\u0027, \u0027Europe\u0027, \u0027climate_mitigation\u0027), (2, \u0027Smart Grid\u0027, \u0027Male\u0027, \u0027Energy Efficiency\u0027, \u0027North America\u0027, \u0027climate_adaptation\u0027), (3, \u0027Reforestation\u0027, \u0027Female\u0027, \u0027Land Use\u0027, \u0027South America\u0027, \u0027climate_communication\u0027), (4, \u0027Coastal Protection\u0027, \u0027Male\u0027, \u0027Adaptation\u0027, \u0027Asia\u0027, \u0027climate_finance\u0027), (5, \u0027Public Engagement\u0027, NULL, NULL, NULL, \u0027climate_communication\u0027);", + "sql": "SELECT COUNT(*) FROM projects WHERE category \u003d \u0027climate_communication\u0027;", + "sql_explanation": "This query counts all rows in the \u0027projects\u0027 table where the category is \u0027climate_communication\u0027." 
+}, { + "id": "4781", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate adaptation projects were completed before \u00272020\u0027 in the \u0027adaptation_projects\u0027 table?", + "sql_context": "CREATE TABLE adaptation_projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), description TEXT, start_date DATE, end_date DATE, budget FLOAT); INSERT INTO adaptation_projects (id, name, location, description, start_date, end_date, budget) VALUES (1, \u0027Sea Wall Construction\u0027, \u0027Miami\u0027, \u0027Coastal protection for sea level rise\u0027, \u00272018-01-01\u0027, \u00272020-12-31\u0027, 5000000);", + "sql": "SELECT COUNT(*) FROM adaptation_projects WHERE end_date \u003c \u00272020-01-01\u0027;", + "sql_explanation": "1. The COUNT function counts the number of adaptation projects completed before 2020. 2. The WHERE clause filters records based on the \u0027end_date\u0027 column, showing projects that were completed before 2020." 
+}, { + "id": "4800", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which climate conferences were held in Paris or Tokyo?", + "sql_context": "CREATE TABLE conferences (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), year INT); INSERT INTO conferences (id, name, location, year) VALUES (1, \u0027Climate Summit\u0027, \u0027Paris\u0027, 2015); INSERT INTO conferences (id, name, location, year) VALUES (2, \u0027Adaptation Conference\u0027, \u0027Tokyo\u0027, 2016);", + "sql": "SELECT location FROM conferences WHERE location IN (\u0027Paris\u0027, \u0027Tokyo\u0027);", + "sql_explanation": "The SQL query selects the location column from the conferences table where the location is Paris or Tokyo. This corresponds to the prompt\u0027s request to list the climate conferences held in Paris or Tokyo." 
+}, { + "id": "4977", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many climate communication initiatives have been implemented in Africa?", + "sql_context": "CREATE TABLE climate_communication (initiative_name VARCHAR(255), region VARCHAR(255)); INSERT INTO climate_communication (initiative_name, region) VALUES (\u0027Public Awareness Campaign\u0027, \u0027Africa\u0027), (\u0027Educational Workshops\u0027, \u0027Asia\u0027), (\u0027Community Outreach Program\u0027, \u0027Africa\u0027);", + "sql": "SELECT COUNT(*) FROM climate_communication WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "This SQL query counts the number of climate communication initiatives in the climate_communication table located in Africa using the WHERE clause and COUNT() function." 
+}, { + "id": "5073", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total climate finance for \u0027Africa\u0027?", + "sql_context": "CREATE TABLE climate_finance (country VARCHAR(255), amount FLOAT); INSERT INTO climate_finance (country, amount) VALUES (\u0027Canada\u0027, 5000000), (\u0027Mexico\u0027, 6000000), (\u0027Brazil\u0027, 3000000), (\u0027Argentina\u0027, 4000000), (\u0027Kenya\u0027, 7000000), (\u0027Nigeria\u0027, 8000000);", + "sql": "SELECT SUM(amount) FROM climate_finance WHERE country \u003d \u0027Africa\u0027;", + "sql_explanation": "This query sums (SUM) the \u0027amount\u0027 column for rows with a \u0027country\u0027 value of \u0027Africa\u0027." 
+}, { + "id": "5161", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all climate mitigation projects from \u0027Africa\u0027 in the \u0027mitigation_projects\u0027 table", + "sql_context": "CREATE TABLE mitigation_projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), description TEXT, start_date DATE, end_date DATE, budget FLOAT); INSERT INTO mitigation_projects (id, name, location, description, start_date, end_date, budget) VALUES (1, \u0027Solar Farm Installation\u0027, \u0027Kenya\u0027, \u0027Installation of solar panels\u0027, \u00272019-01-01\u0027, \u00272021-12-31\u0027, 12000000), (2, \u0027Energy Efficiency Upgrades\u0027, \u0027South Africa\u0027, \u0027Upgrading public buildings for energy efficiency\u0027, \u00272019-06-01\u0027, \u00272022-05-31\u0027, 8000000);", + "sql": "SELECT * FROM mitigation_projects WHERE location LIKE \u0027Africa%\u0027;", + "sql_explanation": "1. The SELECT statement retrieves all records from the \u0027mitigation_projects\u0027 table where the location starts with \u0027Africa\u0027." 
+}, { + "id": "5700", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have received climate finance in the \u0027climate_finance\u0027 table?", + "sql_context": "CREATE TABLE climate_finance (initiative_name TEXT, year INTEGER, amount FLOAT, country TEXT); INSERT INTO climate_finance (initiative_name, year, amount, country) VALUES (\u0027Green Grants\u0027, 2019, 50000.0, \u0027Brazil\u0027), (\u0027Climate Innovation Fund\u0027, 2020, 100000.0, \u0027India\u0027), (\u0027Renewable Energy Loans\u0027, 2018, 75000.0, \u0027China\u0027);", + "sql": "SELECT DISTINCT country FROM climate_finance;", + "sql_explanation": "This query selects distinct countries from the \u0027climate_finance\u0027 table." 
+}, { + "id": "5793", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all climate communication projects from the \u0027communication_projects\u0027 table", + "sql_context": "CREATE TABLE communication_projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), description TEXT, start_date DATE, end_date DATE, budget FLOAT); INSERT INTO communication_projects (id, name, location, description, start_date, end_date, budget) VALUES (1, \u0027Public Awareness Campaign\u0027, \u0027New York\u0027, \u0027Raising awareness of climate change\u0027, \u00272018-01-01\u0027, \u00272018-12-31\u0027, 200000);", + "sql": "SELECT * FROM communication_projects;", + "sql_explanation": "1. The SELECT statement retrieves all records from the \u0027communication_projects\u0027 table. 2. No filtering is required, as the prompt requests all records for climate communication projects." 
+}, { + "id": "828", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of sustainable fabrics sourced from Europe in the past year?", + "sql_context": "CREATE TABLE Sustainable_Fabrics (fabric_id INT, fabric_name VARCHAR(50), sourcing_country VARCHAR(50), quantity INT); INSERT INTO Sustainable_Fabrics (fabric_id, fabric_name, sourcing_country, quantity) VALUES (1, \u0027Organic Cotton\u0027, \u0027France\u0027, 500), (2, \u0027Recycled Polyester\u0027, \u0027Germany\u0027, 700), (3, \u0027Tencel\u0027, \u0027Austria\u0027, 600);", + "sql": "SELECT AVG(quantity) FROM Sustainable_Fabrics WHERE sourcing_country IN (\u0027France\u0027, \u0027Germany\u0027, \u0027Austria\u0027) AND sourcing_country IS NOT NULL AND fabric_name IS NOT NULL AND quantity IS NOT NULL;", + "sql_explanation": "The SQL query calculates the average quantity of sustainable fabrics sourced from Europe in the past year by using the AVG function on the quantity column. It filters the data based on the sourcing_country column to only include data from France, Germany, and Austria, and also checks that the fabric_name and quantity columns are not NULL." 
+}, { + "id": "1317", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in average customer spending between customers in Mexico and Argentina?", + "sql_context": "CREATE TABLE CustomerSpendingMX (CustomerID INT, Country TEXT, AvgSpending DECIMAL(5,2)); INSERT INTO CustomerSpendingMX (CustomerID, Country, AvgSpending) VALUES (1, \u0027Mexico\u0027, 120.50), (2, \u0027Mexico\u0027, 110.50), (3, \u0027Mexico\u0027, 130.50), (4, \u0027Mexico\u0027, 90.50); CREATE TABLE CustomerSpendingAR (CustomerID INT, Country TEXT, AvgSpending DECIMAL(5,2)); INSERT INTO CustomerSpendingAR (CustomerID, Country, AvgSpending) VALUES (1, \u0027Argentina\u0027, 105.00), (2, \u0027Argentina\u0027, 115.00), (3, \u0027Argentina\u0027, 125.00), (4, \u0027Argentina\u0027, 135.00);", + "sql": "SELECT AVG(CSMX.AvgSpending) - AVG(CSA.AvgSpending) FROM CustomerSpendingMX CSMX, CustomerSpendingAR CSA WHERE CSMX.Country \u003d \u0027Mexico\u0027 AND CSA.Country \u003d \u0027Argentina\u0027;", + "sql_explanation": "The SQL query calculates the average customer spending in the CustomerSpendingMX and CustomerSpendingAR tables separately, and then subtracts the two averages to find the difference." 
+}, { + "id": "1381", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of customers in Canada who prefer sustainable fashion, and how many customers are there in total?", + "sql_context": "CREATE TABLE customers (customer_id INT, customer_name VARCHAR(50), country VARCHAR(50), sustainable_fashion BOOLEAN); INSERT INTO customers VALUES (1, \u0027Customer A\u0027, \u0027Canada\u0027, true); INSERT INTO customers VALUES (2, \u0027Customer B\u0027, \u0027Canada\u0027, false); INSERT INTO customers VALUES (3, \u0027Customer C\u0027, \u0027Canada\u0027, true); INSERT INTO customers VALUES (4, \u0027Customer D\u0027, \u0027Canada\u0027, true); INSERT INTO customers VALUES (5, \u0027Customer E\u0027, \u0027Canada\u0027, false);", + "sql": "SELECT (COUNT(*) FILTER (WHERE sustainable_fashion \u003d true)) * 100.0 / COUNT(*) as percentage, COUNT(*) as total_customers FROM customers WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "The SQL query calculates the percentage of customers in Canada who prefer sustainable fashion, and counts the total number of customers in Canada. It uses the COUNT function to count the number of customers who prefer sustainable fashion and the total number of customers, respectively. The FILTER clause is used to filter the results based on the sustainable_fashion value. The result is multiplied by 100.0 to convert it to a percentage." 
+}, { + "id": "1382", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Add a new column \u0027sustainable\u0027 to the Garments table and set the value to true for all garments made of sustainable materials.", + "sql_context": "CREATE TABLE Garments (garment_id INT, garment_name VARCHAR(50), retail_price DECIMAL(5,2), material VARCHAR(50)); ALTER TABLE Garments ADD sustainable BOOLEAN; UPDATE Garments SET sustainable \u003d true WHERE material IN (\u0027Organic Cotton\u0027, \u0027Hemp\u0027, \u0027Tencel\u0027, \u0027Recycled Polyester\u0027);", + "sql": "ALTER TABLE Garments ADD sustainable BOOLEAN; UPDATE Garments SET sustainable \u003d true WHERE material IN (\u0027Organic Cotton\u0027, \u0027Hemp\u0027, \u0027Tencel\u0027, \u0027Recycled Polyester\u0027);", + "sql_explanation": "This query adds a new column \u0027sustainable\u0027 to the Garments table and sets the value to true for all garments made of sustainable materials." 
+}, { + "id": "1604", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the CO2 emissions reduction in the past 12 months?", + "sql_context": "CREATE TABLE co2_emissions (emission_id INT, emission_value INT, emission_date DATE);", + "sql": "SELECT SUM(emission_value) AS total_emissions_reduction FROM co2_emissions WHERE emission_date \u003e\u003d DATE(NOW()) - INTERVAL 12 MONTH AND emission_value \u003c 0", + "sql_explanation": "Calculate the CO2 emissions reduction in the past 12 months by summing the negative emission_value rows in the co2_emissions table and filtering for dates in the past 12 months." +}, { + "id": "1831", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size of customers in the \u0027sustainable_fashion_customers\u0027 table?", + "sql_context": "CREATE TABLE sustainable_fashion_customers (id INT, customer_name VARCHAR(30), size VARCHAR(10)); INSERT INTO sustainable_fashion_customers (id, customer_name, size) VALUES (1, \u0027Alice\u0027, \u0027M\u0027), (2, \u0027Bob\u0027, \u0027L\u0027), (3, \u0027Charlie\u0027, \u0027S\u0027);", + "sql": "SELECT AVG(CASE WHEN size \u003d \u0027S\u0027 THEN 0 WHEN size \u003d \u0027M\u0027 THEN 1 WHEN size \u003d \u0027L\u0027 THEN 2 ELSE 3 END) AS 
avg_size FROM sustainable_fashion_customers;", + "sql_explanation": "This SQL query calculates the average size of customers in the sustainable_fashion_customers table by mapping the size column to numeric values and then computing the average using the AVG() function. The CASE statement maps the size values \u0027S\u0027, \u0027M\u0027, and \u0027L\u0027 to the numeric values 0, 1, and 2, respectively. The query then returns the average size as a numeric value." +}, { + "id": "2173", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique customers have purchased plus-size garments in the past year?", + "sql_context": "CREATE TABLE Customer_Data (Customer_ID INT, Purchase_Date DATE, Item_Size INT); INSERT INTO Customer_Data (Customer_ID, Purchase_Date, Item_Size) VALUES (1, \u00272022-01-01\u0027, 18), (2, \u00272022-02-01\u0027, 14), (3, \u00272022-03-01\u0027, 20), (4, \u00272022-04-01\u0027, 12), (5, \u00272022-05-01\u0027, 16), (6, \u00272022-06-01\u0027, 18), (7, \u00272021-12-25\u0027, 22), (8, \u00272021-11-11\u0027, 20);", + "sql": "SELECT COUNT(DISTINCT Customer_ID) FROM Customer_Data WHERE Item_Size \u003e\u003d 16 AND Purchase_Date BETWEEN \u00272021-01-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "This SQL query counts the number of unique customers who have purchased plus-size garments (size 16 and above) in the past year (2021-01-01 to 2022-12-31) by using a where clause to filter the Customer_Data table based on the Item_Size and Purchase_Date columns and then using the count distinct function to count the number of unique customer IDs." 
+}, { + "id": "2822", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many size 14 dresses were sold in the United States and Canada?", + "sql_context": "CREATE TABLE sales (id INT, product VARCHAR(20), size INT, quantity INT, country VARCHAR(20)); INSERT INTO sales VALUES (1, \u0027dress\u0027, 14, 200, \u0027USA\u0027), (2, \u0027dress\u0027, 14, 150, \u0027Canada\u0027);", + "sql": "SELECT SUM(s.quantity) FROM sales s WHERE s.product \u003d \u0027dress\u0027 AND s.size \u003d 14 AND s.country IN (\u0027USA\u0027, \u0027Canada\u0027);", + "sql_explanation": "The SQL query selects the sum of the quantity column from the sales table where the product is dress, the size is 14, and the country is either the USA or Canada." 
+}, { + "id": "2846", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales volume for Cultural Print garments in Mumbai during 2020 from ethical sources with a rating above 8?", + "sql_context": "CREATE TABLE Sales (sale_id INT, garment_id INT, location_id INT, sale_date DATE);CREATE TABLE Garments (garment_id INT, trend_id INT, fabric_source_id INT, size VARCHAR(50), style VARCHAR(255));CREATE TABLE FabricSources (source_id INT, fabric_type VARCHAR(255), country_of_origin VARCHAR(255), ethical_rating DECIMAL(3,2));CREATE TABLE StoreLocations (location_id INT, city VARCHAR(255), country VARCHAR(255), sales_volume INT);CREATE VIEW CulturalPrints AS SELECT * FROM Garments WHERE trend_id IN (SELECT trend_id FROM FashionTrends WHERE name \u003d \u0027Cultural Print\u0027);CREATE VIEW EthicalCulturalPrints AS SELECT * FROM CulturalPrints WHERE fabric_source_id IN (SELECT source_id FROM FabricSources WHERE ethical_rating \u003e 8.0);CREATE VIEW MumbaiSales AS SELECT * FROM Sales WHERE location_id IN (SELECT location_id FROM StoreLocations WHERE city \u003d \u0027Mumbai\u0027);CREATE VIEW MumbaiEthicalCulturalPrints AS SELECT * FROM MumbaiSales WHERE garment_id IN (SELECT garment_id FROM EthicalCulturalPrints);", + "sql": "SELECT SUM(sales_volume) FROM MumbaiEthicalCulturalPrints WHERE sale_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "This query calculates the total sales volume for Cultural Print garments sold in Mumbai during 2020 that originate from ethical sources with a rating above 8." 
+}, { + "id": "3050", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of items that are sustainable", + "sql_context": "CREATE TABLE inventory (id INT, item_name VARCHAR(20), is_sustainable BOOLEAN, quantity INT); INSERT INTO inventory (id, item_name, is_sustainable, quantity) VALUES (1, \u0027t-shirt\u0027, false, 100), (2, \u0027blouse\u0027, true, 50), (3, \u0027jeans\u0027, true, 75), (4, \u0027skirt\u0027, false, 150), (5, \u0027jacket\u0027, true, 100);", + "sql": "SELECT (COUNT(CASE WHEN is_sustainable \u003d true THEN 1 END) * 100.0 / COUNT(*)) AS percentage FROM inventory;", + "sql_explanation": "This SQL query calculates the percentage of sustainable items in the inventory table. It uses a CASE statement to count the number of sustainable items and divides that by the total number of items to get the percentage." 
+}, { + "id": "3344", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique garment types and colors are present in the \u0027garments\u0027 table, without any duplicates?", + "sql_context": "CREATE TABLE garments (garment_id INTEGER, garment_type TEXT, garment_color TEXT, price INTEGER, quantity INTEGER); INSERT INTO garments (garment_id, garment_type, garment_color, price, quantity) VALUES (1, \u0027t-shirt\u0027, \u0027red\u0027, 20, 100), (2, \u0027jeans\u0027, \u0027blue\u0027, 50, 75), (3, \u0027hoodie\u0027, \u0027black\u0027, 30, 120);", + "sql": "SELECT COUNT(DISTINCT garment_type) + COUNT(DISTINCT garment_color) AS unique_values FROM garments;", + "sql_explanation": "The query uses the COUNT() function with the DISTINCT keyword to count the number of unique values in the \u0027garment_type\u0027 and \u0027garment_color\u0027 columns, then adds the two values together to get the total number of unique values." 
+}, { + "id": "3504", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable materials have the lowest sustainability score?", + "sql_context": "CREATE TABLE sustainable_fashion (id INT, product_id INT, material VARCHAR(255), sustainability_score INT); INSERT INTO sustainable_fashion (id, product_id, material, sustainability_score) VALUES (1, 101, \u0027Organic Cotton\u0027, 90), (2, 102, \u0027Recycled Polyester\u0027, 80), (3, 103, \u0027Tencel\u0027, 85), (4, 101, \u0027Hemp\u0027, 95), (5, 102, \u0027Bamboo\u0027, 88);", + "sql": "SELECT material, sustainability_score FROM sustainable_fashion WHERE sustainability_score \u003c 90;", + "sql_explanation": "Filter the data to only include materials with a sustainability score less than 90." 
+}, { + "id": "3726", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record in the trending_fashions table for style \u0027T-shirt\u0027, region \u0027Africa\u0027 and popularity 70", + "sql_context": "CREATE TABLE trending_fashions (style VARCHAR(255) PRIMARY KEY, region VARCHAR(255), popularity INT); INSERT INTO trending_fashions (style, region, popularity) VALUES (\u0027Tunic\u0027, \u0027MiddleEast\u0027, 60), (\u0027Pants\u0027, \u0027Asia\u0027, 90);", + "sql": "INSERT INTO trending_fashions (style, region, popularity) VALUES (\u0027T-shirt\u0027, \u0027Africa\u0027, 70);", + "sql_explanation": "This query inserts a new record in the trending_fashions table for style \u0027T-shirt\u0027, region \u0027Africa\u0027 and popularity 70." 
+}, { + "id": "4254", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and sizes of all women\u0027s clothing items.", + "sql_context": "CREATE TABLE ClothingItems (ItemID INT, ItemName VARCHAR(255), AvailableSizes VARCHAR(255)); INSERT INTO ClothingItems (ItemID, ItemName, AvailableSizes) VALUES (101, \u0027Maxi Dress\u0027, \u0027XS,S,M,L,XL\u0027), (102, \u0027Blazer\u0027, \u0027S,M,L\u0027), (103, \u0027Jeans\u0027, \u002728,30,32,34,36\u0027), (104, \u0027T-Shirt\u0027, \u0027XS,S,M,L,XL,XXL\u0027), (105, \u0027Jumpsuit\u0027, \u0027M,L,XL\u0027), (106, \u0027Shorts\u0027, \u002726,28,30,32,34\u0027);", + "sql": "SELECT ItemName, AvailableSizes FROM ClothingItems WHERE ItemName LIKE \u0027%Women%\u0027;", + "sql_explanation": "This query selects the ItemName and AvailableSizes fields from the ClothingItems table where the ItemName field contains the word \"Women\"." 
+}, { + "id": "4612", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of size 14 dresses in the inventory", + "sql_context": "CREATE TABLE inventory (id INT, product_id INT, product_name VARCHAR(50), size INT, quantity INT); INSERT INTO inventory (id, product_id, product_name, size, quantity) VALUES (1, 1001, \u0027Dress\u0027, 14, 35), (2, 1002, \u0027Blouse\u0027, 10, 47), (3, 1003, \u0027Skirt\u0027, 8, 53), (4, 1004, \u0027Dress\u0027, 12, 29), (5, 1005, \u0027Jumpsuit\u0027, 16, 21);", + "sql": "SELECT COUNT(*) FROM inventory WHERE size \u003d 14 AND product_name \u003d \u0027Dress\u0027;", + "sql_explanation": "The SQL query counts the number of size 14 dresses in the inventory table by filtering on size 14 and product_name \u0027Dress\u0027 and using the COUNT function." 
+}, { + "id": "4655", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all sustainable fabrics from the \u0027fabrics_sourced\u0027 table", + "sql_context": "CREATE TABLE fabrics_sourced (id INT PRIMARY KEY, fabric_type VARCHAR(255), country VARCHAR(255), sustainability_rating INT);", + "sql": "SELECT fabric_type FROM fabrics_sourced WHERE sustainability_rating \u003e\u003d 8;", + "sql_explanation": "The SQL query selects all records from the \u0027fabrics_sourced\u0027 table where the \u0027sustainability_rating\u0027 is 8 or greater to list sustainable fabrics." +}, { + "id": "4665", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of linen garments sold in the UK?", + "sql_context": "CREATE TABLE sales (id INT, price DECIMAL(5,2), material VARCHAR(20), country VARCHAR(20)); INSERT INTO sales (id, price, material, country) VALUES (1, 75.00, \u0027linen\u0027, \u0027UK\u0027); -- additional rows removed for brevity;", + "sql": "SELECT MAX(price) FROM sales WHERE material \u003d \u0027linen\u0027 AND country \u003d \u0027UK\u0027;", + "sql_explanation": "This query finds the maximum price of linen garments sold in the UK. 
It filters the sales table based on material and country, and then calculates the maximum price of the garments that meet the criteria." +}, { + "id": "4709", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all size options available for women\u0027s clothing items.", + "sql_context": "CREATE TABLE ClothingItems (ItemName VARCHAR(255), ItemType VARCHAR(255), SizeOptions VARCHAR(255)); INSERT INTO ClothingItems (ItemName, ItemType, SizeOptions) VALUES (\u0027Blouse\u0027, \u0027Women\u0027, \u0027XS,S,M,L\u0027), (\u0027Skirt\u0027, \u0027Women\u0027, \u0027S,M,L,XL\u0027), (\u0027Dress\u0027, \u0027Women\u0027, \u0027XS,S,M\u0027), (\u0027Jacket\u0027, \u0027Women\u0027, \u0027S,M,L\u0027), (\u0027Pants\u0027, \u0027Women\u0027, \u002728,30,32,34\u0027);", + "sql": "SELECT DISTINCT SizeOptions FROM ClothingItems WHERE ItemType \u003d \u0027Women\u0027;", + "sql_explanation": "This query retrieves the distinct SizeOptions field values from the ClothingItems table where the ItemType field is \"Women\"." 
+}, { + "id": "4910", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total quantity of sustainable fabric types used in all clothing items.", + "sql_context": "CREATE TABLE TextileSourcing (FabricType VARCHAR(255), Quantity INT, IsSustainable BOOLEAN); INSERT INTO TextileSourcing (FabricType, Quantity, IsSustainable) VALUES (\u0027Organic Cotton\u0027, 1200, TRUE), (\u0027Recycled Polyester\u0027, 800, TRUE), (\u0027Tencel\u0027, 1500, TRUE), (\u0027Virgin Polyester\u0027, 1000, FALSE), (\u0027Conventional Cotton\u0027, 2000, FALSE);", + "sql": "SELECT SUM(Quantity) FROM TextileSourcing WHERE IsSustainable \u003d TRUE;", + "sql_explanation": "This query calculates the sum of the Quantity field in the TextileSourcing table where the IsSustainable field is true." 
+}, { + "id": "5020", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total CO2 emissions for garments made of cotton.", + "sql_context": "CREATE TABLE inventory (id INT, garment_id INT, material VARCHAR(50), CO2_emissions INT); INSERT INTO inventory (id, garment_id, material, CO2_emissions) VALUES (1, 1003, \u0027cotton\u0027, 5);", + "sql": "SELECT SUM(CO2_emissions) FROM inventory WHERE material \u003d \u0027cotton\u0027;", + "sql_explanation": "The SQL query calculates the sum of CO2 emissions for cotton garments from the inventory table." +}, { + "id": "5074", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new textile source \u0027Organic Silk\u0027 to the \u0027sources\u0027 table", + "sql_context": "CREATE TABLE sources (id INT PRIMARY KEY, source_name VARCHAR(50));", + "sql": "INSERT INTO sources (id, source_name) VALUES (2, \u0027Organic Silk\u0027);", + "sql_explanation": "This SQL query inserts a new record into the \u0027sources\u0027 table with an id of 2 and the source name \u0027Organic Silk\u0027." 
+}, { + "id": "5109", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new fashion trend \u0027Color Blocking\u0027 into the \u0027trends\u0027 table", + "sql_context": "CREATE TABLE trends (id INT PRIMARY KEY, trend_name VARCHAR(50));", + "sql": "INSERT INTO trends (id, trend_name) VALUES (1, \u0027Color Blocking\u0027);", + "sql_explanation": "This SQL query inserts a new record into the \u0027trends\u0027 table with an id of 1 and the trend name \u0027Color Blocking\u0027." +}, { + "id": "5192", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average textile sourcing cost for a specific fabric type?", + "sql_context": "CREATE TABLE textile_sourcing (id INT, item_id INT, fabric TEXT, cost DECIMAL);", + "sql": "SELECT AVG(cost) FROM textile_sourcing WHERE fabric \u003d \u0027cotton\u0027;", + "sql_explanation": "This SQL query calculates the average textile sourcing cost for a specific fabric type, in this case cotton. It does this by using the AVG function on the cost column, and filtering for only rows where the fabric is cotton." 
+}, { + "id": "5295", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum textile sourcing cost for a specific fabric type?", + "sql_context": "CREATE TABLE textile_sourcing (id INT, item_id INT, fabric TEXT, cost DECIMAL);", + "sql": "SELECT MIN(cost) FROM textile_sourcing WHERE fabric \u003d \u0027silk\u0027;", + "sql_explanation": "This SQL query calculates the minimum textile sourcing cost for a specific fabric type, in this case silk. It does this by using the MIN function on the cost column, and filtering for only rows where the fabric is silk." +}, { + "id": "5308", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete textile suppliers with low sustainability ratings", + "sql_context": "CREATE TABLE TextileSuppliers (SupplierID INT, SupplierName TEXT, SustainabilityRating INT); INSERT INTO TextileSuppliers (SupplierID, SupplierName, SustainabilityRating) VALUES (1, \u0027Supplier A\u0027, 85), (2, \u0027Supplier B\u0027, 90), (3, \u0027Supplier C\u0027, 70);", + "sql": "DELETE FROM TextileSuppliers WHERE SustainabilityRating \u003c 80;", + "sql_explanation": "This SQL query deletes records from the TextileSuppliers table where the SustainabilityRating is below 80, ensuring only suppliers with higher sustainability ratings remain." 
+}, { + "id": "1435", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the client\u0027s first and last name, state, and the difference between the case closing date and the case opening date, for cases with a practice area of \u0027Family Law\u0027, partitioned by state and ordered by the difference in ascending order.", + "sql_context": "CREATE TABLE Cases (CaseID INT, ClientFirstName VARCHAR(50), ClientLastName VARCHAR(50), State VARCHAR(2), PracticeArea VARCHAR(50), CaseOutcome VARCHAR(20), OpenDate DATE, CloseDate DATE); INSERT INTO Cases (CaseID, ClientFirstName, ClientLastName, State, PracticeArea, CaseOutcome, OpenDate, CloseDate) VALUES (1, \u0027Daniel\u0027, \u0027Garcia\u0027, \u0027NY\u0027, \u0027Bankruptcy\u0027, \u0027closed\u0027, \u00272020-01-01\u0027, \u00272020-06-01\u0027), (2, \u0027Avery\u0027, \u0027Washington\u0027, \u0027CA\u0027, \u0027Bankruptcy\u0027, \u0027open\u0027, \u00272019-01-01\u0027, NULL), (3, \u0027Jessica\u0027, \u0027Harris\u0027, \u0027NY\u0027, \u0027Family Law\u0027, \u0027closed\u0027, \u00272021-01-01\u0027, \u00272021-06-01\u0027);", + "sql": "SELECT State, ClientFirstName, ClientLastName, DATEDIFF(CloseDate, OpenDate) AS DaysOpen FROM Cases WHERE PracticeArea \u003d \u0027Family Law\u0027 ORDER BY State, DaysOpen;", + "sql_explanation": "The SQL query selects the State, ClientFirstName, ClientLastName, and the difference between the CloseDate and OpenDate columns (aliased as DaysOpen) from the Cases table where PracticeArea is \u0027Family Law\u0027, and orders the results by the State and DaysOpen columns in ascending order." 
+}, { + "id": "1714", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new case outcome records", + "sql_context": "CREATE TABLE case_outcomes (case_id INT, outcome TEXT, precedent TEXT);", + "sql": "INSERT INTO case_outcomes (case_id, outcome, precedent) VALUES (1, \u0027Won\u0027, \u0027Precedent A\u0027), (2, \u0027Lost\u0027, \u0027Precedent B\u0027), (3, \u0027Settled\u0027, \u0027Precedent C\u0027);", + "sql_explanation": "This SQL statement inserts three new records into the case_outcomes table. Each record represents a different case outcome, along with the precedent associated with that outcome." +}, { + "id": "3722", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all client demographics for clients who have had cases with a precedent set by Judge \u0027Brown\u0027 or \u0027Garcia\u0027.", + "sql_context": "CREATE TABLE clients (id INT, first_name VARCHAR(20), last_name VARCHAR(20), judge_name VARCHAR(20)); INSERT INTO clients (id, first_name, last_name, judge_name) VALUES (1, \u0027Jacob\u0027, \u0027Davis\u0027, \u0027Brown\u0027), (2, \u0027Emily\u0027, \u0027Johnson\u0027, \u0027Garcia\u0027), (3, \u0027Daniel\u0027, \u0027Lee\u0027, \u0027Brown\u0027), (4, \u0027Ava\u0027, \u0027Patel\u0027, \u0027Garcia\u0027), 
(5, \u0027Lucas\u0027, \u0027Singh\u0027, \u0027Garcia\u0027), (6, \u0027Mia\u0027, \u0027Kim\u0027, \u0027Brown\u0027);", + "sql": "SELECT DISTINCT first_name, last_name FROM clients WHERE judge_name IN (\u0027Brown\u0027, \u0027Garcia\u0027);", + "sql_explanation": "We filter the clients table for clients with a precedent set by Judge \u0027Brown\u0027 or \u0027Garcia\u0027 using the WHERE clause and the IN operator. Then, we select the first_name and last_name columns. The DISTINCT keyword is used to ensure each client appears only once." +}, { + "id": "4015", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total number of cases and billing amounts for cases that were not resolved by trial.", + "sql_context": "CREATE TABLE CaseResolutions (CaseID INT, CaseType VARCHAR(20), Resolution VARCHAR(20), BillingAmount DECIMAL(10,2)); INSERT INTO CaseResolutions (CaseID, CaseType, Resolution, BillingAmount) VALUES (1, \u0027Civil\u0027, \u0027Plaintiff Verdict\u0027, 7000.00), (2, \u0027Civil\u0027, \u0027Settlement\u0027, 3000.00);", + "sql": "SELECT COUNT(*), SUM(BillingAmount) FROM CaseResolutions WHERE Resolution !\u003d \u0027Trial\u0027;", + "sql_explanation": "The SQL query counts the number of cases and adds up the billing amounts for cases not resolved by trial by using the COUNT() and SUM() functions and the !\u003d operator." 
+}, { + "id": "4018", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027billing\u0027 table to set the \u0027total_cost\u0027 field to \u00272500\u0027 where the client\u0027s ID is 345 and the case type is \u0027divorce\u0027.", + "sql_context": "CREATE TABLE billing (bill_id INT PRIMARY KEY AUTO_INCREMENT, client_id INT, case_type VARCHAR(50), total_cost DECIMAL(10,2));", + "sql": "UPDATE billing SET total_cost \u003d 2500 WHERE client_id \u003d 345 AND case_type \u003d \u0027divorce\u0027;", + "sql_explanation": "1. The UPDATE statement is used to modify records in the billing table. 2. The SET clause sets the total_cost to 2500 for the records that meet the conditions specified in the WHERE clause. 3. The WHERE clause filters the records based on the client_id (345) and the case_type (\u0027divorce\u0027)." 
+}, { + "id": "4295", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average billing amount for cases in \u0027California\u0027?", + "sql_context": "CREATE TABLE cases (case_id INT, state VARCHAR(50), billing_amount DECIMAL(10,2)); INSERT INTO cases (case_id, state, billing_amount) VALUES (1, \u0027California\u0027, 3000.00), (2, \u0027New York\u0027, 6000.00), (3, \u0027California\u0027, 7000.00), (4, \u0027Texas\u0027, 1000.00);", + "sql": "SELECT AVG(billing_amount) as avg_billing FROM cases WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "This query calculates the average billing amount for cases in \u0027California\u0027. It does this by averaging the billing_amount from the cases table, filtering by state name." 
+}, { + "id": "4331", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were handled by attorneys who graduated from law school between 2000 and 2010?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, LawDegreeDate DATE);", + "sql": "SELECT COUNT(*) FROM Attorneys WHERE YEAR(LawDegreeDate) BETWEEN 2000 AND 2010;", + "sql_explanation": "The SQL query counts the number of attorneys from the \u0027Attorneys\u0027 table who graduated from law school between 2000 and 2010 by using the YEAR() function to extract the year from the \u0027LawDegreeDate\u0027 column." +}, { + "id": "4666", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average billing amount for cases in the \u0027Personal Injury\u0027 category", + "sql_context": "CREATE TABLE cases (case_id INT, category VARCHAR(50), billing_amount INT); INSERT INTO cases (case_id, category, billing_amount) VALUES (1, \u0027Personal Injury\u0027, 5000), (2, \u0027Civil Litigation\u0027, 7000);", + "sql": "SELECT AVG(billing_amount) FROM cases WHERE category \u003d \u0027Personal Injury\u0027;", + "sql_explanation": "This SQL query calculates the average billing amount for cases in the \u0027Personal Injury\u0027 category by taking the average of 
the \u0027billing_amount\u0027 column where the \u0027category\u0027 is \u0027Personal Injury\u0027." +}, { + "id": "4675", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the minimum billing amount for cases in the \u0027Personal Injury\u0027 category", + "sql_context": "CREATE TABLE cases (case_id INT, category VARCHAR(50), billing_amount INT); INSERT INTO cases (case_id, category, billing_amount) VALUES (1, \u0027Personal Injury\u0027, 5000), (2, \u0027Civil Litigation\u0027, 7000);", + "sql": "SELECT MIN(billing_amount) FROM cases WHERE category \u003d \u0027Personal Injury\u0027;", + "sql_explanation": "This SQL query calculates the minimum billing amount for cases in the \u0027Personal Injury\u0027 category by finding the minimum value in the \u0027billing_amount\u0027 column of the \u0027cases\u0027 table where the \u0027category\u0027 is \u0027Personal Injury\u0027." 
+}, { + "id": "4860", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum hourly rate for attorneys who have won at least one case?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID int, HourlyRate decimal(5,2), Wins int); INSERT INTO Attorneys (AttorneyID, HourlyRate, Wins) VALUES (1, 300.00, 1), (2, 250.00, 0), (3, 350.00, 2);", + "sql": "SELECT MIN(HourlyRate) AS MinHourlyRate FROM Attorneys WHERE Wins \u003e 0;", + "sql_explanation": "Finds the minimum hourly rate for attorneys who have won at least one case. It filters for attorneys with at least one win and then calculates the minimum hourly rate from the resulting set." +}, { + "id": "4862", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum billing amount for cases in the real estate category?", + "sql_context": "CREATE TABLE cases (case_id INT, category VARCHAR(20), billing_amount DECIMAL(10, 2));", + "sql": "SELECT MIN(billing_amount) FROM cases WHERE category \u003d \u0027real estate\u0027;", + "sql_explanation": "This query calculates the minimum billing amount for cases in the \u0027real estate\u0027 category by finding the lowest billing_amount value in the cases table where the category is \u0027real estate\u0027." 
+}, { + "id": "4871", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum billing amount for clients in the \u0027los angeles\u0027 region?", + "sql_context": "CREATE TABLE clients (id INT, name TEXT, region TEXT, billing_amount DECIMAL(10, 2)); INSERT INTO clients (id, name, region, billing_amount) VALUES (1, \u0027Alice\u0027, \u0027los angeles\u0027, 200.00), (2, \u0027Bob\u0027, \u0027los angeles\u0027, 300.00), (3, \u0027Charlie\u0027, \u0027los angeles\u0027, 400.00);", + "sql": "SELECT MAX(billing_amount) FROM clients WHERE region \u003d \u0027los angeles\u0027;", + "sql_explanation": "This query calculates the maximum billing amount for clients in the \u0027los angeles\u0027 region by selecting the maximum billing_amount value for clients in that region." 
+}, { + "id": "4961", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum billing amount for cases in the \u0027Civil Law\u0027 category?", + "sql_context": "CREATE TABLE cases (case_id INT, category VARCHAR(20), billing_amount DECIMAL(5,2)); INSERT INTO cases (case_id, category, billing_amount) VALUES (1, \u0027Criminal Law\u0027, 1500.00), (2, \u0027Civil Law\u0027, 3000.00);", + "sql": "SELECT MAX(billing_amount) FROM cases WHERE category \u003d \u0027Civil Law\u0027;", + "sql_explanation": "This query filters the \u0027cases\u0027 table for rows with a category of \u0027Civil Law\u0027 and calculates the maximum billing amount using the MAX function." 
+}, { + "id": "5264", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of cases handled by attorneys who identify as male?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, gender VARCHAR(10), total_cases INT); INSERT INTO attorneys (attorney_id, gender, total_cases) VALUES (1, \u0027Female\u0027, 15), (2, \u0027Male\u0027, 20), (3, \u0027Male\u0027, 10);", + "sql": "SELECT MAX(total_cases) FROM attorneys WHERE gender \u003d \u0027Male\u0027;", + "sql_explanation": "The SQL query calculates the maximum number of cases handled by attorneys with gender \u0027Male\u0027 from the attorneys table." +}, { + "id": "5294", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for cases in the year 2020?", + "sql_context": "CREATE TABLE cases (case_id INT, case_year INT, billing_amount INT);", + "sql": "SELECT SUM(billing_amount) FROM cases WHERE case_year \u003d 2020;", + "sql_explanation": "This SQL query calculates the total billing amount for cases in the year 2020. It does this by using the SUM function on the billing_amount column, and filtering the rows to only include cases where the case_year is 2020 using the WHERE clause." 
+}, { + "id": "5318", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average billing rate for attorneys in the NY office?", + "sql_context": "CREATE TABLE attorneys (id INT, name VARCHAR(255), office VARCHAR(255), billing_rate FLOAT); INSERT INTO attorneys (id, name, office, billing_rate) VALUES (1, \u0027Brown\u0027, \u0027NY\u0027, 300.00), (2, \u0027Smith\u0027, \u0027NY\u0027, 350.00), (3, \u0027Johnson\u0027, \u0027LA\u0027, 400.00);", + "sql": "SELECT AVG(billing_rate) FROM attorneys WHERE office \u003d \u0027NY\u0027;", + "sql_explanation": "The query calculates the average billing rate for attorneys in the NY office by averaging the billing rate where the office is NY." 
+}, { + "id": "5399", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for cases handled by the attorney with the ID 3?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, Name VARCHAR(50)); INSERT INTO Attorneys (AttorneyID, Name) VALUES (1, \u0027Smith, John\u0027), (2, \u0027Garcia, Maria\u0027), (3, \u0027Li, Wei\u0027); CREATE TABLE Cases (CaseID INT, AttorneyID INT, BillingAmount DECIMAL(10, 2)); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (1, 1, 5000.00), (2, 2, 3500.00), (3, 3, 4000.00), (4, 3, 6000.00);", + "sql": "SELECT SUM(BillingAmount) FROM Cases WHERE AttorneyID \u003d 3;", + "sql_explanation": "Filter the Cases table to only include rows where the AttorneyID is 3 and then calculate the sum of the BillingAmount for those rows." 
+}, { + "id": "5481", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the case type for case_id 1 to \u0027Family\u0027.", + "sql_context": "CREATE TABLE cases (case_id INT, case_type VARCHAR(255)); INSERT INTO cases (case_id, case_type) VALUES (1, \u0027Civil\u0027), (2, \u0027Criminal\u0027);", + "sql": "UPDATE cases SET case_type \u003d \u0027Family\u0027 WHERE case_id \u003d 1;", + "sql_explanation": "The SQL query updates the case_type field for the record with case_id 1 to be \u0027Family\u0027." +}, { + "id": "5594", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove all clients with the last name \u0027Lee\u0027 from the \u0027clients\u0027 table", + "sql_context": "CREATE TABLE clients (client_id INT PRIMARY KEY, client_first_name VARCHAR(50), client_last_name VARCHAR(50)); INSERT INTO clients (client_id, client_first_name, client_last_name) VALUES (1, \u0027Grace\u0027, \u0027Lee\u0027), (2, \u0027James\u0027, \u0027Lee\u0027), (3, \u0027Jasmine\u0027, \u0027Wong\u0027), (4, \u0027Henry\u0027, \u0027Chen\u0027);", + "sql": "DELETE FROM clients WHERE client_last_name \u003d \u0027Lee\u0027;", + "sql_explanation": "This query deletes all records from the \u0027clients\u0027 table where the \u0027client_last_name\u0027 column has a value of 
\u0027Lee\u0027." +}, { + "id": "5648", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum billing amount for family law cases?", + "sql_context": "CREATE TABLE family_law_cases (case_id INT, billing_amount DECIMAL(10,2)); INSERT INTO family_law_cases (case_id, billing_amount) VALUES (1, 3000.00), (2, 4500.00), (3, 8000.00);", + "sql": "SELECT MAX(billing_amount) FROM family_law_cases;", + "sql_explanation": "This SQL query calculates the maximum billing amount for family law cases by using the MAX function on the billing_amount column in the family_law_cases table." +}, { + "id": "5814", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all cases with a fee over $5000 from the \u0027cases\u0027 table", + "sql_context": "CREATE TABLE cases (case_id INT PRIMARY KEY, case_name VARCHAR(50), fee DECIMAL(7,2)); INSERT INTO cases (case_id, case_name, fee) VALUES (1, \u0027Case1\u0027, 4000.00), (2, \u0027Case2\u0027, 6000.00), (3, \u0027Case3\u0027, 3000.00);", + "sql": "DELETE FROM cases WHERE fee \u003e 5000;", + "sql_explanation": "This query deletes all records from the \u0027cases\u0027 table where the \u0027fee\u0027 column has a value greater than 5000." 
+}, { + "id": "2314", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine species in the \u0027Coral Reef\u0027 and \u0027Open Ocean\u0027 habitats?", + "sql_context": "CREATE TABLE marine_species (id INT, name VARCHAR(255), habitat_type VARCHAR(255), average_depth FLOAT); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (1, \u0027Clownfish\u0027, \u0027Coral Reef\u0027, 20.0); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (2, \u0027Blue Whale\u0027, \u0027Open Ocean\u0027, 200.0); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (3, \u0027Sea Turtle\u0027, \u0027Coral Reef\u0027, 10.0);", + "sql": "SELECT SUM(CASE WHEN ms.habitat_type IN (\u0027Coral Reef\u0027, \u0027Open Ocean\u0027) THEN 1 ELSE 0 END) as total_species FROM marine_species ms;", + "sql_explanation": "This SQL query retrieves the total number of marine species in the \u0027Coral Reef\u0027 and \u0027Open Ocean\u0027 habitats. It does so by using a CASE statement to check if the habitat_type is either \u0027Coral Reef\u0027 or \u0027Open Ocean\u0027. If the condition is true, the query increments the count by 1, otherwise it increments the count by 0. The query then uses the SUM() function to calculate the total count of marine species in the specified habitats." 
+}, { + "id": "2967", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum coral cover for the last 10 years?", + "sql_context": "CREATE TABLE coral_cover (year INT, coral_cover FLOAT); INSERT INTO coral_cover (year, coral_cover) VALUES (2011, 25.0), (2012, 23.5), (2013, 22.2), (2014, 21.9), (2015, 21.1), (2016, 20.4), (2017, 19.8), (2018, 19.2), (2019, 18.8), (2020, 18.5);", + "sql": "SELECT MIN(coral_cover) FROM coral_cover WHERE year BETWEEN (YEAR(CURRENT_DATE) - 10) AND YEAR(CURRENT_DATE);", + "sql_explanation": "This query retrieves the minimum coral cover for the last 10 years by selecting the minimum coral_cover value from the coral_cover table where the year is within the last 10 years." 
+}, { + "id": "3170", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of the Indian Ocean and the Southern Ocean?", + "sql_context": "CREATE TABLE ocean_depths (ocean VARCHAR(255), avg_depth FLOAT); INSERT INTO ocean_depths (ocean, avg_depth) VALUES (\u0027Indian Ocean\u0027, 3962.2); INSERT INTO ocean_depths (ocean, avg_depth) VALUES (\u0027Southern Ocean\u0027, 3487.3);", + "sql": "SELECT AVG(avg_depth) as avg_depth FROM ocean_depths WHERE ocean IN (\u0027Indian Ocean\u0027, \u0027Southern Ocean\u0027);", + "sql_explanation": "We calculate the average depth of the Indian and Southern Oceans by finding the average of their average depths." 
+}, { + "id": "3473", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum population size of marine mammals that are threatened or endangered?", + "sql_context": "CREATE TABLE marine_mammals (mammal_id INT, name VARCHAR(50), population INT, status VARCHAR(20)); INSERT INTO marine_mammals (mammal_id, name, population, status) VALUES (1, \u0027Blue Whale\u0027, 10000, \u0027Endangered\u0027), (2, \u0027Fin Whale\u0027, 25000, \u0027Threatened\u0027), (3, \u0027Sei Whale\u0027, 15000, \u0027Threatened\u0027);", + "sql": "SELECT MAX(population) FROM marine_mammals WHERE status \u003d \u0027Endangered\u0027 OR status \u003d \u0027Threatened\u0027;", + "sql_explanation": "This SQL query calculates the maximum population size of marine mammals that are threatened or endangered by using the MAX function on the population column and filtering the marine_mammals table using the WHERE clause." 
+}, { + "id": "3517", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum and minimum conservation scores for marine species.", + "sql_context": "CREATE TABLE species (id INT, name VARCHAR(255), conservation_score INT);", + "sql": "SELECT MAX(conservation_score) AS max_score, MIN(conservation_score) AS min_score FROM species;", + "sql_explanation": "This query calculates the maximum and minimum values of the conservation_score column in the species table." +}, { + "id": "3595", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine species located in the Atlantic and Pacific Oceans?", + "sql_context": "CREATE TABLE marine_species (id INT, species_name VARCHAR(255), location VARCHAR(255)); INSERT INTO marine_species (id, species_name, location) VALUES (1, \u0027Narwhal\u0027, \u0027Arctic\u0027), (2, \u0027Beluga\u0027, \u0027Arctic\u0027), (3, \u0027Blue Whale\u0027, \u0027Atlantic\u0027), (4, \u0027Giant Pacific Octopus\u0027, \u0027Pacific\u0027);", + "sql": "SELECT COUNT(*) FROM marine_species WHERE marine_species.location IN (\u0027Atlantic\u0027, \u0027Pacific\u0027);", + "sql_explanation": "The query calculates the number of marine species in the 
Atlantic and Pacific Oceans by counting the rows in the marine_species table with a location of \u0027Atlantic\u0027 or \u0027Pacific\u0027." +}, { + "id": "3658", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total population of seals in the North Pacific", + "sql_context": "CREATE TABLE seal_populations (id INT, region TEXT, year INT, population INT);", + "sql": "SELECT SUM(population) FROM seal_populations WHERE region \u003d \u0027North Pacific\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the total population of seals in the North Pacific for the year 2020 by using the SUM function on the population column from the seal_populations table." 
+}, { + "id": "3716", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum depth of all marine protected areas with a conservation status of \u0027Endangered\u0027?", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), area_size FLOAT, avg_depth FLOAT, conservation_status VARCHAR(100)); INSERT INTO marine_protected_areas (id, name, area_size, avg_depth, conservation_status) VALUES (1, \u0027Coral Triangle\u0027, 518000, -200, \u0027Least Concern\u0027), (2, \u0027Great Barrier Reef\u0027, 344400, -500, \u0027Critically Endangered\u0027), (3, \u0027Galapagos Marine Reserve\u0027, 133000, -300, \u0027Endangered\u0027);", + "sql": "SELECT MIN(avg_depth) FROM marine_protected_areas WHERE conservation_status \u003d \u0027Endangered\u0027;", + "sql_explanation": "This query calculates the minimum value of the avg_depth column in the marine_protected_areas table for rows with a conservation status of \u0027Endangered\u0027." 
+}, { + "id": "4141", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum sea surface temperature recorded in the Indian Ocean?", + "sql_context": "CREATE TABLE sea_surface_temperature (region TEXT, temperature NUMERIC); INSERT INTO sea_surface_temperature (region, temperature) VALUES (\u0027Indian Ocean\u0027, \u002729.5\u0027); INSERT INTO sea_surface_temperature (region, temperature) VALUES (\u0027Indian Ocean\u0027, \u002730.3\u0027);", + "sql": "SELECT MAX(temperature) FROM sea_surface_temperature WHERE region \u003d \u0027Indian Ocean\u0027;", + "sql_explanation": "This query selects the maximum value of the \u0027temperature\u0027 column for all records where the \u0027region\u0027 is the \u0027Indian Ocean\u0027." 
+}, { + "id": "4258", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine species are there in the \u0027marine_species\u0027 table without any conservation status?\"", + "sql_context": "CREATE TABLE marine_species (species_name VARCHAR(50), conservation_status VARCHAR(50));", + "sql": "SELECT COUNT(species_name) FROM marine_species WHERE conservation_status IS NULL;", + "sql_explanation": "The SQL query counts the number of records in the \u0027marine_species\u0027 table where the \u0027conservation_status\u0027 column is NULL, indicating species without a conservation status." +}, { + "id": "4417", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of marine protected areas in the Pacific Ocean?", + "sql_context": "CREATE TABLE marine_protected_areas (name VARCHAR(255), area_id INT, depth FLOAT, size INT, country VARCHAR(255)); INSERT INTO marine_protected_areas (name, area_id, depth, size, country) VALUES (\u0027Palau National Marine Sanctuary\u0027, 5, 3000, 500000, \u0027Palau\u0027), (\u0027Phoenix Islands Protected Area\u0027, 6, 5000, 408000, \u0027Kiribati\u0027);", + "sql": "SELECT MAX(depth) FROM marine_protected_areas WHERE country \u003d \u0027Pacific 
Ocean\u0027;", + "sql_explanation": "This query calculates the maximum depth of marine protected areas in the Pacific Ocean by finding the maximum value in the \u0027depth\u0027 column in the \u0027marine_protected_areas\u0027 table for rows where the \u0027country\u0027 column is \u0027Pacific Ocean\u0027. Note that this assumes \u0027Pacific Ocean\u0027 is a value in the \u0027country\u0027 column, which may not be the case in a real-world scenario." +}, { + "id": "4450", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the distinct marine species and their observation counts in the Indian Ocean, excluding sharks.", + "sql_context": "CREATE TABLE indian_ocean_marine_life (species VARCHAR(255), count INT); INSERT INTO indian_ocean_marine_life (species, count) VALUES (\u0027Turtle\u0027, 150), (\u0027Shark\u0027, 200), (\u0027Whale\u0027, 100);", + "sql": "SELECT species, count FROM indian_ocean_marine_life WHERE species !\u003d \u0027Shark\u0027;", + "sql_explanation": "The SQL query filters the indian_ocean_marine_life table to exclude rows with the species \u0027Shark\u0027. It then selects the distinct species and their observation counts to display the desired information." 
+}, { + "id": "4578", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all marine protected areas in the Pacific region?\u0027", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name TEXT, region TEXT, avg_depth FLOAT); INSERT INTO marine_protected_areas (id, name, region, avg_depth) VALUES (1, \u0027Galapagos Marine Reserve\u0027, \u0027Pacific\u0027, 200.5); INSERT INTO marine_protected_areas (id, name, region, avg_depth) VALUES (2, \u0027Great Barrier Reef\u0027, \u0027Pacific\u0027, 91.4);", + "sql": "SELECT AVG(avg_depth) FROM marine_protected_areas WHERE region \u003d \u0027Pacific\u0027;", + "sql_explanation": "This query calculates the average depth (avg_depth) of all marine protected areas (marine_protected_areas table) located in the Pacific region." 
+}, { + "id": "4599", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of marine protected areas in the Caribbean region?", + "sql_context": "CREATE TABLE marine_protected_areas (area_name TEXT, depth_m FLOAT, region TEXT); INSERT INTO marine_protected_areas (area_name, depth_m, region) VALUES (\u0027Bonaire National Marine Park\u0027, 60.0, \u0027Caribbean\u0027), (\u0027Buck Island Reef National Monument\u0027, 90.0, \u0027Caribbean\u0027);", + "sql": "SELECT MAX(depth_m) FROM marine_protected_areas WHERE region \u003d \u0027Caribbean\u0027;", + "sql_explanation": "Filter the marine_protected_areas table by rows where region is \u0027Caribbean\u0027, then return the maximum value of the depth_m column." 
+}, { + "id": "4663", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidity level recorded in the Atlantic Ocean?", + "sql_context": "CREATE TABLE ocean_acidity_2 (region TEXT, acidity NUMERIC); INSERT INTO ocean_acidity_2 (region, acidity) VALUES (\u0027Atlantic Ocean\u0027, \u00278.4\u0027); INSERT INTO ocean_acidity_2 (region, acidity) VALUES (\u0027Atlantic Ocean\u0027, \u00278.35\u0027);", + "sql": "SELECT MAX(acidity) FROM ocean_acidity_2 WHERE region \u003d \u0027Atlantic Ocean\u0027;", + "sql_explanation": "This query selects the maximum value of the \u0027acidity\u0027 column for all records in the \u0027ocean_acidity_2\u0027 table where the \u0027region\u0027 is the \u0027Atlantic Ocean\u0027." 
+}, { + "id": "4679", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size of marine protected areas in the Caribbean?", + "sql_context": "CREATE TABLE marine_protected_areas (name VARCHAR(255), area_id INT, depth FLOAT, size INT, country VARCHAR(255)); INSERT INTO marine_protected_areas (name, area_id, depth, size, country) VALUES (\u0027Buck Island Reef National Monument\u0027, 7, 10, 1760, \u0027US Virgin Islands\u0027), (\u0027Montego Bay Marine Park\u0027, 8, 20, 1500, \u0027Jamaica\u0027);", + "sql": "SELECT AVG(size) FROM marine_protected_areas WHERE country \u003d \u0027Caribbean\u0027;", + "sql_explanation": "This query calculates the average size of marine protected areas in the Caribbean by taking the average of the \u0027size\u0027 column in the \u0027marine_protected_areas\u0027 table for rows where the \u0027country\u0027 column is \u0027Caribbean\u0027. Note that this assumes \u0027Caribbean\u0027 is a value in the \u0027country\u0027 column, which may not be the case in a real-world scenario." 
+}, { + "id": "4780", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of marine protected areas in the Arctic?", + "sql_context": "CREATE TABLE marine_protected_areas (name VARCHAR(255), area_id INT, depth FLOAT, size INT, country VARCHAR(255)); INSERT INTO marine_protected_areas (name, area_id, depth, size, country) VALUES (\u0027Norwegian Arctic Archipelago\u0027, 13, 300, 196000, \u0027Norway\u0027), (\u0027Gulf of Leptev Sea\u0027, 14, 400, 320000, \u0027Russia\u0027);", + "sql": "SELECT AVG(depth) FROM marine_protected_areas WHERE country \u003d \u0027Arctic\u0027;", + "sql_explanation": "This query calculates the average depth of marine protected areas in the Arctic by taking the average of the \u0027depth\u0027 column in the \u0027marine_protected_areas\u0027 table for rows where the \u0027country\u0027 column is \u0027Arctic\u0027. Note that this assumes \u0027Arctic\u0027 is a value in the \u0027country\u0027 column, which may not be the case in a real-world scenario." 
+}, { + "id": "4831", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine species are found in the Atlantic Ocean?", + "sql_context": "CREATE TABLE marine_species (name VARCHAR(50), common_name VARCHAR(50), location VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM marine_species WHERE location \u003d \u0027Atlantic Ocean\u0027;", + "sql_explanation": "This query counts the number of marine species found in the Atlantic Ocean by using the COUNT() function with a WHERE clause." +}, { + "id": "4903", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidity level in the Southern Ocean?", + "sql_context": "CREATE TABLE ocean_acidity (region varchar(255), level decimal(10,2)); INSERT INTO ocean_acidity (region, level) VALUES (\u0027Southern Ocean\u0027, 8.30), (\u0027Arctic\u0027, 8.20), (\u0027Indian\u0027, 8.15);", + "sql": "SELECT MAX(level) FROM ocean_acidity WHERE region \u003d \u0027Southern Ocean\u0027;", + "sql_explanation": "This SQL query calculates the maximum ocean acidity level in the Southern Ocean by selecting the maximum value of the level column from the ocean_acidity table where the region is \u0027Southern Ocean\u0027." 
+}, { + "id": "5171", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine research stations are in the Pacific Ocean?", + "sql_context": "CREATE TABLE research_stations (station_name VARCHAR(50), ocean VARCHAR(20)); INSERT INTO research_stations (station_name, ocean) VALUES (\u0027Hawaii Ocean Observing System\u0027, \u0027Pacific\u0027), (\u0027Monterey Bay Aquarium Research Institute\u0027, \u0027Pacific\u0027);", + "sql": "SELECT COUNT(*) FROM research_stations WHERE ocean \u003d \u0027Pacific\u0027;", + "sql_explanation": "Count the number of records in the \u0027research_stations\u0027 table where the \u0027ocean\u0027 column value is \u0027Pacific\u0027." 
+}, { + "id": "5204", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine protected areas in the Caribbean region?", + "sql_context": "CREATE TABLE ProtectedAreas(id INT, name VARCHAR(50), region VARCHAR(30)); INSERT INTO ProtectedAreas(id, name, region) VALUES (1, \u0027Bonaire National Marine Park\u0027, \u0027Caribbean\u0027), (2, \u0027Bahamas Exuma Cays Land and Sea Park\u0027, \u0027Caribbean\u0027), (3, \u0027Galapagos Marine Reserve\u0027, \u0027South America\u0027);", + "sql": "SELECT COUNT(*) FROM ProtectedAreas WHERE region \u003d \u0027Caribbean\u0027;", + "sql_explanation": "This query calculates the total number of marine protected areas in the Caribbean region. It filters the data by region and counts the number of records for the Caribbean." 
+}, { + "id": "5210", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidity level in the Pacific Ocean?", + "sql_context": "CREATE TABLE ocean_acidity (region varchar(255), level decimal(10,2)); INSERT INTO ocean_acidity (region, level) VALUES (\u0027Pacific\u0027, 8.25), (\u0027Southern Ocean\u0027, 8.30), (\u0027Indian\u0027, 8.15);", + "sql": "SELECT MAX(level) FROM ocean_acidity WHERE region \u003d \u0027Pacific\u0027;", + "sql_explanation": "This SQL query calculates the maximum ocean acidity level in the Pacific Ocean by selecting the maximum value of the level column from the ocean_acidity table where the region is \u0027Pacific\u0027." 
+}, { + "id": "5313", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine species observed in the Arctic?", + "sql_context": "CREATE TABLE marine_species (species_name TEXT, region TEXT); INSERT INTO marine_species (species_name, region) VALUES (\u0027Narwhal\u0027, \u0027Arctic\u0027); INSERT INTO marine_species (species_name, region) VALUES (\u0027Polar Bear\u0027, \u0027Arctic\u0027);", + "sql": "SELECT COUNT(*) FROM marine_species WHERE region \u003d \u0027Arctic\u0027;", + "sql_explanation": "This query counts all records in the \u0027marine_species\u0027 table where the \u0027region\u0027 is the \u0027Arctic\u0027." 
+}, { + "id": "5342", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many conservation efforts were made in 2010?", + "sql_context": "CREATE TABLE conservation_efforts (effort_id INT, species_name VARCHAR(50), year INT, description TEXT); INSERT INTO conservation_efforts (effort_id, species_name, year, description) VALUES (1, \u0027Turtle\u0027, 2005, \u0027Hawaiian green turtle recovery\u0027), (2, \u0027Clownfish\u0027, 2010, \u0027Clownfish conservation program\u0027);", + "sql": "SELECT COUNT(*) FROM conservation_efforts WHERE year \u003d 2010;", + "sql_explanation": "This query counts the total number of rows in the conservation_efforts table with the year equal to 2010 using the WHERE clause and COUNT() function." 
+}, { + "id": "5372", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \"oceanography\" table where the \"ocean_name\" is \"Indian Ocean\"", + "sql_context": "CREATE TABLE oceanography (id INT PRIMARY KEY, ocean_name VARCHAR(255), depth FLOAT); INSERT INTO oceanography (id, ocean_name, depth) VALUES (1, \u0027Pacific Ocean\u0027, 3970); INSERT INTO oceanography (id, ocean_name, depth) VALUES (2, \u0027Indian Ocean\u0027, 3960);", + "sql": "DELETE FROM oceanography WHERE ocean_name \u003d \u0027Indian Ocean\u0027;", + "sql_explanation": "This query deletes all records from the \"oceanography\" table where the \"ocean_name\" is \"Indian Ocean\"." 
+}, { + "id": "5451", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth of the Arctic Ocean?", + "sql_context": "CREATE TABLE oceans (name TEXT, max_depth REAL); INSERT INTO oceans (name, max_depth) VALUES (\u0027Pacific Ocean\u0027, 10994), (\u0027Atlantic Ocean\u0027, 8605), (\u0027Indian Ocean\u0027, 7490), (\u0027Southern Ocean\u0027, 7280), (\u0027Arctic Ocean\u0027, 5527);", + "sql": "SELECT max_depth FROM oceans WHERE name \u003d \u0027Arctic Ocean\u0027;", + "sql_explanation": "This query retrieves the max_depth value for the \u0027Arctic Ocean\u0027 row in the oceans table using the WHERE clause to filter by name." +}, { + "id": "5644", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum depth at which a marine species can be found?", + "sql_context": "CREATE TABLE marine_species_depths (species TEXT, min_depth FLOAT);", + "sql": "SELECT MIN(min_depth) FROM marine_species_depths;", + "sql_explanation": "This query calculates the minimum depth at which a marine species can be found by taking the minimum value of the \u0027min_depth\u0027 column in the \u0027marine_species_depths\u0027 table." 
+}, { + "id": "5645", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum depth at which any marine species is found?", + "sql_context": "CREATE TABLE marine_species_depths (species VARCHAR(255), max_depth FLOAT); INSERT INTO marine_species_depths (species, max_depth) VALUES (\u0027Anglerfish\u0027, 3000.0), (\u0027Giant Squid\u0027, 3000.0);", + "sql": "SELECT MAX(max_depth) FROM marine_species_depths;", + "sql_explanation": "This query calculates the maximum depth at which any marine species is found. It does this by selecting the maximum value of the max_depth column." +}, { + "id": "5704", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size of all marine turtles?", + "sql_context": "CREATE TABLE marine_turtles (id INT, name TEXT, average_size FLOAT); INSERT INTO marine_turtles (id, name, average_size) VALUES (1, \u0027Leatherback\u0027, 200), (2, \u0027Loggerhead\u0027, 90), (3, \u0027Green\u0027, 120), (4, \u0027Hawksbill\u0027, 80), (5, \u0027Olive Ridley\u0027, 70);", + "sql": "SELECT AVG(average_size) FROM marine_turtles;", + "sql_explanation": "Calculate the average value of the \u0027average_size\u0027 column in the \u0027marine_turtles\u0027 table." 
+}, { + "id": "5782", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum depth of any ocean feature?", + "sql_context": "CREATE TABLE ocean_features (name TEXT, depth FLOAT); INSERT INTO ocean_features (name, depth) VALUES (\u0027Mariana Trench\u0027, 10994.0), (\u0027Puerto Rico Trench\u0027, 8605.0), (\u0027Siberian Traps\u0027, 3000.0);", + "sql": "SELECT MIN(depth) FROM ocean_features;", + "sql_explanation": "This query calculates the minimum depth of any ocean feature by finding the minimum depth value in the ocean_features table." +}, { + "id": "2839", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ticket price for jazz concerts in Canada?", + "sql_context": "CREATE TABLE Concerts (id INT, city VARCHAR(50), genre VARCHAR(50), price DECIMAL(5,2)); INSERT INTO Concerts (id, city, genre, price) VALUES (1, \u0027Toronto\u0027, \u0027Jazz\u0027, 60.00), (2, \u0027Vancouver\u0027, \u0027Jazz\u0027, 65.00), (3, \u0027Montreal\u0027, \u0027Jazz\u0027, 55.00), (4, \u0027Ottawa\u0027, \u0027Jazz\u0027, 70.00);", + "sql": "SELECT AVG(price) FROM Concerts WHERE city IN (\u0027Toronto\u0027, \u0027Vancouver\u0027, \u0027Montreal\u0027, \u0027Ottawa\u0027) AND genre \u003d \u0027Jazz\u0027;", + 
"sql_explanation": "The SQL query calculates the average ticket price for jazz concerts in Canada by using the AVG function and filtering the data with the WHERE clause. The IN keyword is used to filter data from multiple cities in Canada." +}, { + "id": "3513", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of streams for Hip Hop songs in the USA in 2021?", + "sql_context": "CREATE TABLE Streaming (country VARCHAR(50), year INT, genre VARCHAR(50), streams INT); INSERT INTO Streaming (country, year, genre, streams) VALUES (\u0027USA\u0027, 2021, \u0027Hip Hop\u0027, 1200000); INSERT INTO Streaming (country, year, genre, streams) VALUES (\u0027USA\u0027, 2021, \u0027Hip Hop\u0027, 1300000);", + "sql": "SELECT SUM(streams) FROM Streaming WHERE country \u003d \u0027USA\u0027 AND year \u003d 2021 AND genre \u003d \u0027Hip Hop\u0027;", + "sql_explanation": "Find the sum of streams for Hip Hop songs in the USA in 2021 by selecting the sum of streams in the Streaming table where country is \u0027USA\u0027, year is 2021 and genre is \u0027Hip Hop\u0027." 
+}, { + "id": "3534", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all concerts in the United Kingdom with a capacity greater than 10,000.", + "sql_context": "CREATE TABLE concerts (concert_id INT, concert_name VARCHAR(255), country VARCHAR(255), capacity INT); INSERT INTO concerts (concert_id, concert_name, country, capacity) VALUES (1, \u0027Pop Music Festival\u0027, \u0027UK\u0027, 15000), (2, \u0027Rock Music Festival\u0027, \u0027France\u0027, 20000), (3, \u0027Jazz Music Festival\u0027, \u0027UK\u0027, 8000), (4, \u0027Hip Hop Music Festival\u0027, \u0027Germany\u0027, 12000);", + "sql": "SELECT concert_name, country, capacity FROM concerts WHERE country \u003d \u0027UK\u0027 AND capacity \u003e 10000;", + "sql_explanation": "This query selects all rows from the \u0027concerts\u0027 table where the \u0027country\u0027 column is \u0027UK\u0027 and the \u0027capacity\u0027 column is greater than 10,000. It returns the concert name, country, and capacity." 
+}, { + "id": "3988", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum ticket price for concerts in Argentina for artist \u0027Bad Bunny\u0027?", + "sql_context": "CREATE TABLE Concerts (id INT, artist VARCHAR(100), location VARCHAR(100), price DECIMAL(5,2)); INSERT INTO Concerts (id, artist, location, price) VALUES (1, \u0027Bad Bunny\u0027, \u0027Argentina\u0027, 90.00), (2, \u0027Bad Bunny\u0027, \u0027Argentina\u0027, 110.00);", + "sql": "SELECT MIN(price) FROM Concerts WHERE location \u003d \u0027Argentina\u0027 AND artist \u003d \u0027Bad Bunny\u0027", + "sql_explanation": "This query calculates the minimum ticket price for concerts in Argentina for artist \u0027Bad Bunny\u0027 by selecting the \u0027price\u0027 column from the \u0027Concerts\u0027 table where the \u0027location\u0027 is \u0027Argentina\u0027 and the \u0027artist\u0027 is \u0027Bad Bunny\u0027 and then finding the minimum value using the MIN() function." 
+}, { + "id": "4418", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique users who have streamed songs from artists who identify as LGBTQ+ in the \u0027music_streaming\u0027 table.", + "sql_context": "CREATE TABLE music_streaming (stream_id INT, user_id INT, song_id INT, streams INT, date DATE, artist_id INT, artist_lgbtq BOOLEAN);", + "sql": "SELECT COUNT(DISTINCT user_id) FROM music_streaming WHERE artist_lgbtq \u003d true;", + "sql_explanation": "The query first selects the distinct user_id and then filters the data for artists who identify as LGBTQ+. After that, it calculates the count of the distinct user_id." +}, { + "id": "4498", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of streams per user for users in New York who have streamed songs by artists from the Latin genre?", + "sql_context": "CREATE TABLE Users (id INT, state VARCHAR(255), genre VARCHAR(255), streams INT);", + "sql": "SELECT AVG(streams) FROM Users WHERE state \u003d \u0027New York\u0027 AND genre \u003d \u0027Latin\u0027;", + "sql_explanation": "The SQL query filters the Users table for rows where the state is New York and the genre is Latin, then calculates the average of the streams column." 
+}, { + "id": "4589", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 3 cities with the highest total ticket sales.", + "sql_context": "CREATE TABLE concerts (id INT, country VARCHAR(255), city VARCHAR(255), artist_name VARCHAR(255), tier VARCHAR(255), price DECIMAL(10,2), num_tickets INT); CREATE VIEW city_sales AS SELECT city, SUM(price * num_tickets) AS total_sales FROM concerts GROUP BY city;", + "sql": "SELECT city, total_sales FROM city_sales ORDER BY total_sales DESC LIMIT 3;", + "sql_explanation": "This query shows the top 3 cities with the highest total ticket sales by first creating a view of city and total sales, then selecting city and total sales and sorting by total sales in descending order, and limiting to the top 3 results." 
+}, { + "id": "4684", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the names of artists who had more than 10 million streams in 2020.", + "sql_context": "CREATE TABLE Streams (artist_name VARCHAR(50), year INT, streams INT); INSERT INTO Streams (artist_name, year, streams) VALUES (\u0027Taylor Swift\u0027, 2020, 12000000), (\u0027Drake\u0027, 2020, 15000000), (\u0027BTS\u0027, 2020, 20000000), (\u0027Billie Eilish\u0027, 2020, 11000000);", + "sql": "SELECT artist_name FROM Streams WHERE year \u003d 2020 AND streams \u003e 10000000;", + "sql_explanation": "This query retrieves the names of artists who had more than 10 million streams in 2020 by selecting the artist_name values from the Streams table where the year is 2020 and the streams value is greater than 10 million." 
+}, { + "id": "4721", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find total tickets sold for all concerts in \u0027New York City\u0027", + "sql_context": "CREATE TABLE concerts (concert_id INT PRIMARY KEY, artist_name VARCHAR(100), concert_date DATE, location VARCHAR(100), tickets_sold INT); INSERT INTO concerts (concert_id, artist_name, concert_date, location, tickets_sold) VALUES (1, \u0027Taylor Swift\u0027, \u00272023-06-15\u0027, \u0027New York City\u0027, 25000); INSERT INTO concerts (concert_id, artist_name, concert_date, location, tickets_sold) VALUES (2, \u0027BTS\u0027, \u00272023-07-01\u0027, \u0027Los Angeles\u0027, 30000); INSERT INTO concerts (concert_id, artist_name, concert_date, location, tickets_sold) VALUES (3, \u0027Drake\u0027, \u00272023-08-10\u0027, \u0027New York City\u0027, 18000);", + "sql": "SELECT SUM(tickets_sold) FROM concerts WHERE location \u003d \u0027New York City\u0027;", + "sql_explanation": "This statement calculates the total tickets sold for all concerts in \u0027New York City\u0027 by selecting the sum of the \u0027tickets_sold\u0027 column where the \u0027location\u0027 column is equal to \u0027New York City\u0027." 
+}, { + "id": "4775", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the city for all concerts in the \u0027Pop\u0027 genre to \u0027New York\u0027.", + "sql_context": "CREATE SCHEMA if not exists music_schema;CREATE TABLE if not exists concerts (id INT, name VARCHAR, city VARCHAR, genre VARCHAR, revenue FLOAT);INSERT INTO concerts (id, name, city, genre, revenue) VALUES (1, \u0027Music Festival\u0027, \u0027New York\u0027, \u0027Pop\u0027, 50000.00), (2, \u0027Rock Concert\u0027, \u0027Chicago\u0027, \u0027Rock\u0027, 75000.00), (3, \u0027Jazz Festival\u0027, \u0027Los Angeles\u0027, \u0027Jazz\u0027, 125000.00), (4, \u0027Hip Hop Concert\u0027, \u0027Miami\u0027, \u0027Hip Hop\u0027, 60000.00), (5, \u0027Country Music Festival\u0027, \u0027Nashville\u0027, \u0027Country\u0027, 40000.00), (6, \u0027EDM Festival\u0027, \u0027Las Vegas\u0027, \u0027EDM\u0027, 80000.00), (7, \u0027Pop Concert\u0027, \u0027Los Angeles\u0027, \u0027Pop\u0027, 70000.00), (8, \u0027Rock Festival\u0027, \u0027Chicago\u0027, \u0027Rock\u0027, 65000.00), (9, \u0027Jazz Concert\u0027, \u0027Los Angeles\u0027, \u0027Jazz\u0027, 110000.00), (10, \u0027Hip Hop Festival\u0027, \u0027Miami\u0027, \u0027Hip Hop\u0027, 75000.00);", + "sql": "UPDATE music_schema.concerts SET city \u003d \u0027New York\u0027 WHERE genre \u003d \u0027Pop\u0027;", + "sql_explanation": "The SQL query updates the city for all concerts in the \u0027Pop\u0027 genre to \u0027New York\u0027 by using an UPDATE statement with a WHERE clause to filter for concerts in the \u0027Pop\u0027 genre." 
+}, { + "id": "4784", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue per concert ticket sale in the \u0027concert_ticket_sales\u0027 table for artists who have performed in California?", + "sql_context": "CREATE TABLE concert_ticket_sales (ticket_id INT, user_id INT, artist_id INT, venue_id INT, price DECIMAL(10,2), date DATE, city VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT AVG(price) FROM concert_ticket_sales WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "The query calculates the average revenue per concert ticket sale in the \u0027concert_ticket_sales\u0027 table for artists who have performed in California by filtering the data for California and then calculating the average price." 
+}, { + "id": "5142", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ticket price for concerts in the Midwest region?", + "sql_context": "CREATE TABLE concerts (id INT, region VARCHAR(20), ticket_price DECIMAL(10, 2)); INSERT INTO concerts (id, region, ticket_price) VALUES (1, \u0027Midwest\u0027, 100.00), (2, \u0027Northeast\u0027, 125.00), (3, \u0027Midwest\u0027, 110.00);", + "sql": "SELECT AVG(ticket_price) FROM concerts WHERE region \u003d \u0027Midwest\u0027;", + "sql_explanation": "This query calculates the average ticket price for concerts in the Midwest region by averaging the ticket_price column where the region is \u0027Midwest\u0027." +}, { + "id": "5274", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records for concerts that took place in the city of \u0027New York\u0027 and have a price greater than 100.", + "sql_context": "CREATE TABLE concerts (id INT, artist VARCHAR(255), city VARCHAR(255), tickets_sold INT, price DECIMAL(10,2));", + "sql": "DELETE FROM concerts WHERE city \u003d \u0027New York\u0027 AND price \u003e 100;", + "sql_explanation": "The SQL query deletes all records in the \u0027concerts\u0027 table where the city is \u0027New York\u0027 and the price is greater than 100." 
+}, { + "id": "5317", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum ticket price for Latin music concerts?", + "sql_context": "CREATE TABLE ConcertTickets (ticket_id INT, genre VARCHAR(20), price DECIMAL(5,2));", + "sql": "SELECT MIN(price) FROM ConcertTickets WHERE genre \u003d \u0027Latin\u0027;", + "sql_explanation": "The SQL query finds the minimum ticket price for Latin music concerts by using the MIN function on the price column, filtering the data where the genre is Latin." +}, { + "id": "5452", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artists are in our database from Asia?", + "sql_context": "CREATE TABLE Artists (ArtistID INT, Name VARCHAR(255), Nationality VARCHAR(255), Genre VARCHAR(255)); INSERT INTO Artists VALUES (8, \u0027Agnez Mo\u0027, \u0027Indonesian\u0027, \u0027Pop\u0027,); INSERT INTO Artists VALUES (9, \u0027Wang Leehom\u0027, \u0027Taiwanese-American\u0027, \u0027Mandopop\u0027,);", + "sql": "SELECT COUNT(*) FROM Artists WHERE Nationality \u003d \u0027Asian\u0027;", + "sql_explanation": "This SQL query counts the number of artists in our database from Asia in the Artists table. 
It filters the records using the WHERE clause to only include artists from Asia, then applies the COUNT function to find the number of records." +}, { + "id": "5479", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of R\u0026B artists?", + "sql_context": "CREATE TABLE Artists (ArtistID INT, ArtistName VARCHAR(100), Genre VARCHAR(50), Age INT); INSERT INTO Artists (ArtistID, ArtistName, Genre, Age) VALUES (11, \u0027Beyonce\u0027, \u0027R\u0026B\u0027, 40); INSERT INTO Artists (ArtistID, ArtistName, Genre, Age) VALUES (12, \u0027Rihanna\u0027, \u0027R\u0026B\u0027, 34);", + "sql": "SELECT Genre, AVG(Age) FROM Artists WHERE Genre \u003d \u0027R\u0026B\u0027;", + "sql_explanation": "This SQL query calculates the average age of R\u0026B artists by selecting the genre and average age from the Artists table, filtering for the R\u0026B genre." 
+}, { + "id": "5583", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all concert records for the artist \u0027Billie Eilish\u0027 in the \u0027concerts\u0027 table.", + "sql_context": "CREATE TABLE concerts (id INT, artist VARCHAR(255), city VARCHAR(255), tickets_sold INT, price DECIMAL(10,2));", + "sql": "DELETE FROM concerts WHERE artist \u003d \u0027Billie Eilish\u0027;", + "sql_explanation": "The SQL query deletes all records in the \u0027concerts\u0027 table where the artist is \u0027Billie Eilish\u0027." +}, { + "id": "1527", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight handled per day by the port of Shanghai in February 2022?", + "sql_context": "CREATE TABLE cargo_handling (cargo_handling_id INT, port VARCHAR(255), cargo_weight INT, handling_date DATE);INSERT INTO cargo_handling (cargo_handling_id, port, cargo_weight, handling_date) VALUES (1, \u0027Shanghai\u0027, 50000, \u00272022-02-01\u0027), (2, \u0027Shanghai\u0027, 55000, \u00272022-02-02\u0027);", + "sql": "SELECT AVG(cargo_weight) FROM cargo_handling WHERE port \u003d \u0027Shanghai\u0027 AND EXTRACT(MONTH FROM handling_date) \u003d 2 AND EXTRACT(YEAR FROM handling_date) \u003d 2022;", + "sql_explanation": "This SQL query calculates the 
average cargo weight handled per day by the port of Shanghai in February 2022 by filtering the data for the port of Shanghai, the month of February and the year 2022 and then averaging the cargo weight." +}, { + "id": "2297", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of containers handled in a single day by cranes in the Port of Tokyo in May 2021?", + "sql_context": "CREATE TABLE Port_Tokyo_Crane_Stats (crane_name TEXT, handling_date DATE, containers_handled INTEGER); INSERT INTO Port_Tokyo_Crane_Stats (crane_name, handling_date, containers_handled) VALUES (\u0027CraneA\u0027, \u00272021-05-01\u0027, 90), (\u0027CraneB\u0027, \u00272021-05-02\u0027, 85), (\u0027CraneC\u0027, \u00272021-05-03\u0027, 95), (\u0027CraneD\u0027, \u00272021-05-04\u0027, 80);", + "sql": "SELECT MAX(containers_handled) FROM Port_Tokyo_Crane_Stats WHERE handling_date \u003e\u003d \u00272021-05-01\u0027 AND handling_date \u003c\u003d \u00272021-05-31\u0027;", + "sql_explanation": "The query filters for crane statistics in the Port of Tokyo in May 2021. It then calculates the maximum number of containers handled in a single day during this time period." 
+}, { + "id": "2559", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update status to \u0027inactive\u0027 for vessels that have not been maintained in 9 months", + "sql_context": "CREATE SCHEMA if not exists ocean_shipping;CREATE TABLE if not exists ocean_shipping.vessels (id INT, name VARCHAR(255), status VARCHAR(255), last_maintenance DATE);", + "sql": "UPDATE ocean_shipping.vessels SET status \u003d \u0027inactive\u0027 WHERE last_maintenance \u003c DATE_SUB(CURRENT_DATE, INTERVAL 9 MONTH);", + "sql_explanation": "The SQL query updates records in the vessels table, changing the status to \u0027inactive\u0027 for records where the last_maintenance date is more than 9 months prior to the current date. This is done using the UPDATE statement." 
+}, { + "id": "2843", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique destinations of container vessels for the month of July 2022", + "sql_context": "CREATE TABLE vessel_destinations (vessel_name VARCHAR(50), destination VARCHAR(50), departure_date DATE, arrival_date DATE); INSERT INTO vessel_destinations VALUES (\u0027Ever Given\u0027, \u0027Rotterdam\u0027, \u00272022-07-02\u0027, \u00272022-07-06\u0027); INSERT INTO vessel_destinations VALUES (\u0027HMM Algeciras\u0027, \u0027New York\u0027, \u00272022-07-04\u0027, \u00272022-07-11\u0027); INSERT INTO vessel_destinations VALUES (\u0027CMA CGM Jacques Saade\u0027, \u0027Singapore\u0027, \u00272022-07-07\u0027, \u00272022-07-14\u0027); INSERT INTO vessel_destinations VALUES (\u0027Seaspan Amazon\u0027, \u0027Tokyo\u0027, \u00272022-07-11\u0027, \u00272022-07-18\u0027); INSERT INTO vessel_destinations VALUES (\u0027MSC Virtuosa\u0027, \u0027Sydney\u0027, \u00272022-07-15\u0027, \u00272022-07-22\u0027);", + "sql": "SELECT DISTINCT destination FROM vessel_destinations WHERE departure_date BETWEEN \u00272022-07-01\u0027 AND \u00272022-07-31\u0027;", + "sql_explanation": "This query lists all the unique destinations of container vessels for the month of July 2022 by selecting the distinct destination column from the vessel_destinations table, where the departure_date is between \u00272022-07-01\u0027 and \u00272022-07-31\u0027." 
+}, { + "id": "3433", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight for imports from Canada?", + "sql_context": "CREATE TABLE imports (id INT, cargo_weight INT, shipment_date DATE, country VARCHAR(20)); INSERT INTO imports (id, cargo_weight, shipment_date, country) VALUES (1, 5000, \u00272021-01-01\u0027, \u0027Canada\u0027); INSERT INTO imports (id, cargo_weight, shipment_date, country) VALUES (2, 6000, \u00272021-01-02\u0027, \u0027Canada\u0027);", + "sql": "SELECT AVG(cargo_weight) FROM imports WHERE country \u003d \u0027Canada\u0027 AND shipment_date \u003e\u003d \u00272021-01-01\u0027;", + "sql_explanation": "This query calculates the average cargo weight from all import shipments from Canada, considering the shipment_date from January 1st, 2021 onwards." 
+}, { + "id": "3501", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of textiles transported by the Kota Pertama?", + "sql_context": "CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(100), VesselType VARCHAR(100), PortID INT); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (1, \u0027Kota Pertama\u0027, \u0027Container Ship\u0027, 1); CREATE TABLE Cargo (CargoID INT, CargoName VARCHAR(100), Quantity INT, VesselID INT); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (1, \u0027Textiles\u0027, 8000, 1); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (2, \u0027Machinery\u0027, 6000, 2);", + "sql": "SELECT SUM(Cargo.Quantity) FROM Cargo WHERE Cargo.VesselID \u003d 1 AND Cargo.CargoName \u003d \u0027Textiles\u0027;", + "sql_explanation": "This query filters the Cargo table for rows where the VesselID is 1 and the CargoName is \u0027Textiles\u0027, then calculates the sum of the Quantity column for the filtered results." 
+}, { + "id": "4355", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the registration date of the ship \u0027Poseidon\u0027 to \u00272015-01-01\u0027", + "sql_context": "CREATE TABLE ships (ship_id INT, ship_name VARCHAR(255), registration_date DATE); INSERT INTO ships VALUES (1, \u0027Sea Giant\u0027, \u00272010-03-23\u0027), (2, \u0027Poseidon\u0027, \u00272012-09-08\u0027);", + "sql": "UPDATE ships SET registration_date \u003d \u00272015-01-01\u0027 WHERE ship_name \u003d \u0027Poseidon\u0027;", + "sql_explanation": "The query updates the registration date of the ship \u0027Poseidon\u0027 to \u00272015-01-01\u0027. It filters the records based on the ship name and updates the registration date for those records." 
+}, { + "id": "4776", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average tonnage of all cargo handled in the \u0027cargo_handling\u0027 table for the month of April?", + "sql_context": "CREATE TABLE cargo_handling (id INT, cargo_id INT, handling_date DATE, tonnage INT, PRIMARY KEY(id));", + "sql": "SELECT AVG(tonnage) FROM cargo_handling WHERE MONTH(handling_date) \u003d 4;", + "sql_explanation": "The SQL query calculates the average value of the \u0027tonnage\u0027 column in the \u0027cargo_handling\u0027 table where the \u0027handling_date\u0027 is in April, effectively calculating the average tonnage of all cargo handled in April." 
+}, { + "id": "5042", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo handling of ports in the North America region?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(50), region VARCHAR(50), total_cargo INT); INSERT INTO ports VALUES (1, \u0027Port of Shanghai\u0027, \u0027Asia-Pacific\u0027, 43032442); INSERT INTO ports VALUES (2, \u0027Port of Singapore\u0027, \u0027Asia-Pacific\u0027, 37439402); INSERT INTO ports VALUES (3, \u0027Port of Los Angeles\u0027, \u0027North America\u0027, 20384413); INSERT INTO ports VALUES (4, \u0027Port of New York and New Jersey\u0027, \u0027North America\u0027, 8219581);", + "sql": "SELECT SUM(total_cargo) FROM ports WHERE region \u003d \u0027North America\u0027;", + "sql_explanation": "This query calculates the total cargo handling of ports in the North America region by summing the total_cargo column for records in the ports table where the region is \u0027North America\u0027." 
+}, { + "id": "5128", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average handling time of containers at the port \u0027Sydney\u0027?", + "sql_context": "CREATE TABLE containers (id INT, port VARCHAR(255), handled_date DATE, handling_time INT); INSERT INTO containers (id, port, handled_date, handling_time) VALUES (1, \u0027Sydney\u0027, \u00272022-03-02\u0027, 120), (2, \u0027Melbourne\u0027, \u00272022-03-03\u0027, 100), (3, \u0027Brisbane\u0027, \u00272022-03-04\u0027, 150), (4, \u0027Sydney\u0027, \u00272022-03-05\u0027, 130), (5, \u0027Melbourne\u0027, \u00272022-03-06\u0027, 110);", + "sql": "SELECT AVG(handling_time) FROM containers WHERE port \u003d \u0027Sydney\u0027;", + "sql_explanation": "This query calculates the average handling time of containers at the port \u0027Sydney\u0027 by filtering the records for the specified port and then calculating the average of their handling time." 
+}, { + "id": "5546", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete cargo records with handling date before \u00272022-01-01\u0027", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(255), country VARCHAR(255)); INSERT INTO ports VALUES (1, \u0027Port of Shanghai\u0027, \u0027China\u0027), (2, \u0027Port of Oakland\u0027, \u0027USA\u0027); CREATE TABLE cargo (cargo_id INT, port_id INT, weight FLOAT, handling_date DATE); INSERT INTO cargo VALUES (1, 1, 5000, \u00272021-01-01\u0027), (2, 2, 4000, \u00272021-12-31\u0027), (3, 1, 7000, \u00272020-12-31\u0027);", + "sql": "DELETE FROM cargo WHERE handling_date \u003c \u00272022-01-01\u0027;", + "sql_explanation": "The query deletes the cargo records with handling dates before \u00272022-01-01\u0027. It filters the records based on the handling date and removes them." 
+}, { + "id": "5572", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with a shipment date before \u00272019-01-01\u0027 from the shipment table.", + "sql_context": "CREATE TABLE port (port_id INT, port_name TEXT);CREATE TABLE shipment (shipment_id INT, container_count INT, ship_date DATE, port_id INT); INSERT INTO port VALUES (1, \u0027Los Angeles\u0027), (2, \u0027Tokyo\u0027); INSERT INTO shipment VALUES (1, 500, \u00272018-01-01\u0027, 1), (2, 300, \u00272019-02-15\u0027, 2);", + "sql": "DELETE FROM shipment WHERE ship_date \u003c \u00272019-01-01\u0027;", + "sql_explanation": "This query removes all records from the shipment table with a shipment date before \u00272019-01-01\u0027." +}, { + "id": "5731", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of all vessels in the \u0027fleet_management\u0027 table?", + "sql_context": "CREATE TABLE fleet_management (id INT, name VARCHAR(50), type VARCHAR(50), capacity INT);", + "sql": "SELECT SUM(capacity) FROM fleet_management;", + "sql_explanation": "This SQL query adds up all the values in the \u0027capacity\u0027 column of the \u0027fleet_management\u0027 table." 
+}, { + "id": "5780", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo handling time in minutes for all ports in the \u0027ports\u0027 table?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(50), avg_cargo_time INT); INSERT INTO ports (port_id, port_name, avg_cargo_time) VALUES (1, \u0027Port of Long Beach\u0027, 60), (2, \u0027Port of Los Angeles\u0027, 55), (3, \u0027Port of Oakland\u0027, 50);", + "sql": "SELECT AVG(avg_cargo_time) FROM ports;", + "sql_explanation": "This SQL query calculates the average cargo handling time in minutes for all ports in the \u0027ports\u0027 table by applying the AVG() aggregate function to the \u0027avg_cargo_time\u0027 column." 
+}, { + "id": "5836", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "How can I delete the details of a specific cargo?", + "sql_context": "CREATE TABLE cargo ( id INT, vessel_id INT, weight INT, port_of_loading VARCHAR(255), port_of_discharge VARCHAR(255)); INSERT INTO cargo (id, vessel_id, weight, port_of_loading, port_of_discharge) VALUES (1, 1, 5000, \u0027Port A\u0027, \u0027Port B\u0027), (2, 2, 3000, \u0027Port C\u0027, \u0027Port D\u0027), (3, 3, 4000, \u0027Port E\u0027, \u0027Port F\u0027);", + "sql": "DELETE FROM cargo WHERE id \u003d 1;", + "sql_explanation": "This SQL query deletes the details of the cargo with an id of 1 from the cargo table." 
+}, { + "id": "962", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records for Waymo and Cruise testing in San Francisco", + "sql_context": "CREATE TABLE autonomous_testing (id INT PRIMARY KEY, location VARCHAR(100), company VARCHAR(100), date DATE, miles_driven INT);", + "sql": "INSERT INTO autonomous_testing (id, location, company, date, miles_driven) VALUES (1, \u0027San Francisco\u0027, \u0027Waymo\u0027, \u00272022-05-10\u0027, 500), (2, \u0027San Francisco\u0027, \u0027Cruise\u0027, \u00272022-05-11\u0027, 600);", + "sql_explanation": "* Insert records for Waymo and Cruise testing in San Francisco into the \u0027autonomous_testing\u0027 table." 
+}, { + "id": "1278", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of autonomous to non-autonomous buses in Singapore?", + "sql_context": "CREATE TABLE buses (id INT, is_autonomous BOOLEAN, city VARCHAR(20), num_buses INT); INSERT INTO buses (id, is_autonomous, city, num_buses) VALUES (1, true, \u0027Singapore\u0027, 500), (2, false, \u0027Singapore\u0027, 1000);", + "sql": "SELECT (SUM(CASE WHEN is_autonomous THEN num_buses ELSE 0 END)) / (SUM(CASE WHEN is_autonomous \u003d false THEN num_buses ELSE 0 END)) FROM buses WHERE city \u003d \u0027Singapore\u0027;", + "sql_explanation": "This query calculates the ratio of autonomous to non-autonomous buses in Singapore by selecting the sum of num_buses where is_autonomous is true, and the sum of num_buses where is_autonomous is false, and then dividing the two sums to find the ratio." 
+}, { + "id": "1947", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of car-sharing programs in Toronto?", + "sql_context": "CREATE TABLE car_sharing (program_id INT, program_type VARCHAR(20)); INSERT INTO car_sharing (program_id, program_type) VALUES (1, \u0027Round-trip\u0027), (2, \u0027One-way\u0027), (3, \u0027Peer-to-peer\u0027), (4, \u0027Corporate\u0027), (5, \u0027Fractional\u0027);", + "sql": "SELECT COUNT(*) as num_programs FROM car_sharing WHERE program_type IN (\u0027Round-trip\u0027, \u0027One-way\u0027, \u0027Peer-to-peer\u0027, \u0027Corporate\u0027, \u0027Fractional\u0027);", + "sql_explanation": "Count the total number of car-sharing programs in Toronto by selecting all rows and counting the total number of rows. Include all program types in the count." 
+}, { + "id": "2261", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of shared e-scooters in New York?", + "sql_context": "CREATE TABLE shared_escooters (scooter_id INT, trip_duration INT, start_speed INT, end_speed INT, trip_date DATE); INSERT INTO shared_escooters (scooter_id, trip_duration, start_speed, end_speed, trip_date) VALUES (1, 1200, 5, 15, \u00272022-01-01\u0027), (2, 900, 10, 20, \u00272022-01-02\u0027); CREATE TABLE city_coordinates (city VARCHAR(50), latitude DECIMAL(9,6), longitude DECIMAL(9,6)); INSERT INTO city_coordinates (city, latitude, longitude) VALUES (\u0027New York\u0027, 40.7128, -74.0060);", + "sql": "SELECT AVG(end_speed - start_speed) as avg_speed FROM shared_escooters, city_coordinates WHERE city_coordinates.city \u003d \u0027New York\u0027;", + "sql_explanation": "Calculates the average speed of shared e-scooters in New York by subtracting the start speed from the end speed and taking the average." 
+}, { + "id": "2685", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for 2 EVs sold in 2021 into the \u0027vehicle_sales\u0027 table", + "sql_context": "CREATE TABLE vehicle_sales (id INT, vehicle_type VARCHAR(255), sale_year INT, price FLOAT);", + "sql": "INSERT INTO vehicle_sales (id, vehicle_type, sale_year, price) VALUES (1, \u0027EV\u0027, 2021, 35000), (2, \u0027EV\u0027, 2021, 38000);", + "sql_explanation": "This query inserts new records for 2 electric vehicles (EVs) sold in 2021 into the \u0027vehicle_sales\u0027 table." +}, { + "id": "3077", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all autonomous taxis in San Francisco that are not in use.", + "sql_context": "CREATE TABLE public.taxis (id SERIAL PRIMARY KEY, name TEXT, in_use BOOLEAN, city TEXT); INSERT INTO public.taxis (name, in_use, city) VALUES (\u0027Autonomous Taxi 1\u0027, FALSE, \u0027San Francisco\u0027), (\u0027Autonomous Taxi 2\u0027, TRUE, \u0027San Francisco\u0027);", + "sql": "DELETE FROM public.taxis WHERE city \u003d \u0027San Francisco\u0027 AND name LIKE \u0027Autonomous Taxi%\u0027 AND in_use \u003d FALSE;", + "sql_explanation": "Deletes autonomous taxis in San Francisco that are not in use by filtering the taxis 
table for San Francisco, autonomous taxis, and those not in use, then deleting the matching rows." +}, { + "id": "3672", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new autonomous taxi in Atlanta with the latest usage status.", + "sql_context": "CREATE TABLE public.taxis (id SERIAL PRIMARY KEY, name TEXT, in_use BOOLEAN, city TEXT); INSERT INTO public.taxis (name, in_use, city) VALUES (\u0027Autonomous Taxi 1\u0027, TRUE, \u0027Atlanta\u0027), (\u0027Autonomous Taxi 2\u0027, FALSE, \u0027Atlanta\u0027);", + "sql": "INSERT INTO public.taxis (name, in_use, city) VALUES (\u0027Autonomous Taxi 3\u0027, TRUE, \u0027Atlanta\u0027);", + "sql_explanation": "Inserts a new autonomous taxi in Atlanta by inserting a new row into the taxis table for \u0027Autonomous Taxi 3\u0027 in \u0027Atlanta\u0027 with the latest usage status." 
+}, { + "id": "3733", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the bike sharing station with ID 601 to change its name", + "sql_context": "CREATE TABLE bike_sharing_stations (station_id INT, station_name TEXT, city TEXT, country TEXT, latitude FLOAT, longitude FLOAT);", + "sql": "UPDATE bike_sharing_stations SET station_name \u003d \u0027South Lake Union\u0027 WHERE station_id \u003d 601;", + "sql_explanation": "This query updates the record with station_id 601 in the bike_sharing_stations table. It changes the station_name to \u0027South Lake Union\u0027." +}, { + "id": "3947", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric cars are there in CityH?", + "sql_context": "CREATE TABLE CityH_Vehicles (vehicle_id INT, vehicle_type VARCHAR(20), is_electric BOOLEAN); INSERT INTO CityH_Vehicles (vehicle_id, vehicle_type, is_electric) VALUES (1, \u0027Car\u0027, true), (2, \u0027Bike\u0027, false), (3, \u0027Car\u0027, true), (4, \u0027Bus\u0027, false);", + "sql": "SELECT COUNT(*) FROM CityH_Vehicles WHERE vehicle_type \u003d \u0027Car\u0027 AND is_electric \u003d true;", + "sql_explanation": "This query counts the number of records where vehicle_type is \u0027Car\u0027 and 
is_electric is true in the CityH_Vehicles table." +}, { + "id": "3960", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of electric and hybrid buses in the \u0027public_transportation\u0027 table.", + "sql_context": "CREATE TABLE public_transportation (id INT, type VARCHAR(20), make VARCHAR(20), model VARCHAR(20), year INT, fuel_type VARCHAR(20));", + "sql": "SELECT SUM(fuel_type \u003d \u0027electric\u0027 OR fuel_type \u003d \u0027hybrid\u0027) FROM public_transportation;", + "sql_explanation": "The SQL query uses a conditional sum function to add up the number of rows where the \u0027fuel_type\u0027 column is either \u0027electric\u0027 or \u0027hybrid\u0027." 
+}, { + "id": "4052", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many shared scooters are available in Berlin?", + "sql_context": "CREATE TABLE shared_vehicles (id INT, type VARCHAR(255), city VARCHAR(255), country VARCHAR(255), num_vehicles INT); INSERT INTO shared_vehicles VALUES (1, \u0027Scooter\u0027, \u0027Berlin\u0027, \u0027Germany\u0027, 2000);", + "sql": "SELECT num_vehicles FROM shared_vehicles WHERE type \u003d \u0027Scooter\u0027 AND city \u003d \u0027Berlin\u0027;", + "sql_explanation": "This query selects the num_vehicles column from the shared_vehicles table where the type is \u0027Scooter\u0027 and the city is \u0027Berlin\u0027." 
+}, { + "id": "4086", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of shared electric scooters in New York?", + "sql_context": "CREATE TABLE shared_scooters (scooter_id INT, ride_id INT, ride_start_time TIMESTAMP, ride_end_time TIMESTAMP, starting_location TEXT, ending_location TEXT, distance_traveled FLOAT, speed_kmh FLOAT);", + "sql": "SELECT AVG(speed_kmh) FROM shared_scooters WHERE starting_location LIKE \u0027New York%\u0027;", + "sql_explanation": "This query calculates the average speed of shared electric scooter rides in New York by using the AVG function on the speed_kmh column." +}, { + "id": "4118", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous buses are currently in operation in Tokyo?", + "sql_context": "CREATE TABLE autonomous_buses (bus_id INT, registration_date DATE, city TEXT, in_operation BOOLEAN);", + "sql": "SELECT COUNT(*) FROM autonomous_buses WHERE city \u003d \u0027Tokyo\u0027 AND in_operation \u003d TRUE;", + "sql_explanation": "This query counts the number of autonomous buses currently in operation in Tokyo by using the COUNT function on all records in the table that meet the specified conditions." 
+}, { + "id": "4128", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of public transportation systems in Japan?", + "sql_context": "CREATE TABLE PT_Usage (id INT, system_type VARCHAR(20), country VARCHAR(50), users INT, market_share FLOAT); INSERT INTO PT_Usage (id, system_type, country, users, market_share) VALUES (1, \u0027Tokyo Metro\u0027, \u0027Japan\u0027, 2500000, 0.35), (2, \u0027Osaka Municipal Subway\u0027, \u0027Japan\u0027, 900000, 0.12), (3, \u0027Nagoya Municipal Subway\u0027, \u0027Japan\u0027, 650000, 0.09);", + "sql": "SELECT AVG(market_share) as avg_market_share FROM PT_Usage WHERE country \u003d \u0027Japan\u0027;", + "sql_explanation": "This query calculates the average market share of public transportation systems in Japan by averaging the market_share column in the PT_Usage table where the country is \u0027Japan\u0027." 
+}, { + "id": "4163", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average trip duration for public transportation in Sydney?", + "sql_context": "CREATE TABLE public_transportation_sydney (trip_id INT, trip_duration FLOAT, start_speed FLOAT, end_speed FLOAT, start_time TIMESTAMP, end_time TIMESTAMP, city VARCHAR(50)); INSERT INTO public_transportation_sydney (trip_id, trip_duration, start_speed, end_speed, start_time, end_time, city) VALUES (1, 45.0, 0.0, 20.0, \u00272021-01-01 00:00:00\u0027, \u00272021-01-01 00:45:00\u0027, \u0027Sydney\u0027), (2, 60.0, 0.0, 25.0, \u00272021-01-02 08:00:00\u0027, \u00272021-01-02 08:60:00\u0027, \u0027Sydney\u0027);", + "sql": "SELECT AVG(trip_duration) FROM public_transportation_sydney WHERE city \u003d \u0027Sydney\u0027;", + "sql_explanation": "Calculate the average trip duration for public transportation in Sydney." 
+}, { + "id": "4372", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of public transportation in cities with a population of over 1 million?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(50), population INT, public_transportation_cost DECIMAL(5,2)); INSERT INTO cities (id, name, population, public_transportation_cost) VALUES (1, \u0027New York\u0027, 8500000, 2.50), (2, \u0027Los Angeles\u0027, 4000000, 1.75), (3, \u0027Chicago\u0027, 2700000, 2.25);", + "sql": "SELECT AVG(public_transportation_cost) FROM cities WHERE population \u003e 1000000;", + "sql_explanation": "This query calculates the average cost of public transportation in cities with a population of over 1 million by filtering the cities table based on the population and calculating the average public transportation cost." 
+}, { + "id": "4573", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total distance traveled by autonomous vehicles in CityC?", + "sql_context": "CREATE TABLE CityC_VehicleMovement (vehicle_id INT, vehicle_type VARCHAR(20), is_autonomous BOOLEAN, distance FLOAT); INSERT INTO CityC_VehicleMovement (vehicle_id, vehicle_type, is_autonomous, distance) VALUES (1, \u0027Car\u0027, true, 56.2), (2, \u0027Bike\u0027, false, 12.4), (3, \u0027Car\u0027, false, 34.6), (4, \u0027Bus\u0027, true, 78.9);", + "sql": "SELECT SUM(distance) FROM CityC_VehicleMovement WHERE is_autonomous \u003d true;", + "sql_explanation": "This query calculates the sum of the distance column for all records where is_autonomous is true in the CityC_VehicleMovement table." 
+}, { + "id": "4618", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of bike-share trips in May 2022 for users over 65 in Madrid", + "sql_context": "CREATE TABLE bike_trips (id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, trip_city VARCHAR(50), trip_user_age INT);", + "sql": "SELECT EXTRACT(MONTH FROM trip_start_time) AS month, COUNT(*) AS num_trips", + "sql_explanation": "Extract the month from the trip_start_time, filter bike-share trips in May 2022 for users over 65 in Madrid, and count the number of bike-share trips." +}, { + "id": "4637", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the bus route 12 from the public transportation system of San Francisco", + "sql_context": "CREATE TABLE bus_routes (id INT PRIMARY KEY, route_number INT, route_name VARCHAR(255), city VARCHAR(255), num_stops INT);", + "sql": "DELETE FROM bus_routes WHERE route_number \u003d 12 AND city \u003d \u0027San Francisco\u0027;", + "sql_explanation": "The DELETE statement removes the record with route_number 12 from the bus_routes table, specifically for the San Francisco city." 
+}, { + "id": "4711", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hybrid buses in Singapore and Mumbai?", + "sql_context": "CREATE TABLE hybrid_buses (bus_id INT, city VARCHAR(50)); INSERT INTO hybrid_buses (bus_id, city) VALUES (1, \u0027Singapore\u0027), (2, \u0027Singapore\u0027), (3, \u0027Mumbai\u0027), (4, \u0027Mumbai\u0027), (5, \u0027Mumbai\u0027);", + "sql": "SELECT COUNT(*) FROM hybrid_buses WHERE city IN (\u0027Singapore\u0027, \u0027Mumbai\u0027);", + "sql_explanation": "This query counts the number of hybrid buses in Singapore and Mumbai by selecting all rows with \u0027Singapore\u0027 or \u0027Mumbai\u0027 in the city column and using the COUNT function to get the total number of rows." 
+}, { + "id": "4729", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many charging stations are there in New York City?", + "sql_context": "CREATE TABLE charging_stations (id INT, city VARCHAR(255), country VARCHAR(255), num_stations INT); INSERT INTO charging_stations VALUES (1, \u0027New York City\u0027, \u0027USA\u0027, 1000);", + "sql": "SELECT num_stations FROM charging_stations WHERE city \u003d \u0027New York City\u0027;", + "sql_explanation": "This query selects the num_stations column from the charging_stations table where the city is \u0027New York City\u0027." +}, { + "id": "4759", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all charging stations for electric bikes in Toronto.", + "sql_context": "CREATE TABLE charging_stations (station_id INT, type VARCHAR(20), city VARCHAR(20)); INSERT INTO charging_stations (station_id, type, city) VALUES (1, \u0027Car\u0027, \u0027Toronto\u0027), (2, \u0027Bike\u0027, \u0027Toronto\u0027), (3, \u0027Bike\u0027, \u0027Toronto\u0027);", + "sql": "DELETE FROM charging_stations WHERE city \u003d \u0027Toronto\u0027 AND type \u003d \u0027Bike\u0027;", + "sql_explanation": "The SQL query deletes all charging stations for electric bikes in Toronto 
by removing rows with city value \u0027Toronto\u0027 and type value \u0027Bike\u0027 from the charging_stations table." +}, { + "id": "4866", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of multimodal trips taken in New York City?", + "sql_context": "CREATE TABLE multimodal_trips (id INT, trips INT, city VARCHAR(50));", + "sql": "SELECT SUM(trips) FROM multimodal_trips WHERE city \u003d \u0027New York City\u0027;", + "sql_explanation": "This SQL query calculates the total number of multimodal trips taken in New York City. It does this by using the SUM function, which returns the sum of a numeric column. The query filters the multimodal_trips table to only include rows where the city is \u0027New York City\u0027, and then calculates the sum of the trips column for those rows." 
+}, { + "id": "4920", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum trip duration for electric taxis in London?", + "sql_context": "CREATE TABLE electric_taxis (taxi_id INT, trip_duration FLOAT, city VARCHAR(50)); INSERT INTO electric_taxis (taxi_id, trip_duration, city) VALUES (1, 45.2, \u0027London\u0027), (2, 32.6, \u0027London\u0027), (3, 50.1, \u0027London\u0027);", + "sql": "SELECT MIN(trip_duration) FROM electric_taxis WHERE city \u003d \u0027London\u0027;", + "sql_explanation": "This query finds the minimum trip duration for electric taxis in London by taking the minimum value of the trip_duration column for all rows with \u0027London\u0027 in the city column." 
+}, { + "id": "4951", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum trip distance for shared electric scooters in Paris?", + "sql_context": "CREATE TABLE shared_scooters (scooter_id INT, trip_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, trip_distance FLOAT, city VARCHAR(50)); INSERT INTO shared_scooters (scooter_id, trip_id, trip_start_time, trip_end_time, trip_distance, city) VALUES (1, 1001, \u00272022-01-01 10:00:00\u0027, \u00272022-01-01 10:15:00\u0027, 5.3, \u0027Paris\u0027), (2, 1002, \u00272022-01-01 11:00:00\u0027, \u00272022-01-01 11:45:00\u0027, 12.1, \u0027Paris\u0027);", + "sql": "SELECT MAX(trip_distance) FROM shared_scooters WHERE city \u003d \u0027Paris\u0027;", + "sql_explanation": "Identifies the maximum trip distance for shared electric scooters in Paris by finding the maximum trip_distance column value." 
+}, { + "id": "5027", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of passengers per flight for airlines in New Delhi, India?", + "sql_context": "CREATE TABLE airlines (flight_id INT, airline_id INT, flight_start_time TIMESTAMP, flight_end_time TIMESTAMP, origin TEXT, destination TEXT, city TEXT, avg_passengers DECIMAL);", + "sql": "SELECT AVG(avg_passengers) FROM airlines WHERE city \u003d \u0027New Delhi\u0027;", + "sql_explanation": "This query calculates the average number of passengers per flight for airlines in New Delhi, India by selecting the average value of the avg_passengers column where the city is New Delhi." +}, { + "id": "5203", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance of autonomous taxi rides in Tokyo?", + "sql_context": "CREATE TABLE tokyo_taxis (id INT, ride_id VARCHAR(20), start_time TIMESTAMP, end_time TIMESTAMP, distance FLOAT, autonomous BOOLEAN);", + "sql": "SELECT AVG(distance) FROM tokyo_taxis WHERE autonomous \u003d TRUE;", + "sql_explanation": "Calculates the average distance of autonomous taxi rides in Tokyo by getting the mean of the distance column for rows where autonomous is true." 
+}, { + "id": "5268", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update \u0027price\u0027 to 500 for records in the \u0027toll_booths\u0027 table where \u0027location\u0027 is \u0027Boston\u0027", + "sql_context": "CREATE TABLE toll_booths (id INT, booth_id VARCHAR(255), location VARCHAR(255), price FLOAT);", + "sql": "UPDATE toll_booths SET price \u003d 500 WHERE location \u003d \u0027Boston\u0027;", + "sql_explanation": "This query updates the \u0027price\u0027 column to 500 in the \u0027toll_booths\u0027 table for records where the \u0027location\u0027 is \u0027Boston\u0027." +}, { + "id": "5280", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of bicycles per station for stations located in urban areas in the bike_stations table?", + "sql_context": "CREATE TABLE bike_stations (id INT, location VARCHAR(20), num_bikes INT, urban VARCHAR(5)); INSERT INTO bike_stations (id, location, num_bikes, urban) VALUES (1, \u0027Station A\u0027, 20, \u0027Yes\u0027), (2, \u0027Station B\u0027, 30, \u0027Yes\u0027), (3, \u0027Station C\u0027, 15, \u0027No\u0027), (4, \u0027Station D\u0027, 25, \u0027No\u0027);", + "sql": "SELECT AVG(num_bikes) FROM bike_stations WHERE urban \u003d 
\u0027Yes\u0027;", + "sql_explanation": "This query calculates the average number of bicycles per station for stations located in urban areas by averaging the num_bikes column values using the AVG function." +}, { + "id": "5283", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average fare of electric ferries in Sydney, Australia?", + "sql_context": "CREATE TABLE electric_ferries (ferry_id INT, fare FLOAT, city VARCHAR(50));", + "sql": "SELECT AVG(fare) FROM electric_ferries WHERE city \u003d \u0027Sydney\u0027;", + "sql_explanation": "This query calculates the average fare of electric ferries in Sydney, Australia by using the AVG function on the fare column of the electric_ferries table, filtered by the city \u0027Sydney\u0027." 
+}, { + "id": "5297", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance traveled per ride in Singapore taxis?", + "sql_context": "CREATE TABLE taxi_data (id INT, city VARCHAR(50), distance FLOAT); INSERT INTO taxi_data (id, city, distance) VALUES (1, \u0027Singapore\u0027, 12.5), (2, \u0027NYC\u0027, 10.0), (3, \u0027London\u0027, 15.0);", + "sql": "SELECT AVG(distance) FROM taxi_data WHERE city \u003d \u0027Singapore\u0027;", + "sql_explanation": "The SQL query calculates the average distance traveled per ride in Singapore taxis." +}, { + "id": "5384", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of passengers that used public transportation in Chicago during Q1 of 2022?", + "sql_context": "CREATE TABLE public_transportation( transport_id INT, passengers INT, transport_type VARCHAR(50), usage_date DATE, city VARCHAR(50)); CREATE VIEW q1_2022 AS SELECT * FROM public_transportation WHERE usage_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql": "SELECT SUM(passengers) FROM q1_2022 WHERE city \u003d \u0027Chicago\u0027;", + "sql_explanation": "This query calculates the total number of passengers that used 
public transportation in Chicago during Q1 of 2022 by filtering the q1_2022 view with the appropriate condition and using the SUM function on the passengers column." +}, { + "id": "5516", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of vehicles sold in \u0027sales_data\u0027 view that have a speed less than 65 mph?", + "sql_context": "CREATE VIEW sales_data AS SELECT id, vehicle_type, avg_speed, sales FROM vehicle_sales WHERE sales \u003e 20000;", + "sql": "SELECT SUM(sales) FROM sales_data WHERE avg_speed \u003c 65;", + "sql_explanation": "This query calculates the total number of vehicles sold with a speed less than 65 mph by selecting the SUM function on the sales column from the sales_data view, where the avg_speed is less than 65." 
+}, { + "id": "5686", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \u0027MultimodalHubs\u0027 table where \u0027hub_id\u0027 is 300", + "sql_context": "CREATE TABLE MultimodalHubs (hub_id INT, address VARCHAR(30), PRIMARY KEY (hub_id));", + "sql": "DELETE FROM MultimodalHubs WHERE hub_id \u003d 300;", + "sql_explanation": "* This query deletes records from the \u0027MultimodalHubs\u0027 table where the \u0027hub_id\u0027 is 300.* The DELETE statement is used to remove matching records, and the WHERE clause filters the rows to be deleted." +}, { + "id": "5767", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of public bicycles available in each city in the Netherlands?", + "sql_context": "CREATE TABLE if not exists public_bicycles (id INT, city VARCHAR(255), bikes INT); INSERT INTO public_bicycles (id, city, bikes) VALUES (1, \u0027Amsterdam\u0027, 15000), (2, \u0027Utrecht\u0027, 12000), (3, \u0027Rotterdam\u0027, 8000), (4, \u0027The Hague\u0027, 6000);", + "sql": "SELECT city, bikes FROM public_bicycles;", + "sql_explanation": "This query creates a table \u0027public_bicycles\u0027 with \u0027id\u0027, \u0027city\u0027, and \u0027bikes\u0027 columns, and 
inserts four records for public bicycle availability in the Netherlands. The main SQL query simply selects the city and number of public bicycles available in each city without any additional calculations or manipulations." +}, { + "id": "482", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum snow depth recorded in the Arctic Research Station 18 and 19?", + "sql_context": "CREATE TABLE Arctic_Research_Station_18 (date DATE, snow_depth FLOAT); CREATE TABLE Arctic_Research_Station_19 (date DATE, snow_depth FLOAT);", + "sql": "SELECT MAX(snow_depth) FROM Arctic_Research_Station_18; SELECT MAX(snow_depth) FROM Arctic_Research_Station_19; SELECT GREATEST(MAX(snow_depth), MAX(snow_depth)) FROM Arctic_Research_Station_18, Arctic_Research_Station_19;", + "sql_explanation": "This query calculates the maximum snow depth from two different stations (18 and 19) by using the MAX function on the snow_depth column for both tables. It then combines the results from both tables using the GREATEST function and calculates the maximum snow depth between the two tables." 
+}, { + "id": "515", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum snow depth recorded in the Arctic Research Station 4 and 5?", + "sql_context": "CREATE TABLE Arctic_Research_Station_4 (date DATE, snow_depth FLOAT); CREATE TABLE Arctic_Research_Station_5 (date DATE, snow_depth FLOAT);", + "sql": "SELECT MAX(snow_depth) FROM Arctic_Research_Station_4; SELECT MAX(snow_depth) FROM Arctic_Research_Station_5; SELECT GREATEST(MAX(snow_depth), MAX(snow_depth)) FROM Arctic_Research_Station_4, Arctic_Research_Station_5;", + "sql_explanation": "This query calculates the maximum snow depth from two different stations (4 and 5) by using the MAX function on the snow_depth column for both tables. It then combines the results from both tables using the GREATEST function and calculates the maximum snow depth between the two tables." 
+}, { + "id": "1196", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature in the Arctic Ocean for the months of June, July, and August since 2010?", + "sql_context": "CREATE TABLE weather ( id INT PRIMARY KEY, location VARCHAR(255), temperature DECIMAL(5,2), measurement_date DATE ); INSERT INTO weather (id, location, temperature, measurement_date) VALUES (1, \u0027Arctic Ocean\u0027, 2.1, \u00272010-06-01\u0027), (2, \u0027Arctic Ocean\u0027, 3.5, \u00272010-07-01\u0027), (3, \u0027Arctic Ocean\u0027, 4.2, \u00272010-08-01\u0027);", + "sql": "SELECT AVG(temperature) FROM weather WHERE location \u003d \u0027Arctic Ocean\u0027 AND EXTRACT(MONTH FROM measurement_date) IN (6, 7, 8) AND EXTRACT(YEAR FROM measurement_date) \u003e\u003d 2010;", + "sql_explanation": "This SQL query calculates the average temperature from the \u0027weather\u0027 table for the Arctic Ocean during the months of June, July, and August since 2010." 
+}, { + "id": "1832", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add new researchers to the arctic_researchers table", + "sql_context": "CREATE SCHEMA IF NOT EXISTS arctic_db; CREATE TABLE IF NOT EXISTS arctic_researchers (id INT PRIMARY KEY, researcher_name TEXT, expertise TEXT);", + "sql": "INSERT INTO arctic_researchers (id, researcher_name, expertise) VALUES (1, \u0027Alice Johnson\u0027, \u0027climate change\u0027), (2, \u0027Bob Smith\u0027, \u0027biodiversity\u0027);", + "sql_explanation": "These INSERT statements add two new records with ids 1 and 2 for the researcher_names \u0027Alice Johnson\u0027 and \u0027Bob Smith\u0027 and their respective expertise into the arctic_researchers table." 
+}, { + "id": "2436", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum carbon sequestration observed in a year in Greenland?", + "sql_context": "CREATE TABLE CarbonSequestration (ID INT, Location TEXT, Year INT, Sequestration INT); INSERT INTO CarbonSequestration (ID, Location, Year, Sequestration) VALUES (1, \u0027Greenland\u0027, 2010, 1000); INSERT INTO CarbonSequestration (ID, Location, Year, Sequestration) VALUES (2, \u0027Greenland\u0027, 2011, 1500);", + "sql": "SELECT MAX(Year) as Max_Year, MAX(Sequestration) as Max_Sequestration FROM CarbonSequestration WHERE Location \u003d \u0027Greenland\u0027;", + "sql_explanation": "This query finds the year with the maximum carbon sequestration in Greenland by filtering the CarbonSequestration table on Greenland and selecting the maximum year and sequestration." 
+}, { + "id": "2754", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum temperature recorded in Iceland in February 2020?", + "sql_context": "CREATE TABLE WeatherData (location VARCHAR(50), date DATE, temperature DECIMAL(5,2)); INSERT INTO WeatherData (location, date, temperature) VALUES (\u0027Iceland\u0027, \u00272020-02-01\u0027, -5.2), (\u0027Iceland\u0027, \u00272020-02-02\u0027, -4.9);", + "sql": "SELECT MIN(temperature) FROM WeatherData WHERE location \u003d \u0027Iceland\u0027 AND date BETWEEN \u00272020-02-01\u0027 AND \u00272020-02-28\u0027;", + "sql_explanation": "This query calculates the minimum temperature for Iceland in February 2020 by selecting the minimum temperature value from the WeatherData table for records with a location of \u0027Iceland\u0027 and a date within February 2020." 
+}, { + "id": "3195", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify indigenous communities in the Arctic that rely on whaling", + "sql_context": "CREATE TABLE indigenous_communities (id INT, name VARCHAR(255), primary_activities VARCHAR(255));", + "sql": "SELECT name FROM indigenous_communities WHERE primary_activities LIKE \u0027%whaling%\u0027 AND region \u003d \u0027Arctic\u0027", + "sql_explanation": "This query retrieves the names of indigenous communities in the Arctic that rely on whaling, based on the \u0027indigenous_communities\u0027 table." +}, { + "id": "3963", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which indigenous communities in the Arctic region have a population of 1000 or more?", + "sql_context": "CREATE TABLE IndigenousCommunities (id INT, name VARCHAR(50), region VARCHAR(50), population INT); INSERT INTO IndigenousCommunities (id, name, region, population) VALUES (1, \u0027Community A\u0027, \u0027Arctic\u0027, 500); INSERT INTO IndigenousCommunities (id, name, region, population) VALUES (2, \u0027Community B\u0027, \u0027Arctic\u0027, 1500); INSERT INTO IndigenousCommunities (id, name, region, population) VALUES (3, \u0027Community C\u0027, \u0027Antarctic\u0027, 
800);", + "sql": "SELECT name FROM IndigenousCommunities WHERE region \u003d \u0027Arctic\u0027 AND population \u003e\u003d 1000;", + "sql_explanation": "This query finds the names of indigenous communities in the Arctic region with a population of 1000 or more." +}, { + "id": "3970", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average methane concentration in the Arctic Ocean in 2020?", + "sql_context": "CREATE TABLE arctic_ocean_gas (gas VARCHAR(50), year INT, concentration FLOAT);", + "sql": "SELECT AVG(concentration) FROM arctic_ocean_gas WHERE gas \u003d \u0027methane\u0027 AND year \u003d 2020;", + "sql_explanation": "Calculate the average methane concentration in the Arctic Ocean in 2020." 
+}, { + "id": "4002", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many species are endangered in the Arctic region?", + "sql_context": "CREATE TABLE SpeciesStatus (species VARCHAR(255), region VARCHAR(255), status VARCHAR(255)); INSERT INTO SpeciesStatus (species, region, status) VALUES (\u0027Polar bear\u0027, \u0027Arctic\u0027, \u0027Endangered\u0027); INSERT INTO SpeciesStatus (species, region, status) VALUES (\u0027Beluga whale\u0027, \u0027Arctic\u0027, \u0027Endangered\u0027);", + "sql": "SELECT COUNT(*) FROM SpeciesStatus WHERE status \u003d \u0027Endangered\u0027 AND region \u003d \u0027Arctic\u0027;", + "sql_explanation": "The SQL query calculates the number of rows (COUNT) with the status \u0027Endangered\u0027 and region \u0027Arctic\u0027." 
+}, { + "id": "5663", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of Arctic wildlife sightings older than 2015", + "sql_context": "CREATE TABLE WildlifeSightings (id INT, species TEXT, year INT, location TEXT); INSERT INTO WildlifeSightings (id, species, year, location) VALUES (1, \u0027Polar Bear\u0027, 2020, \u0027Barrow\u0027), (2, \u0027Polar Bear\u0027, 2015, \u0027Barrow\u0027), (3, \u0027Walrus\u0027, 2019, \u0027Tromso\u0027), (4, \u0027Walrus\u0027, 2014, \u0027Tromso\u0027), (5, \u0027Narwhal\u0027, 2018, \u0027Pond Inlet\u0027);", + "sql": "DELETE FROM WildlifeSightings WHERE year \u003c 2015;", + "sql_explanation": "Delete records of Arctic wildlife sightings older than 2015." 
+}, { + "id": "5716", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records with a biomass value lower than 10 in the \u0027species_data\u0027 table.", + "sql_context": "CREATE TABLE species_data (species_id INT, species_name VARCHAR(255), biomass FLOAT); INSERT INTO species_data (species_id, species_name, biomass) VALUES (1, \u0027polar_bear\u0027, 800.0), (2, \u0027arctic_fox\u0027, 15.0), (3, \u0027caribou\u0027, 220.0), (4, \u0027lemming\u0027, 5.0);", + "sql": "DELETE FROM species_data WHERE biomass \u003c 10;", + "sql_explanation": "This SQL query deletes records in the species_data table where the biomass column is lower than 10." 
+}, { + "id": "564", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales and average sales per transaction for beauty products with a palm oil-free label in Germany?", + "sql_context": "CREATE TABLE beauty_products_germany (palm_oil_free BOOLEAN, sale_date DATE, sales_quantity INT, unit_price DECIMAL(5,2)); INSERT INTO beauty_products_germany (palm_oil_free, sale_date, sales_quantity, unit_price) VALUES (TRUE, \u00272022-01-01\u0027, 120, 22.99), (FALSE, \u00272022-01-01\u0027, 180, 18.99);", + "sql": "SELECT SUM(sales_quantity * unit_price) AS total_sales, AVG(sales_quantity) AS avg_sales_per_transaction FROM beauty_products_germany WHERE palm_oil_free \u003d TRUE AND sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027;", + "sql_explanation": "The SQL query calculates the total sales by summing the product of sales_quantity and unit_price for beauty products with a palm oil-free label in Germany for the year 2022. It also calculates the average sales per transaction by averaging the sales_quantity for beauty products with a palm oil-free label in Germany for the same period." 
+}, { + "id": "1163", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of beauty products with fair trade ingredients in the overall beauty product sales in South Africa?", + "sql_context": "CREATE TABLE beauty_products_south_africa (fair_trade_ingredients BOOLEAN, sales_quantity INT); INSERT INTO beauty_products_south_africa (fair_trade_ingredients, sales_quantity) VALUES (TRUE, 700), (FALSE, 1300);", + "sql": "SELECT (SUM(CASE WHEN fair_trade_ingredients \u003d TRUE THEN sales_quantity ELSE 0 END) / SUM(sales_quantity)) * 100 AS fair_trade_percentage FROM beauty_products_south_africa;", + "sql_explanation": "The SQL query calculates the percentage of beauty products with fair trade ingredients in the overall beauty product sales in South Africa by summing the sales_quantity for beauty products with fair trade ingredients and dividing it by the total sales_quantity. The result is multiplied by 100 to get the percentage." 
+}, { + "id": "1301", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for refillable beauty products in the United Kingdom in Q1 2023?", + "sql_context": "CREATE TABLE sales (product VARCHAR(255), sale_date DATE, revenue NUMERIC(10, 2), product_type VARCHAR(255), country VARCHAR(255)); INSERT INTO sales (product, sale_date, revenue, product_type, country) VALUES (\u0027Shampoo\u0027, \u00272023-01-01\u0027, 50, \u0027Refillable\u0027, \u0027United Kingdom\u0027), (\u0027Conditioner\u0027, \u00272023-01-03\u0027, 75, \u0027Refillable\u0027, \u0027United Kingdom\u0027), (\u0027Body Wash\u0027, \u00272023-02-05\u0027, 60, \u0027Refillable\u0027, \u0027United Kingdom\u0027);", + "sql": "SELECT SUM(revenue) as total_revenue FROM sales WHERE sale_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-03-31\u0027 AND product_type \u003d \u0027Refillable\u0027 AND country \u003d \u0027United Kingdom\u0027;", + "sql_explanation": "The query calculates the total revenue for refillable beauty products in the United Kingdom in Q1 2023 by summing the revenue values for rows with the product_type column set to \u0027Refillable\u0027 and the country column set to \u0027United Kingdom\u0027 and sale dates in Q1 2023. The WHERE clause filters the sales table to only include rows with the product_type column set to \u0027Refillable\u0027 and the country column set to \u0027United Kingdom\u0027 and sale dates in Q1 2023. The SELECT clause calculates the total revenue for refillable beauty products in the United Kingdom in Q1 2023." 
+}, { + "id": "1749", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue of natural skincare products in Australia in the last year?", + "sql_context": "CREATE TABLE skincare_revenue (revenue_id INT, product_id INT, revenue DECIMAL(5,2), is_natural BOOLEAN, revenue_date DATE); INSERT INTO skincare_revenue VALUES (1, 20, 55.99, true, \u00272021-12-15\u0027);", + "sql": "SELECT SUM(revenue) FROM skincare_revenue WHERE is_natural \u003d true AND revenue_date BETWEEN \u00272020-01-01\u0027 AND \u00272021-12-31\u0027 AND country \u003d \u0027Australia\u0027;", + "sql_explanation": "Calculates the total revenue of natural skincare products in Australia in the last year by filtering the skincare_revenue table based on the is_natural, country, and revenue_date columns, and then using the SUM function." 
+}, { + "id": "1900", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emissions from manufacturing cosmetics in the last 12 months?", + "sql_context": "CREATE TABLE manufacturing_emissions (emission_id INT, product_id INT, co2_emissions FLOAT, emission_date DATE);", + "sql": "SELECT SUM(co2_emissions) FROM manufacturing_emissions WHERE emission_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) AND CURRENT_DATE;", + "sql_explanation": "The SQL query calculates the total CO2 emissions from manufacturing cosmetics in the last 12 months by using the SUM function on the co2_emissions column, filtering the data with the WHERE clause, and checking the emission_date column." 
+}, { + "id": "1931", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales for organic skincare products in France in 2021?", + "sql_context": "CREATE TABLE SkincareSales (sale_id INT, product_id INT, sale_price DECIMAL(5,2), sale_date DATE, is_organic BOOLEAN, country TEXT); INSERT INTO SkincareSales (sale_id, product_id, sale_price, sale_date, is_organic, country) VALUES (1, 601, 25.99, \u00272021-03-14\u0027, true, \u0027France\u0027);", + "sql": "SELECT SUM(sale_price) FROM SkincareSales WHERE is_organic \u003d true AND country \u003d \u0027France\u0027 AND sale_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "Join SkincareSales where is_organic is true and country is France and sale_date is in 2021. Then, calculate the sum of sale_price for these products." 
+}, { + "id": "2024", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new cruelty-free foundation into the products table", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), category VARCHAR(255), price DECIMAL(10,2), is_cruelty_free BOOLEAN);", + "sql": "INSERT INTO products (product_id, product_name, category, price, is_cruelty_free) VALUES (4, \u0027Liquid Foundation\u0027, \u0027Makeup\u0027, 24.99, true);", + "sql_explanation": "This query inserts a new record into the products table for a cruelty-free liquid foundation with an ID of 4, a price of $24.99, and a boolean value of true indicating that it is cruelty-free." 
+}, { + "id": "2154", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many units of organic hair care products were sold in the USA in Q2 of 2021?", + "sql_context": "CREATE TABLE HairCareProducts (productID INT, productName VARCHAR(50), category VARCHAR(50), country VARCHAR(50), isOrganic BOOLEAN, saleDate DATE); INSERT INTO HairCareProducts (productID, productName, category, country, isOrganic, saleDate) VALUES (1, \u0027Shampoo\u0027, \u0027Hair Care\u0027, \u0027USA\u0027, TRUE, \u00272021-04-23\u0027);", + "sql": "SELECT COUNT(*) FROM HairCareProducts WHERE country \u003d \u0027USA\u0027 AND isOrganic \u003d TRUE AND QUARTER(saleDate) \u003d 2 AND YEAR(saleDate) \u003d 2021;", + "sql_explanation": "This SQL query counts the number of units of organic hair care products sold in the USA in Q2 of 2021 by filtering on country, isOrganic, saleDate columns, and using the QUARTER() and YEAR() functions to extract the quarter and year from the saleDate." 
+}, { + "id": "2458", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for organic products in Q4 2021?", + "sql_context": "CREATE TABLE sales (product VARCHAR(255), sale_date DATE, revenue NUMERIC(10, 2), is_organic BOOLEAN); INSERT INTO sales (product, sale_date, revenue, is_organic) VALUES (\u0027Cleanser\u0027, \u00272021-10-01\u0027, 20, true), (\u0027Foundation\u0027, \u00272021-11-03\u0027, 30, false), (\u0027Shampoo\u0027, \u00272021-12-07\u0027, 40, true);", + "sql": "SELECT SUM(revenue) as total_revenue FROM sales WHERE sale_date BETWEEN \u00272021-10-01\u0027 AND \u00272021-12-31\u0027 AND is_organic \u003d true;", + "sql_explanation": "The query calculates the total revenue for organic products in Q4 2021 (October to December) by summing the revenue values for rows with the is_organic column set to true and sale dates in Q4 2021. The WHERE clause filters the sales table to only include rows with sale dates in Q4 2021 and with the is_organic column set to true. The SELECT clause calculates the total revenue for organic products in Q4 2021." 
+}, { + "id": "2468", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for organic skincare products in Q1 2022?", + "sql_context": "CREATE TABLE skincare_sales (product_type VARCHAR(20), sale_date DATE, revenue DECIMAL(10,2)); INSERT INTO skincare_sales (product_type, sale_date, revenue) VALUES (\u0027Organic\u0027, \u00272022-01-01\u0027, 150.00), (\u0027Organic\u0027, \u00272022-01-02\u0027, 200.00);", + "sql": "SELECT SUM(revenue) FROM skincare_sales WHERE product_type \u003d \u0027Organic\u0027 AND sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "The SQL query calculates the total revenue for organic skincare products in Q1 2022 by summing the revenue column where product_type is \u0027Organic\u0027 and sale_date is between \u00272022-01-01\u0027 and \u00272022-03-31\u0027." 
+}, { + "id": "2622", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue of organic skincare products sold in the US in Q1 2022?", + "sql_context": "CREATE TABLE SkincareSales (product_id INT, product_name TEXT, sale_price DECIMAL(5,2), is_organic BOOLEAN, sale_date DATE); INSERT INTO SkincareSales (product_id, product_name, sale_price, is_organic, sale_date) VALUES (1, \u0027Aloe Vera Moisturizer\u0027, 15.99, true, \u00272022-01-03\u0027);", + "sql": "SELECT SUM(sale_price) FROM SkincareSales WHERE is_organic \u003d true AND sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "Sum the sale_price where is_organic is true and sale_date is within Q1 2022." 
+}, { + "id": "2704", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of eco-friendly nail polish sold in France?", + "sql_context": "CREATE TABLE NailPolishSales (productID INT, productName VARCHAR(50), category VARCHAR(50), country VARCHAR(50), isEcoFriendly BOOLEAN, price DECIMAL(5,2)); INSERT INTO NailPolishSales (productID, productName, category, country, isEcoFriendly, price) VALUES (1, \u0027Nail Polish\u0027, \u0027Nail Care\u0027, \u0027France\u0027, TRUE, 12.99);", + "sql": "SELECT AVG(price) FROM NailPolishSales WHERE category \u003d \u0027Nail Care\u0027 AND country \u003d \u0027France\u0027 AND isEcoFriendly \u003d TRUE;", + "sql_explanation": "This SQL query calculates the average price of eco-friendly nail polish sold in France by filtering on category, country, and isEcoFriendly columns, and using the AVG() function to calculate the mean price." 
+}, { + "id": "2770", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cruelty-free skincare products have more than 10 ingredients?", + "sql_context": "CREATE TABLE products (product_id INT PRIMARY KEY, product_name TEXT, product_type TEXT, brand_id INT, is_cruelty_free BOOLEAN, num_ingredients INT); INSERT INTO products (product_id, product_name, product_type, brand_id, is_cruelty_free, num_ingredients) VALUES (1, \u0027Cleanser\u0027, \u0027Skincare\u0027, 1, true, 12), (2, \u0027Toner\u0027, \u0027Skincare\u0027, 2, true, 11), (3, \u0027Moisturizer\u0027, \u0027Skincare\u0027, 3, false, 8), (4, \u0027Serum\u0027, \u0027Skincare\u0027, 3, true, 13), (5, \u0027Sunscreen\u0027, \u0027Skincare\u0027, 4, true, 15);", + "sql": "SELECT COUNT(*) FROM products WHERE product_type \u003d \u0027Skincare\u0027 AND is_cruelty_free \u003d true AND num_ingredients \u003e 10;", + "sql_explanation": "This query calculates the number of cruelty-free skincare products with more than 10 ingredients." 
+}, { + "id": "2845", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of vegan haircare products sold in the UK.", + "sql_context": "CREATE TABLE haircare_products(product_name TEXT, product_type TEXT, vegan BOOLEAN, sale_country TEXT); INSERT INTO haircare_products(product_name, product_type, vegan, sale_country) VALUES (\u0027Shampoo Bar\u0027, \u0027haircare\u0027, true, \u0027UK\u0027);", + "sql": "SELECT COUNT(*) FROM haircare_products WHERE product_type \u003d \u0027haircare\u0027 AND vegan \u003d true AND sale_country \u003d \u0027UK\u0027;", + "sql_explanation": "This query counts the number of vegan haircare products sold in the UK. It filters the haircare_products table by product_type, vegan, and sale_country, and then applies the COUNT function to all columns." 
+}, { + "id": "2905", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of nail polish sales in Canada before 2020.", + "sql_context": "CREATE TABLE NailPolishSales (sale_id INT, product_name VARCHAR(100), category VARCHAR(50), price DECIMAL(10,2), quantity INT, sale_date DATE, country VARCHAR(50));", + "sql": "DELETE FROM NailPolishSales WHERE category \u003d \u0027Nail Polish\u0027 AND country \u003d \u0027Canada\u0027 AND sale_date \u003c \u00272020-01-01\u0027;", + "sql_explanation": "This query deletes all records of nail polish sales in Canada before 2020 by filtering for rows where the category is \u0027Nail Polish\u0027, the country is \u0027Canada\u0027, and the sale_date is before \u00272020-01-01\u0027." 
+}, { + "id": "3136", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of skincare products with natural ingredients and a rating above 4.5 in the Australian market?", + "sql_context": "CREATE TABLE Skincare_Australia(Product VARCHAR(30), Ingredients TEXT, Rating DECIMAL(3,2)); INSERT INTO Skincare_Australia(Product, Ingredients, Rating) VALUES(\u0027Cleanser A\u0027, \u0027Natural Ingredients, No Harmful Chemicals\u0027, 4.8), (\u0027Moisturizer B\u0027, \u0027Synthetic Ingredients\u0027, 4.5), (\u0027Toner C\u0027, \u0027Natural Ingredients, No Harmful Chemicals\u0027, 4.6), (\u0027Exfoliant D\u0027, \u0027Synthetic Ingredients\u0027, 4.2), (\u0027Serum E\u0027, \u0027Natural Ingredients, No Harmful Chemicals\u0027, 4.9), (\u0027Sunscreen F\u0027, \u0027Synthetic Ingredients\u0027, 4.7);", + "sql": "SELECT COUNT(*) FROM Skincare_Australia WHERE Ingredients LIKE \u0027%Natural Ingredients%\u0027 AND Rating \u003e 4.5;", + "sql_explanation": "The SQL query calculates the count of rows from the Skincare_Australia table for the rows where Ingredients contains the phrase \u0027Natural Ingredients\u0027 and Rating is greater than 4.5. It uses the LIKE operator with the % wildcard character to match rows with the phrase \u0027Natural Ingredients\u0027 in the Ingredients column." 
+}, { + "id": "3142", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the lowest-rated cruelty-free products?", + "sql_context": " CREATE TABLE product_ratings (product_id INT, brand VARCHAR(255), is_cruelty_free BOOLEAN, rating FLOAT); INSERT INTO product_ratings (product_id, brand, is_cruelty_free, rating) VALUES (1, \u0027Lush\u0027, true, 3.5), (2, \u0027The Body Shop\u0027, true, 4.2), (3, \u0027Sephora\u0027, false, 4.8); ", + "sql": " SELECT product_id, brand, rating FROM product_ratings WHERE is_cruelty_free \u003d true ORDER BY rating ASC;", + "sql_explanation": "This query retrieves the product ID, brand, and rating for all cruelty-free products, sorted in ascending order by rating." 
+}, { + "id": "3174", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the sales for a specific product in a specific month?", + "sql_context": "CREATE TABLE if not exists sales (id INT PRIMARY KEY, product_id INT, purchase_date DATE, quantity INT, price DECIMAL(5,2)); INSERT INTO sales (id, product_id, purchase_date, quantity, price) VALUES (3, 1, \u00272022-02-01\u0027, 2, 12.99); CREATE TABLE if not exists product (id INT PRIMARY KEY, name TEXT, brand_id INT, price DECIMAL(5,2)); INSERT INTO product (id, name, brand_id, price) VALUES (1, \u0027Solid Shampoo Bar\u0027, 1, 12.99);", + "sql": "SELECT SUM(quantity * price) FROM sales WHERE product_id \u003d 1 AND EXTRACT(MONTH FROM purchase_date) \u003d 2;", + "sql_explanation": "This SQL query retrieves the sales for a specific product in a specific month. It filters the \u0027sales\u0027 table to only include rows where the \u0027product_id\u0027 attribute is set to 1 and the \u0027purchase_date\u0027 attribute falls within the month of February. Then, it projects the sum of the product of the \u0027quantity\u0027 and \u0027price\u0027 attributes of the resulting set of rows, which represents the sales for the product in the month." 
+}, { + "id": "3551", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of skincare products manufactured in the USA?", + "sql_context": "CREATE TABLE products (product_id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), manufacturer_country VARCHAR(255)); INSERT INTO products (product_id, name, category, price, manufacturer_country) VALUES (1, \u0027Lipstick\u0027, \u0027Cosmetics\u0027, 19.99, \u0027France\u0027), (2, \u0027Moisturizer\u0027, \u0027Skin Care\u0027, 29.99, \u0027USA\u0027), (3, \u0027Face Wash\u0027, \u0027Skin Care\u0027, 15.99, \u0027USA\u0027);", + "sql": "SELECT AVG(price) FROM products WHERE manufacturer_country \u003d \u0027USA\u0027 AND category \u003d \u0027Skin Care\u0027;", + "sql_explanation": "This SQL query calculates the average price of skincare products (category \u003d \u0027Skin Care\u0027) manufactured in the USA (manufacturer_country \u003d \u0027USA\u0027) by using the AVG function." 
+}, { + "id": "3586", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of organic skincare products sold in the United States?", + "sql_context": "CREATE TABLE SkincareProducts (product_id INT, product_name VARCHAR(255), price DECIMAL(5,2), is_organic BOOLEAN, country VARCHAR(50));", + "sql": "SELECT AVG(price) FROM SkincareProducts WHERE is_organic \u003d TRUE AND country \u003d \u0027United States\u0027;", + "sql_explanation": "This SQL query calculates the average price of organic skincare products sold in the United States. It first filters the SkincareProducts table to include only organic products (is_organic \u003d TRUE) sold in the United States (country \u003d \u0027United States\u0027). Then, it calculates the average price of these products using the AVG function." 
+}, { + "id": "3910", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the ingredient \u0027Microbeads\u0027 to \u0027Biodegradable Beads\u0027 in the Products table.", + "sql_context": "CREATE TABLE Products (productID INT, productName VARCHAR(50), ingredient VARCHAR(50)); INSERT INTO Products (productID, productName, ingredient) VALUES (1, \u0027Exfoliating Scrub\u0027, \u0027Microbeads\u0027), (2, \u0027Face Wash\u0027, \u0027Salicylic Acid\u0027), (3, \u0027Hand Cream\u0027, \u0027Shea Butter\u0027);", + "sql": "UPDATE Products SET ingredient \u003d \u0027Biodegradable Beads\u0027 WHERE ingredient \u003d \u0027Microbeads\u0027;", + "sql_explanation": "This query updates the ingredient \u0027Microbeads\u0027 to \u0027Biodegradable Beads\u0027 in the Products table." 
+}, { + "id": "3939", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of natural hair care products sold in France?", + "sql_context": "CREATE TABLE hair_care_sales(product_name TEXT, price DECIMAL(5,2), is_natural BOOLEAN, country TEXT); INSERT INTO hair_care_sales VALUES (\u0027Shampoo\u0027, 12.99, true, \u0027France\u0027); INSERT INTO hair_care_sales VALUES (\u0027Conditioner\u0027, 14.99, true, \u0027France\u0027); INSERT INTO hair_care_sales VALUES (\u0027Styling Cream\u0027, 8.99, false, \u0027France\u0027);", + "sql": "SELECT MAX(price) FROM hair_care_sales WHERE is_natural \u003d true AND country \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the maximum price of natural hair care products sold in France by filtering the hair_care_sales table for natural hair care products sold in France and then finding the maximum price of those products." 
+}, { + "id": "4130", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of products with vegan and gluten-free labels?", + "sql_context": "CREATE TABLE Product_Labels(Product_Name VARCHAR(30), Label VARCHAR(20)); INSERT INTO Product_Labels(Product_Name, Label) VALUES(\u0027Product A\u0027, \u0027Vegan\u0027), (\u0027Product B\u0027, \u0027Gluten-Free\u0027), (\u0027Product C\u0027, \u0027Vegan\u0027), (\u0027Product D\u0027, \u0027Gluten-Free\u0027), (\u0027Product E\u0027, \u0027Vegan\u0027), (\u0027Product F\u0027, \u0027Gluten-Free\u0027), (\u0027Product G\u0027, \u0027Vegan\u0027), (\u0027Product H\u0027, \u0027Gluten-Free\u0027), (\u0027Product I\u0027, \u0027Vegan\u0027), (\u0027Product J\u0027, \u0027Gluten-Free\u0027);", + "sql": "SELECT COUNT(*) FROM Product_Labels WHERE Label \u003d \u0027Vegan\u0027 OR Label \u003d \u0027Gluten-Free\u0027;", + "sql_explanation": "The SQL query selects the count of rows from the Product_Labels table for the rows where Label is either \u0027Vegan\u0027 or \u0027Gluten-Free\u0027 using the OR operator." 
+}, { + "id": "4175", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of the Eco-Friendly Moisturizer to $29.99", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), category VARCHAR(255), price DECIMAL(10,2)); INSERT INTO products (product_id, product_name, category, price) VALUES (1, \u0027Sensitive Skin Cleanser\u0027, \u0027Skincare\u0027, 17.99), (2, \u0027Gentle Makeup Remover\u0027, \u0027Skincare\u0027, 12.99), (3, \u0027Eco-Friendly Moisturizer\u0027, \u0027Skincare\u0027, 27.99);", + "sql": "UPDATE products SET price \u003d 29.99 WHERE product_name \u003d \u0027Eco-Friendly Moisturizer\u0027;", + "sql_explanation": "This query updates the price of the Eco-Friendly Moisturizer to $29.99 by setting the price for the product_name Eco-Friendly Moisturizer to the new price." 
+}, { + "id": "4215", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of organic makeup products with a rating of at least 4.5?", + "sql_context": "CREATE TABLE makeup_products (product_id INT, name VARCHAR(255), price FLOAT, rating FLOAT, is_organic BOOLEAN);", + "sql": "SELECT AVG(price) FROM makeup_products WHERE is_organic \u003d TRUE AND rating \u003e\u003d 4.5;", + "sql_explanation": "The SQL query calculates the average price of organic makeup products with a rating of at least 4.5 by using the AVG function on the price column, filtering the data with the WHERE clause, and checking the is_organic and rating columns." 
+}, { + "id": "4452", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum price of sustainable beauty products sold in Asia?", + "sql_context": "CREATE TABLE products (id INT, name VARCHAR(255), sustainable BOOLEAN, price FLOAT, region VARCHAR(255)); INSERT INTO products (id, name, sustainable, price, region) VALUES (1, \u0027Shampoo\u0027, true, 20.99, \u0027Asia\u0027), (2, \u0027Conditioner\u0027, false, 15.99, \u0027Asia\u0027), (3, \u0027Face Mask\u0027, true, 8.99, \u0027Asia\u0027);", + "sql": "SELECT MIN(price) FROM products WHERE sustainable \u003d true AND region \u003d \u0027Asia\u0027;", + "sql_explanation": "This query finds the minimum price of sustainable beauty products sold in Asia by filtering the \u0027products\u0027 table for rows where \u0027sustainable\u0027 is true and \u0027region\u0027 is \u0027Asia\u0027 and then finding the minimum value in the \u0027price\u0027 column for these rows." 
+}, { + "id": "4771", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total revenue of cosmetics sold to consumers in Asia with a known allergen.", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, quantity INT, sale_price DECIMAL(5,2), has_allergen BOOLEAN); INSERT INTO sales (sale_id, product_id, quantity, sale_price, has_allergen) VALUES (1, 1, 3, 19.99, true); INSERT INTO sales (sale_id, product_id, quantity, sale_price, has_allergen) VALUES (2, 2, 1, 29.99, false);", + "sql": "SELECT SUM(quantity * sale_price) FROM sales WHERE has_allergen \u003d true;", + "sql_explanation": "The SQL query calculates the total revenue of cosmetics sold to consumers in Asia with a known allergen by filtering the sales table for products with a known allergen and then calculating the total revenue using the SUM function." 
+}, { + "id": "4796", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total sales revenue of organic skincare products", + "sql_context": "CREATE TABLE sales (product_type VARCHAR(20), revenue DECIMAL(10,2)); INSERT INTO sales (product_type, revenue) VALUES (\u0027organic skincare\u0027, 5000), (\u0027conventional skincare\u0027, 7000), (\u0027organic makeup\u0027, 3000), (\u0027conventional makeup\u0027, 9000);", + "sql": "SELECT SUM(revenue) FROM sales WHERE product_type \u003d \u0027organic skincare\u0027;", + "sql_explanation": "This query calculates the total sales revenue for the \u0027organic skincare\u0027 product type by summing up the \u0027revenue\u0027 values in the \u0027sales\u0027 table where \u0027product_type\u0027 is \u0027organic skincare\u0027." 
+}, { + "id": "4883", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which beauty products contain the ingredient \u0027retinol\u0027?", + "sql_context": "CREATE TABLE product_ingredients (product VARCHAR(255), ingredient VARCHAR(255)); INSERT INTO product_ingredients (product, ingredient) VALUES (\u0027Ava Cleanser\u0027, \u0027Retinol\u0027), (\u0027Ava Moisturizer\u0027, \u0027Hyaluronic Acid\u0027), (\u0027Brizo Exfoliant\u0027, \u0027Glycolic Acid\u0027), (\u0027Brizo Toner\u0027, \u0027Retinol\u0027);", + "sql": "SELECT product FROM product_ingredients WHERE ingredient \u003d \u0027Retinol\u0027;", + "sql_explanation": "This SQL query selects all products that contain the ingredient \u0027retinol\u0027. The WHERE statement filters the results to only include rows where the ingredient is \u0027retinol\u0027, and the SELECT statement chooses the product column to display." 
+}, { + "id": "5059", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue of skincare products that are sustainably sourced?", + "sql_context": "CREATE TABLE skincare_sales (product_name TEXT, price DECIMAL(5,2), is_sustainable BOOLEAN); INSERT INTO skincare_sales (product_name, price, is_sustainable) VALUES (\u0027Cleanser\u0027, 15.99, true), (\u0027Moisturizer\u0027, 24.99, false), (\u0027Serum\u0027, 39.99, true);", + "sql": "SELECT SUM(price) FROM skincare_sales WHERE is_sustainable \u003d true;", + "sql_explanation": "This query calculates the total revenue of skincare products that are sustainably sourced. It does this by using the SUM function on the price column, where the is_sustainable column is true." 
+}, { + "id": "5091", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update revenue for product_id 1 to 220.00 in \u0027cosmetics_sales\u0027 table", + "sql_context": "CREATE TABLE cosmetics_sales (product_id INT, product_name VARCHAR(255), units_sold INT, revenue DECIMAL(10,2), sale_date DATE); INSERT INTO cosmetics_sales (product_id, product_name, units_sold, revenue, sale_date) VALUES (1, \u0027Liquid Foundation\u0027, 20, 200.50, \u00272021-01-01\u0027), (2, \u0027Organic Lip Balm\u0027, 30, 75.00, \u00272021-01-02\u0027), (3, \u0027Natural Mascara\u0027, 15, 120.00, \u00272021-01-03\u0027);", + "sql": "UPDATE cosmetics_sales SET revenue \u003d 220.00 WHERE product_id \u003d 1;", + "sql_explanation": "This query updates the revenue for product_id 1 to 220.00 in the \u0027cosmetics_sales\u0027 table." 
+}, { + "id": "5230", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum price of lipsticks?", + "sql_context": "CREATE TABLE Lipsticks (product_id INT, product_name VARCHAR(255), category VARCHAR(255), price DECIMAL(10,2)); INSERT INTO Lipsticks (product_id, product_name, category, price) VALUES (1, \u0027Lipstick 1\u0027, \u0027Lipsticks\u0027, 12.99), (2, \u0027Lipstick 2\u0027, \u0027Lipsticks\u0027, 17.99), (3, \u0027Lipstick 3\u0027, \u0027Lipsticks\u0027, 22.99), (4, \u0027Lipstick 4\u0027, \u0027Lipsticks\u0027, 27.99);", + "sql": "SELECT MAX(price) FROM Lipsticks WHERE category \u003d \u0027Lipsticks\u0027;", + "sql_explanation": "This query calculates the maximum price of lipsticks by selecting the maximum value of the price column in the Lipsticks table where the category is \u0027Lipsticks\u0027." 
+}, { + "id": "5299", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of all mascaras to $25", + "sql_context": "CREATE TABLE products (id INT, name VARCHAR(255), price DECIMAL(10, 2), category VARCHAR(255)); INSERT INTO products (id, name, price, category) VALUES (1, \u0027Mascara 1\u0027, 20.00, \u0027Mascara\u0027), (2, \u0027Mascara 2\u0027, 30.00, \u0027Mascara\u0027), (3, \u0027Mascara 3\u0027, 40.00, \u0027Mascara\u0027), (4, \u0027Mascara 4\u0027, 50.00, \u0027Mascara\u0027);", + "sql": "UPDATE products SET price \u003d 25.00 WHERE category \u003d \u0027Mascara\u0027;", + "sql_explanation": "This query updates the price of all records in the products table where the category is \u0027Mascara\u0027 to $25.00." +}, { + "id": "5340", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 most expensive products?", + "sql_context": "CREATE TABLE if not exists product (id INT PRIMARY KEY, name TEXT, brand_id INT, price DECIMAL(5,2)); INSERT INTO product (id, name, brand_id, price) VALUES (3, \u0027Luxury Moisturizing Cream\u0027, 1, 250.00);", + "sql": "SELECT name, price FROM product ORDER BY price DESC LIMIT 5;", + "sql_explanation": "This SQL query retrieves the top 5 most expensive products. 
It orders the \u0027product\u0027 table by the \u0027price\u0027 attribute in descending order, and then projects the \u0027name\u0027 and \u0027price\u0027 attributes of the top 5 rows." +}, { + "id": "5620", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which customers are from a specific city?", + "sql_context": "CREATE TABLE if not exists customer (id INT PRIMARY KEY, name TEXT, age INT, gender TEXT, city TEXT); INSERT INTO customer (id, name, age, gender, city) VALUES (3, \u0027Alicia Keys\u0027, 41, \u0027Female\u0027, \u0027New York\u0027);", + "sql": "SELECT name FROM customer WHERE city \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query retrieves the names of customers from a specific city. It filters the \u0027customer\u0027 table to only include rows where the \u0027city\u0027 attribute is set to \u0027New York\u0027, and then projects the \u0027name\u0027 attribute of the resulting set of rows." 
+}, { + "id": "1692", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table for \u0027organic_materials\u0027 in the \u0027fashion_production\u0027 sector", + "sql_context": "CREATE TABLE organic_materials (country VARCHAR(50), fashion_production_sector VARCHAR(50), organic_material_type VARCHAR(50), percentage_use FLOAT);", + "sql": "CREATE TABLE organic_materials (country VARCHAR(50), fashion_production_sector VARCHAR(50), organic_material_type VARCHAR(50), percentage_use FLOAT);", + "sql_explanation": "1. This command creates a new table named \u0027organic_materials\u0027. 2. The table consists of four columns: \u0027country\u0027, \u0027fashion_production_sector\u0027, \u0027organic_material_type\u0027, and \u0027percentage_use\u0027. 3. Each column data type is defined as VARCHAR(50) for \u0027country\u0027, \u0027fashion_production_sector\u0027, and \u0027organic_material_type\u0027, and FLOAT for \u0027percentage_use\u0027." 
+}, { + "id": "2406", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of recycled materials used in production?", + "sql_context": "CREATE TABLE Production (production_id INT, material_id INT, production_quantity INT); INSERT INTO Production (production_id, material_id, production_quantity) VALUES (1, 1, 500), (2, 2, 800), (3, 1, 600), (4, 2, 700);", + "sql": "SELECT (SUM(CASE WHEN material_id \u003d 2 THEN production_quantity ELSE 0 END) / SUM(production_quantity)) * 100 FROM Production;", + "sql_explanation": "This SQL query calculates the percentage of recycled materials used in production by using a CASE statement to filter material_id 2 and calculating the SUM, then dividing by the total SUM of production_quantity while multiplying by 100 to get the percentage." 
+}, { + "id": "2570", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of garments produced using fair labor practices in Turkey?", + "sql_context": "CREATE TABLE Garments (garment_id INT, garment_produced_fair_labor BOOLEAN, garment_country VARCHAR(50));", + "sql": "SELECT COUNT(*) AS total_garments FROM Garments WHERE garment_produced_fair_labor \u003d TRUE AND garment_country \u003d \u0027Turkey\u0027;", + "sql_explanation": "The SQL query calculates the total number of garments produced using fair labor practices in Turkey by filtering the Garments table by the garment_produced_fair_labor and garment_country columns and calculating the number of rows with a value of TRUE." 
+}, { + "id": "2693", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many factories are producing garments using circular economy principles in Europe?", + "sql_context": "CREATE TABLE CircularEconomyFactories (id INT, factory_location VARCHAR(255), is_circular_economy BOOLEAN); INSERT INTO CircularEconomyFactories (id, factory_location, is_circular_economy) VALUES (1, \u0027Milan, Italy\u0027, true), (2, \u0027Paris, France\u0027, false), (3, \u0027Berlin, Germany\u0027, true);", + "sql": "SELECT COUNT(*) FROM CircularEconomyFactories WHERE factory_location LIKE \u0027%Europe%\u0027 AND is_circular_economy \u003d true;", + "sql_explanation": "This query counts the number of factories producing garments using circular economy principles in Europe by filtering the CircularEconomyFactories table based on the factory_location and is_circular_economy columns and then counting the number of rows returned by the query." 
+}, { + "id": "2696", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the labor cost of factories in Vietnam that use organic cotton to be $1.00 higher than the current value.", + "sql_context": "CREATE TABLE factory_vietnam (factory VARCHAR(255), country VARCHAR(255), material VARCHAR(255), labor_cost DECIMAL(5,2)); INSERT INTO factory_vietnam (factory, country, material, labor_cost) VALUES (\u0027Factory1\u0027, \u0027Vietnam\u0027, \u0027organic cotton\u0027, 5.00), (\u0027Factory2\u0027, \u0027Vietnam\u0027, \u0027conventional cotton\u0027, 4.75), (\u0027Factory3\u0027, \u0027Vietnam\u0027, \u0027organic cotton\u0027, 5.25);", + "sql": "UPDATE factory_vietnam SET labor_cost \u003d labor_cost + 1.00 WHERE country \u003d \u0027Vietnam\u0027 AND material \u003d \u0027organic cotton\u0027;", + "sql_explanation": "This query updates the labor cost of factories in Vietnam that use organic cotton to be $1.00 higher than the current value. It uses the UPDATE statement with a WHERE clause to select the factories that meet the criteria and then increments the labor cost by 1.00." 
+}, { + "id": "2929", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fair trade certified factories are there in South America?", + "sql_context": "CREATE TABLE FairTradeFactories (id INT, factory_location VARCHAR(255), is_certified BOOLEAN); INSERT INTO FairTradeFactories (id, factory_location, is_certified) VALUES (1, \u0027Buenos Aires, Argentina\u0027, true), (2, \u0027Lima, Peru\u0027, false), (3, \u0027Santiago, Chile\u0027, true);", + "sql": "SELECT COUNT(*) FROM FairTradeFactories WHERE factory_location LIKE \u0027%South America%\u0027 AND is_certified \u003d true;", + "sql_explanation": "This query counts the number of fair trade certified factories in South America by filtering the FairTradeFactories table based on the factory_location and is_certified columns and then counting the number of rows returned by the query." 
+}, { + "id": "2933", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many workers are employed in factories with fair labor practices in Asia?", + "sql_context": "CREATE TABLE FairLaborFactories (id INT, factory_location VARCHAR(255), num_workers INT, is_fair_labor BOOLEAN); INSERT INTO FairLaborFactories (id, factory_location, num_workers, is_fair_labor) VALUES (1, \u0027Mumbai, India\u0027, 500, true), (2, \u0027Ho Chi Minh City, Vietnam\u0027, 300, false), (3, \u0027Bangkok, Thailand\u0027, 700, true);", + "sql": "SELECT SUM(num_workers) FROM FairLaborFactories WHERE factory_location LIKE \u0027%Asia%\u0027 AND is_fair_labor \u003d true;", + "sql_explanation": "This query calculates the total number of workers employed in factories with fair labor practices in Asia by summing up the num_workers column values where the factory_location contains \u0027Asia\u0027 and is_fair_labor is true." 
+}, { + "id": "2952", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the green_certification table where the certification_type is \u0027Eco-Friendly\u0027 and expiration_date is before \u00272022-01-01\u0027", + "sql_context": "CREATE TABLE green_certification (id INT PRIMARY KEY, company VARCHAR(255), certification_type VARCHAR(255), expiration_date DATE); INSERT INTO green_certification (id, company, certification_type, expiration_date) VALUES (1, \u0027Company X\u0027, \u0027Eco-Friendly\u0027, \u00272022-12-31\u0027), (2, \u0027Company Y\u0027, \u0027Organic\u0027, \u00272023-06-30\u0027), (3, \u0027Company Z\u0027, \u0027Eco-Friendly\u0027, \u00272021-06-30\u0027);", + "sql": "DELETE FROM green_certification WHERE certification_type \u003d \u0027Eco-Friendly\u0027 AND expiration_date \u003c \u00272022-01-01\u0027;", + "sql_explanation": "This query deletes records from the green_certification table where the certification_type is \u0027Eco-Friendly\u0027 and expiration_date is before \u00272022-01-01\u0027. The WHERE clause filters the records according to the specified conditions, and the DELETE keyword removes the matching records." 
+}, { + "id": "3052", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sustainable material types are used by each brand?", + "sql_context": "CREATE TABLE Brands (Brand_ID INT, Brand_Name TEXT); INSERT INTO Brands (Brand_ID, Brand_Name) VALUES (1, \u0027H\u0026M\u0027); CREATE TABLE Material_Types (Material_ID INT, Material_Type TEXT, Brand_ID INT);", + "sql": "SELECT B.Brand_Name, COUNT(DISTINCT MT.Material_Type) AS Number_Of_Sustainable_Material_Types FROM Brands B", + "sql_explanation": "Join the \u0027Brands\u0027 and \u0027Material_Types\u0027 tables on \u0027Brand_ID\u0027. Calculate the number of sustainable material types used by each brand." 
+}, { + "id": "3113", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum CO2 emission for a fair trade certified factory in Bangladesh?", + "sql_context": "CREATE TABLE CO2Emissions (factory VARCHAR(50), certification VARCHAR(50), CO2_emission INT); INSERT INTO CO2Emissions VALUES (\u0027Factory1\u0027, \u0027Fair Trade\u0027, 500), (\u0027Factory2\u0027, \u0027Not Certified\u0027, 600), (\u0027Factory3\u0027, \u0027Fair Trade\u0027, 450), (\u0027Factory4\u0027, \u0027Not Certified\u0027, 700);", + "sql": "SELECT MAX(CO2_emission) FROM CO2Emissions WHERE certification \u003d \u0027Fair Trade\u0027 AND country \u003d \u0027Bangladesh\u0027;", + "sql_explanation": "This query calculates the maximum CO2 emission for a fair trade certified factory in Bangladesh. It does this by using the MAX function, which returns the maximum value of a numeric column. In this case, it\u0027s used on the CO2_emission column with the WHERE clause to filter rows where the certification is fair trade and the country is Bangladesh." 
+}, { + "id": "3212", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emissions reduction achieved through the use of sustainable materials in the past year?", + "sql_context": "CREATE TABLE CO2Emissions (date DATE, reduction DECIMAL(5,2));", + "sql": "SELECT SUM(reduction) FROM CO2Emissions WHERE date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW();", + "sql_explanation": "The SQL query calculates the total CO2 emissions reduction achieved through the use of sustainable materials in the past year by using the SUM function on the reduction column of the CO2Emissions table, and filtering the rows based on the date using the BETWEEN clause." 
+}, { + "id": "3545", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum wage in fair trade factories in Pakistan?", + "sql_context": "CREATE TABLE MinimumWages (factory VARCHAR(255), country VARCHAR(255), wage DECIMAL(10,2)); INSERT INTO MinimumWages (factory, country, wage) VALUES (\u0027FairTradeFactoryC\u0027, \u0027Pakistan\u0027, 4.0);", + "sql": "SELECT MIN(wage) FROM MinimumWages WHERE factory LIKE \u0027%Fair Trade%\u0027 AND country \u003d \u0027Pakistan\u0027;", + "sql_explanation": "This SQL query finds the minimum wage in fair trade factories in Pakistan by using the MIN() function on the \u0027wage\u0027 column and filtering for factories with \u0027Fair Trade\u0027 in their name and \u0027Pakistan\u0027 country." 
+}, { + "id": "3589", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the sustainable material type for a specific brand.", + "sql_context": "CREATE TABLE sustainable_material_brands (brand_id INT PRIMARY KEY, brand_name VARCHAR(100), material_type VARCHAR(50));", + "sql": "UPDATE sustainable_material_brands SET material_type \u003d \u0027organic cotton\u0027 WHERE brand_id \u003d 789;", + "sql_explanation": "This query updates the material_type for brand_id 789 to \u0027organic cotton\u0027 in the sustainable_material_brands table." +}, { + "id": "3596", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption for organic cotton production in Southeast Asia?", + "sql_context": "CREATE TABLE water_consumption (region VARCHAR(50), water_consumption INT); INSERT INTO water_consumption (region, water_consumption) VALUES (\u0027North America\u0027, 2000), (\u0027South America\u0027, 2500), (\u0027Southeast Asia\u0027, 1700), (\u0027Europe\u0027, 1800), (\u0027Africa\u0027, 2200);", + "sql": "SELECT region, AVG(water_consumption) FROM water_consumption WHERE region \u003d \u0027Southeast Asia\u0027;", + "sql_explanation": "This query calculates the average water consumption for 
organic cotton production in Southeast Asia. It filters the records by the \u0027region\u0027 column with the value \u0027Southeast Asia\u0027 and then calculates the average water consumption for the filtered records." +}, { + "id": "3638", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by brands that have a sustainable supply chain, in the year 2020?", + "sql_context": "CREATE TABLE BrandRevenue (brand VARCHAR(255), revenue DECIMAL(10,2), year INT, sustainable_supply_chain BOOLEAN);", + "sql": "SELECT SUM(revenue) FROM BrandRevenue WHERE sustainable_supply_chain \u003d TRUE AND year \u003d 2020;", + "sql_explanation": "This query calculates the total revenue generated by brands that have a sustainable supply chain, in the year 2020. It filters the results to only include brands with a sustainable supply chain and revenue data for the year 2020, and then applies the SUM function to calculate the total revenue." 
+}, { + "id": "3737", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average awareness score for consumers in a specific region?", + "sql_context": "CREATE TABLE consumer_awareness (region_id INT PRIMARY KEY, awareness_score INT, year INT);", + "sql": "SELECT AVG(awareness_score) FROM consumer_awareness WHERE region_id \u003d 123 AND year \u003d 2021;", + "sql_explanation": "This query calculates the average awareness_score for region_id 123 in the year 2021 from the consumer_awareness table." +}, { + "id": "3782", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the record of fair wages certification for the manufacturer with ID 4.", + "sql_context": "CREATE TABLE manufacturer_fair_wages (manufacturer_id INT, certification DATE); INSERT INTO manufacturer_fair_wages (manufacturer_id, certification) VALUES (1, \u00272020-01-01\u0027), (2, \u00272019-05-15\u0027), (3, \u00272018-09-30\u0027), (4, NULL);", + "sql": "UPDATE manufacturer_fair_wages SET certification \u003d \u00272022-04-20\u0027 WHERE manufacturer_id \u003d 4;", + "sql_explanation": "This SQL query updates the fair wages certification record for the manufacturer with ID 4 by setting the \u0027certification\u0027 column to 
\u00272022-04-20\u0027 in the \u0027manufacturer_fair_wages\u0027 table where the \u0027manufacturer_id\u0027 column is equal to 4." +}, { + "id": "3888", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fair trade certified factories are located in Asia?", + "sql_context": "CREATE TABLE factories (factory_id INT, factory_name VARCHAR(50), location VARCHAR(50), fair_trade_certified BOOLEAN); INSERT INTO factories (factory_id, factory_name, location, fair_trade_certified) VALUES (1, \u0027Green Factory\u0027, \u0027Asia\u0027, true), (2, \u0027Eco Factory\u0027, \u0027US\u0027, false), (3, \u0027Sustainable Factory\u0027, \u0027Europe\u0027, true);", + "sql": "SELECT COUNT(*) FROM factories WHERE location \u003d \u0027Asia\u0027 AND fair_trade_certified \u003d true;", + "sql_explanation": "This query counts the number of fair trade certified factories located in Asia by filtering the rows based on the location and fair_trade_certified column values." 
+}, { + "id": "3894", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption for producing silk in Brazil?", + "sql_context": "CREATE TABLE WaterConsumption (product VARCHAR(255), material VARCHAR(255), water INT); INSERT INTO WaterConsumption (product, material, water) VALUES (\u0027Silk\u0027, \u0027Brazil\u0027, 1500);", + "sql": "SELECT AVG(water) FROM WaterConsumption WHERE product \u003d \u0027Silk\u0027 AND material \u003d \u0027Brazil\u0027;", + "sql_explanation": "This SQL query calculates the average water consumption for producing silk in Brazil by using the AVG() function on the \u0027water\u0027 column, filtering for \u0027Silk\u0027 and \u0027Brazil\u0027 products." 
+}, { + "id": "3958", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the score of \u0027Child Labor Prevention\u0027 to 9.3.", + "sql_context": "CREATE TABLE Fair_Labor_Practices (Practice VARCHAR(255), Score FLOAT); INSERT INTO Fair_Labor_Practices (Practice, Score) VALUES (\u0027Safe Working Conditions\u0027, 8.5), (\u0027Working Hours Control\u0027, 8.8), (\u0027Child Labor Prevention\u0027, 9.2);", + "sql": "UPDATE Fair_Labor_Practices SET Score \u003d 9.3 WHERE Practice \u003d \u0027Child Labor Prevention\u0027;", + "sql_explanation": "This query updates the score of the fair labor practice \u0027Child Labor Prevention\u0027 to 9.3 in the Fair_Labor_Practices table." 
+}, { + "id": "4027", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many factories are there in Spain and the United Kingdom?", + "sql_context": "CREATE TABLE factories (factory_id INT, location VARCHAR(50), capacity INT); INSERT INTO factories (factory_id, location, capacity) VALUES (1, \u0027Madrid, Spain\u0027, 5000), (2, \u0027Paris, France\u0027, 7000), (3, \u0027London, UK\u0027, 6000);", + "sql": "SELECT COUNT(*) FROM factories WHERE location LIKE \u0027%Spain%\u0027 OR location LIKE \u0027%UK%\u0027;", + "sql_explanation": "This query counts the number of records in the factories table with location containing \u0027Spain\u0027 or \u0027UK\u0027." 
+}, { + "id": "4193", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 countries with the highest number of fair trade certified factories.", + "sql_context": "CREATE TABLE FairTradeFactories(factory_id INT, country VARCHAR(255));CREATE VIEW FairTradeFactoryCountries AS SELECT country, COUNT(*) as count FROM FairTradeFactories GROUP BY country;", + "sql": "SELECT country, count FROM FairTradeFactoryCountries ORDER BY count DESC LIMIT 5;", + "sql_explanation": "This query uses a subquery (FairTradeFactoryCountries) to count the number of fair trade factories per country, then orders them in descending order and limits the output to the top 5 countries with the highest counts." 
+}, { + "id": "4252", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which brands sourced organic cotton from India in 2021?", + "sql_context": "CREATE TABLE organic_cotton_sources (brand VARCHAR(50), country VARCHAR(50), year INT); INSERT INTO organic_cotton_sources (brand, country, year) VALUES (\u0027BrandD\u0027, \u0027India\u0027, 2021), (\u0027BrandE\u0027, \u0027Turkey\u0027, 2021), (\u0027BrandF\u0027, \u0027Pakistan\u0027, 2021);", + "sql": "SELECT brand FROM organic_cotton_sources WHERE country \u003d \u0027India\u0027 AND year \u003d 2021;", + "sql_explanation": "The SQL query lists the brands that sourced organic cotton from India in 2021 by selecting the \u0027brand\u0027 column values where \u0027country\u0027 equals \u0027India\u0027 and \u0027year\u0027 equals 2021." 
+}, { + "id": "4549", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production cost of B Corp certified garments?", + "sql_context": "CREATE TABLE certifications (certification_id INT, certification_name TEXT); INSERT INTO certifications (certification_id, certification_name) VALUES (1, \u0027Fair Trade\u0027), (2, \u0027GOTS Organic\u0027), (3, \u0027B Corp\u0027); CREATE TABLE garments (garment_id INT, garment_name TEXT, production_cost FLOAT, certification_id INT); INSERT INTO garments (garment_id, garment_name, production_cost, certification_id) VALUES (1, \u0027Organic Cotton Tee\u0027, 15.50, 3), (2, \u0027Cotton Tote Bag\u0027, 8.25, NULL), (3, \u0027Recycled Polyester Hoodie\u0027, 28.99, NULL), (4, \u0027Organic Cotton Dress\u0027, 22.00, 2), (5, \u0027Hemp Trousers\u0027, 35.00, NULL), (6, \u0027Bamboo Shirt\u0027, 27.50, NULL);", + "sql": "SELECT SUM(g.production_cost) FROM garments g WHERE g.certification_id \u003d 3;", + "sql_explanation": "Sum the production cost of all B Corp certified garments." 
+}, { + "id": "4653", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption for organic cotton farming in India?", + "sql_context": "CREATE TABLE OrganicCotton (country VARCHAR(50), water_consumption INT); INSERT INTO OrganicCotton VALUES (\u0027India\u0027, 1200), (\u0027Nepal\u0027, 1500), (\u0027India\u0027, 1000), (\u0027Sri_Lanka\u0027, 1400);", + "sql": "SELECT AVG(water_consumption) FROM OrganicCotton WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the average water consumption for organic cotton farming in India. It does this by using the AVG function, which returns the average value of a numeric column. In this case, it\u0027s used on the water_consumption column with the WHERE clause to filter rows where the country is India." 
+}, { + "id": "4827", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of garments made with recycled polyester?", + "sql_context": "CREATE TABLE products (product_id INT, material VARCHAR(20), price DECIMAL(5,2)); INSERT INTO products (product_id, material, price) VALUES (1, \u0027organic cotton\u0027, 25.99), (2, \u0027conventional cotton\u0027, 19.99), (3, \u0027hemp\u0027, 39.99), (4, \u0027recycled polyester\u0027, 35.99);", + "sql": "SELECT AVG(price) FROM products WHERE material \u003d \u0027recycled polyester\u0027;", + "sql_explanation": "This query calculates the average price of garments made with recycled polyester by selecting the price column from the products table where the material is recycled polyester, and then using the AVG function to find the average value." 
+}, { + "id": "4856", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many circular economy initiatives have been implemented by each manufacturer in the ethical fashion industry?", + "sql_context": "CREATE TABLE circular_economy_initiatives (manufacturer_id INT, initiatives INT); INSERT INTO circular_economy_initiatives (manufacturer_id, initiatives) VALUES (1, 3), (2, 5), (3, 4), (4, 6), (5, 2);", + "sql": "SELECT manufacturer_id, initiatives FROM circular_economy_initiatives;", + "sql_explanation": "This SQL query counts the number of circular economy initiatives implemented by each manufacturer in the ethical fashion industry by selecting the \u0027manufacturer_id\u0027 and \u0027initiatives\u0027 columns in the \u0027circular_economy_initiatives\u0027 table." 
+}, { + "id": "4891", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission of silk production in China?", + "sql_context": "CREATE TABLE SilkProduction (country VARCHAR(50), co2_emission INT); INSERT INTO SilkProduction VALUES (\u0027China\u0027, 1200), (\u0027India\u0027, 800), (\u0027China\u0027, 1500), (\u0027Japan\u0027, 500);", + "sql": "SELECT SUM(co2_emission) FROM SilkProduction WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "This query calculates the total CO2 emission of silk production in China. It does this by using the SUM function, which returns the total sum of a numeric column. In this case, it\u0027s used with the WHERE clause to filter rows where the country is China." 
+}, { + "id": "4959", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of Fair Trade certified clothing items?", + "sql_context": "CREATE TABLE clothing (id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), certification VARCHAR(255)); INSERT INTO clothing (id, name, category, price, certification) VALUES (1, \u0027Organic Cotton Shirt\u0027, \u0027Tops\u0027, 35.99, \u0027Fair Trade\u0027);", + "sql": "SELECT AVG(price) FROM clothing WHERE certification \u003d \u0027Fair Trade\u0027;", + "sql_explanation": "This query calculates the average price of Fair Trade certified clothing items by selecting the price column and using the AVG function. It only considers rows with the certification \u0027Fair Trade\u0027." 
+}, { + "id": "5052", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new record into \u0027supplier\u0027 table for \u0027Green Supplies\u0027 with \u0027rating\u0027 of 90", + "sql_context": "CREATE TABLE supplier (id INT PRIMARY KEY, name VARCHAR(50), rating INT); INSERT INTO supplier (id, name, rating) VALUES (1, \u0027Blue Supplies\u0027, 80), (2, \u0027Red Supplies\u0027, 70);", + "sql": "INSERT INTO supplier (name, rating) VALUES (\u0027Green Supplies\u0027, 90);", + "sql_explanation": "This query inserts a new record into the \u0027supplier\u0027 table for \u0027Green Supplies\u0027 with a \u0027rating\u0027 of 90. It adds a new row to the table with the specified values." 
+}, { + "id": "5096", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the price range of items made from hemp?", + "sql_context": "CREATE TABLE items (id INT, name VARCHAR(50), material VARCHAR(50), price DECIMAL(5,2)); INSERT INTO items (id, name, material, price) VALUES (1, \u0027Tote Bag\u0027, \u0027hemp\u0027, 65.99), (2, \u0027Hoodie\u0027, \u0027hemp\u0027, 85.99), (3, \u0027Backpack\u0027, \u0027hemp\u0027, 75.99);", + "sql": "SELECT MIN(price), MAX(price) FROM items WHERE material \u003d \u0027hemp\u0027;", + "sql_explanation": "This query retrieves the price range of items made from hemp by filtering the items table for the material column set to \u0027hemp\u0027 and then computing the minimum and maximum price." 
+}, { + "id": "5150", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the price of all eco-friendly jeans to $65.99.", + "sql_context": "CREATE TABLE inventory (id INT, item_name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), is_eco_friendly BOOLEAN); INSERT INTO inventory (id, item_name, category, price, is_eco_friendly) VALUES (1, \u0027Straight Jeans\u0027, \u0027Bottoms\u0027, 59.99, true), (2, \u0027Skinny Jeans\u0027, \u0027Bottoms\u0027, 49.99, false);", + "sql": "UPDATE inventory SET price \u003d 65.99 WHERE is_eco_friendly \u003d true;", + "sql_explanation": "This query updates the price of all eco-friendly jeans to $65.99 by using the UPDATE statement and specifying the new price and condition in the WHERE clause." +}, { + "id": "5262", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all suppliers that have a fair labor certification.", + "sql_context": "CREATE TABLE suppliers (id INT, name VARCHAR(255), certification VARCHAR(255));", + "sql": "SELECT name FROM suppliers WHERE certification \u003d \u0027Fair Labor\u0027;", + "sql_explanation": "The SQL query selects the name from the suppliers table, where certification is \u0027Fair Labor\u0027. 
This gives a list of all suppliers that have a fair labor certification." +}, { + "id": "5349", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many items are made from recycled materials?", + "sql_context": "CREATE TABLE items (id INT, name VARCHAR(50), material VARCHAR(50)); INSERT INTO items (id, name, material) VALUES (1, \u0027Tote Bag\u0027, \u0027recycled cotton\u0027), (2, \u0027Hoodie\u0027, \u0027organic cotton\u0027), (3, \u0027Backpack\u0027, \u0027recycled polyester\u0027);", + "sql": "SELECT COUNT(*) FROM items WHERE material LIKE \u0027%recycled%\u0027;", + "sql_explanation": "This query counts the number of items made from recycled materials by filtering the items table for materials containing the word \u0027recycled\u0027." 
+}, { + "id": "5474", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all ethical fashion brands and their customer satisfaction scores.", + "sql_context": "CREATE TABLE BrandSatisfaction(brand VARCHAR(255), satisfaction_score DECIMAL(3,2));", + "sql": "SELECT brand, satisfaction_score FROM BrandSatisfaction;", + "sql_explanation": "This query returns all rows from the BrandSatisfaction table, providing a list of ethical fashion brands and their customer satisfaction scores." +}, { + "id": "5542", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete suppliers with sustainability score less than 4", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), sustainability_score FLOAT); INSERT INTO suppliers (id, name, location, sustainability_score) VALUES (1, \u0027Green Fabrics\u0027, \u0027Los Angeles, USA\u0027, 4.2), (2, \u0027Eco Weaves\u0027, \u0027Paris, France\u0027, 3.8);", + "sql": "DELETE FROM suppliers WHERE sustainability_score \u003c 4;", + "sql_explanation": "This query deletes the supplier \u0027Eco Weaves\u0027 from the suppliers table, as its sustainability score is less than 4." 
+}, { + "id": "5569", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many items are made of materials that are not sustainably sourced?", + "sql_context": "CREATE TABLE products (id INT, name TEXT, material TEXT, sustainable BOOLEAN); INSERT INTO products (id, name, material, sustainable) VALUES (1, \u0027Shirt\u0027, \u0027Organic Cotton\u0027, 1), (2, \u0027Pants\u0027, \u0027Conventional Cotton\u0027, 0);", + "sql": "SELECT COUNT(*) FROM products WHERE sustainable \u003d 0;", + "sql_explanation": "This query counts the number of items made of materials that are not sustainably sourced. It does this by using the COUNT aggregate function to count the number of rows where the sustainable column is 0, indicating that the material is not sustainably sourced." 
+}, { + "id": "5676", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum recycling rate achieved by any ethical fashion brand?", + "sql_context": "CREATE TABLE RecyclingRates (id INT, brand VARCHAR(50), recycling_rate DECIMAL); INSERT INTO RecyclingRates (id, brand, recycling_rate) VALUES (1, \u0027BrandA\u0027, 0.85), (2, \u0027BrandB\u0027, 0.92), (3, \u0027BrandC\u0027, 0.78), (4, \u0027BrandD\u0027, 0.95), (5, \u0027BrandE\u0027, 0.88);", + "sql": "SELECT MAX(recycling_rate) FROM RecyclingRates;", + "sql_explanation": "This SQL query finds the maximum recycling rate achieved by any ethical fashion brand. It uses the MAX function to find the highest value in the recycling_rate column of the RecyclingRates table." 
+}, { + "id": "5738", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by upcycled clothing sales?", + "sql_context": "CREATE TABLE UpcycledClothing (id INT, revenue DECIMAL);", + "sql": "select sum(revenue) from UpcycledClothing;", + "sql_explanation": "Since the UpcycledClothing table already contains the revenue generated by upcycled clothing sales, simply calculate the sum of revenue to find the total revenue generated by upcycled clothing sales." +}, { + "id": "22", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total playtime of \u0027Racing\u0027 genre games for users from the EU region?", + "sql_context": "CREATE TABLE game_sessions (session_id INT, player_id INT, game_id INT, genre VARCHAR(50), start_time TIMESTAMP, end_time TIMESTAMP); INSERT INTO game_sessions (session_id, player_id, game_id, genre, start_time, end_time) VALUES (1, 1, 3, \u0027Racing\u0027, \u00272021-01-01 10:00:00\u0027, \u00272021-01-01 12:00:00\u0027), (2, 2, 3, \u0027Racing\u0027, \u00272021-01-02 14:00:00\u0027, \u00272021-01-02 16:00:00\u0027);", + "sql": "SELECT SUM(TIMESTAMP_DIFF(end_time, start_time, MINUTE)) AS total_playtime_minutes FROM game_sessions WHERE genre \u003d \u0027Racing\u0027 
AND EXTRACT(HOUR FROM start_time) BETWEEN 0 AND 23 AND EXTRACT(MINUTE FROM start_time) \u003d 0 AND EXTRACT(SECOND FROM start_time) \u003d 0 AND EXTRACT(HOUR FROM end_time) BETWEEN 0 AND 23 AND EXTRACT(MINUTE FROM end_time) \u003d 0 AND EXTRACT(SECOND FROM end_time) \u003d 0 AND EXTRACT(REGION FROM start_time) \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the total playtime of \u0027Racing\u0027 genre games for users from the EU region. The query selects the difference between end_time and start_time in minutes for each session using the TIMESTAMP_DIFF function and then sums the results. It filters the game_sessions table by genre (Racing), start and end times between 00:00 and 23:59:59, and the EU region using the EXTRACT function." +}, { + "id": "999", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for the \"Strategy Wars\" game into the \"game_stats\" table", + "sql_context": "CREATE TABLE game_stats (game_name VARCHAR(255), players_online INT, peak_players INT, average_session_length TIME);", + "sql": "INSERT INTO game_stats (game_name, players_online, peak_players, average_session_length) VALUES (\u0027Strategy Wars\u0027, 3000, 4000, \u002700:45:00\u0027), (\u0027Strategy Wars\u0027, 3500, 4500, \u002700:50:00\u0027);", + "sql_explanation": "This query inserts new records into the game_stats table for the \u0027Strategy Wars\u0027 game, providing information about the number of players online, peak players, and average session length." 
+}, { + "id": "1473", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show total earnings of eSports team \u0027Crimson Clash\u0027 for the current year", + "sql_context": "CREATE TABLE esports_matches (team1 TEXT, team2 TEXT, prize_money INT, match_date DATETIME);", + "sql": "SELECT SUM(prize_money) AS total_earnings FROM esports_matches WHERE (team1 \u003d \u0027Crimson Clash\u0027 OR team2 \u003d \u0027Crimson Clash\u0027) AND YEAR(match_date) \u003d YEAR(NOW());", + "sql_explanation": "This query calculates the total earnings of the eSports team \u0027Crimson Clash\u0027 for the current year by summing the \u0027prize_money\u0027 for each appearance of the team in the \u0027esports_matches\u0027 table, filtering by the current year." 
+}, { + "id": "1718", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the player_achievements table with the following data: (1, \u0027Player of the Month\u0027), (2, \u0027Rookie of the Year\u0027), (3, \u0027Top Fragger\u0027)", + "sql_context": "CREATE TABLE player_achievements (achievement_id INT, achievement_name VARCHAR(30));", + "sql": "INSERT INTO player_achievements (achievement_id, achievement_name) VALUES (1, \u0027Player of the Month\u0027), (2, \u0027Rookie of the Year\u0027), (3, \u0027Top Fragger\u0027);", + "sql_explanation": "This query inserts new records into the player_achievements table with the specified data." +}, { + "id": "1858", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users from the United States have played the virtual reality game \"Cybernetic Realms\" in the last month?", + "sql_context": "CREATE TABLE users (id INT, country VARCHAR(50), game VARCHAR(50), last_played DATETIME); INSERT INTO users VALUES (1, \u0027United States\u0027, \u0027Cybernetic Realms\u0027, \u00272022-02-03 16:20:00\u0027); INSERT INTO users VALUES (2, \u0027Canada\u0027, \u0027Cybernetic Realms\u0027, \u00272022-02-10 09:35:00\u0027);", + "sql": "SELECT COUNT(*) FROM users WHERE country \u003d \u0027United States\u0027 AND game \u003d 
\u0027Cybernetic Realms\u0027 AND last_played \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH);", + "sql_explanation": "This SQL query counts the number of users from the United States who have played the virtual reality game \"Cybernetic Realms\" in the last month. It does this by using the COUNT() function on the id column, filtering the rows by country, game, and last_played using the WHERE clause, and specifying the date range using the DATE_SUB() function and the INTERVAL keyword." +}, { + "id": "2633", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update player records with a \u0027VIP\u0027 status who have spent over $500, are from the US, and have played in the last 30 days", + "sql_context": "CREATE TABLE players (id INT PRIMARY KEY, name TEXT, status TEXT, total_spent DECIMAL(10,2), country TEXT, last_login DATETIME);", + "sql": "UPDATE players SET status \u003d \u0027VIP\u0027 WHERE total_spent \u003e 500 AND country \u003d \u0027US\u0027 AND last_login \u003e NOW() - INTERVAL 30 DAY;", + "sql_explanation": "This query updates player records in the players table, changing the status to \u0027VIP\u0027 for players who have spent over $500, are from the US, and have played in the last 30 days." 
+}, { + "id": "2676", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new team records into the \u0027esports_teams\u0027 table", + "sql_context": "CREATE TABLE esports_teams (team_id INT, team_name VARCHAR(50));", + "sql": "INSERT INTO esports_teams (team_id, team_name) VALUES (1, \u0027Phoenix Rising\u0027), (2, \u0027Titan Squad\u0027), (3, \u0027Cosmic Force\u0027);", + "sql_explanation": "This SQL query inserts new records into the \u0027esports_teams\u0027 table with the specified \u0027team_id\u0027 and \u0027team_name\u0027 values." +}, { + "id": "3385", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average playtime in minutes for players who have achieved a rank of Silver or higher in the game \"Virtual Arena\"?", + "sql_context": "CREATE TABLE VirtualArenaPlayers (PlayerID INT, PlayerName VARCHAR(50), PlaytimeMinutes INT, Rank VARCHAR(10)); INSERT INTO VirtualArenaPlayers VALUES (1, \u0027AvaGarcia\u0027, 550, \u0027Silver\u0027), (2, \u0027MasonThomas\u0027, 650, \u0027Gold\u0027), (3, \u0027IsabellaMartinez\u0027, 450, \u0027Silver\u0027), (4, \u0027BenjaminHarris\u0027, 750, \u0027Platinum\u0027);", + "sql": "SELECT AVG(PlaytimeMinutes) FROM VirtualArenaPlayers WHERE Rank IN (\u0027Silver\u0027, \u0027Gold\u0027, \u0027Platinum\u0027);", + 
"sql_explanation": "Compute the average playtime for players with a rank of Silver or higher by using the AVG function on the PlaytimeMinutes column and filtering for players with a rank of Silver or higher using the IN keyword and the specified ranks." +}, { + "id": "3391", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of unique achievements earned by players on \u00272022-01-02\u0027 in \u0027player_achievements\u0027 table", + "sql_context": "CREATE TABLE player_achievements (player_id INT, achievement_name VARCHAR(255), date_earned DATE);", + "sql": "SELECT COUNT(DISTINCT achievement_name) FROM player_achievements WHERE date_earned \u003d \u00272022-01-02\u0027;", + "sql_explanation": "This SQL query counts the number of unique achievements earned by players on \u00272022-01-02\u0027 by counting the number of unique \u0027achievement_name\u0027 records in the \u0027player_achievements\u0027 table that have a \u0027date_earned\u0027 value of \u00272022-01-02\u0027." 
+}, { + "id": "3405", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of wins for players who play \"Virtual Reality Chess Extreme\" or \"Rhythm Game 2023\"?", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Game VARCHAR(50), Wins INT); INSERT INTO Players (PlayerID, PlayerName, Game, Wins) VALUES (1, \u0027Sophia Garcia\u0027, \u0027Virtual Reality Chess Extreme\u0027, 35), (2, \u0027Daniel Kim\u0027, \u0027Rhythm Game 2023\u0027, 40), (3, \u0027Lila Hernandez\u0027, \u0027Racing Simulator 2022\u0027, 28), (4, \u0027Kenji Nguyen\u0027, \u0027Rhythm Game 2023\u0027, 45);", + "sql": "SELECT AVG(Wins) FROM Players WHERE Game IN (\u0027Virtual Reality Chess Extreme\u0027, \u0027Rhythm Game 2023\u0027);", + "sql_explanation": "The SQL query calculates the average number of wins for players who play \"Virtual Reality Chess Extreme\" or \"Rhythm Game 2023\" by selecting AVG function on the Wins column, filtering the data by the Game column with the IN clause." 
+}, { + "id": "3480", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average score of players who have achieved more than 10 victories in the game \u0027Galactic Battles\u0027?", + "sql_context": "CREATE TABLE Galactic_Battles (Player_ID INT, Player_Name VARCHAR(50), Score INT, Victories INT); INSERT INTO Galactic_Battles (Player_ID, Player_Name, Score, Victories) VALUES (1, \u0027John Doe\u0027, 500, 12), (2, \u0027Jane Smith\u0027, 700, 8), (3, \u0027Mike Johnson\u0027, 300, 15), (4, \u0027Sara Connor\u0027, 600, 20), (5, \u0027David Brown\u0027, 800, 10);", + "sql": "SELECT AVG(Score) FROM Galactic_Battles WHERE Victories \u003e 10 AND Game_Name \u003d \u0027Galactic Battles\u0027;", + "sql_explanation": "This query calculates the average score of players who have achieved more than 10 victories in the game \u0027Galactic Battles\u0027 by using the AVG function." 
+}, { + "id": "3815", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average score of players from the United States who play \u0027Racing Games\u0027?", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(100), Country VARCHAR(50), Game VARCHAR(50), Score INT); INSERT INTO Players (PlayerID, PlayerName, Country, Game, Score) VALUES (1, \u0027John Doe\u0027, \u0027United States\u0027, \u0027Racing Games\u0027, 90); INSERT INTO Players (PlayerID, PlayerName, Country, Game, Score) VALUES (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, \u0027Racing Games\u0027, 80);", + "sql": "SELECT AVG(Score) FROM Players WHERE Country \u003d \u0027United States\u0027 AND Game \u003d \u0027Racing Games\u0027;", + "sql_explanation": "This SQL query calculates the average score of players from the United States who play \u0027Racing Games\u0027. It uses the AVG() aggregation function to find the average score and filters the data using the WHERE clause based on the country and game." 
+}, { + "id": "4243", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average score of players who joined in the same month as the release of game V?", + "sql_context": "CREATE TABLE game_V (player_id INT, join_date DATE, score INT);", + "sql": "SELECT AVG(score) FROM game_V WHERE MONTH(join_date) \u003d MONTH(DATE(\u00272022-04-01\u0027));", + "sql_explanation": "This query calculates the average score of players who joined in the same month as the release of game V. It does this by selecting the average score from the game_V table, where the join_date is in the same month as the release date of the game." +}, { + "id": "4288", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update player\u0027s scores with a 10% increase for the \"Fantasy Battle\" game", + "sql_context": "CREATE TABLE player_scores (player_id INT, game_name VARCHAR(255), score INT, date DATE);", + "sql": "UPDATE player_scores SET score \u003d score * 1.1 WHERE game_name \u003d \u0027Fantasy Battle\u0027;", + "sql_explanation": "This query updates the score column in the player_scores table by increasing the current value by 10% (multiplication by 1.1) for all rows where the game_name is \u0027Fantasy Battle\u0027." 
+}, { + "id": "4426", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update player records with a \u0027VIP\u0027 status who have spent over $500 and are from the US", + "sql_context": "CREATE TABLE players (id INT PRIMARY KEY, name TEXT, status TEXT, total_spent DECIMAL(10,2), country TEXT);", + "sql": "UPDATE players SET status \u003d \u0027VIP\u0027 WHERE total_spent \u003e 500 AND country \u003d \u0027US\u0027;", + "sql_explanation": "This query updates player records in the players table, changing the status to \u0027VIP\u0027 for players who have spent over $500 and are from the US." +}, { + "id": "4472", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of players for FPS games.", + "sql_context": "CREATE TABLE PlayerCount (GameID int, GameName varchar(100), Genre varchar(50), PlayerCount int); INSERT INTO PlayerCount VALUES (10, \u0027GameJ\u0027, \u0027FPS\u0027, 150000), (11, \u0027GameK\u0027, \u0027Action\u0027, 120000), (12, \u0027GameL\u0027, \u0027FPS\u0027, 180000);", + "sql": "SELECT SUM(PlayerCount) as TotalPlayers FROM PlayerCount WHERE Genre \u003d \u0027FPS\u0027;", + "sql_explanation": "This query calculates the total number of players for FPS games by summing the PlayerCount column with WHERE clause to filter on FPS genre." 
+}, { + "id": "4483", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the top genre played by players in country B?", + "sql_context": "CREATE TABLE player_game_preferences (player_id INT, country VARCHAR(20), genre VARCHAR(20));", + "sql": "SELECT country, MAX(genre) FROM player_game_preferences WHERE country \u003d \u0027B\u0027;", + "sql_explanation": "This query finds the top genre played by players in country B. It does this by selecting the genre column from the player_game_preferences table, where the country is \u0027B\u0027, and then finding the genre with the maximum value." +}, { + "id": "4504", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the score for the player \u0027Sana Patel\u0027 to 1200 in the \u0027Galactic Guardians\u0027 table.", + "sql_context": "CREATE TABLE Galactic_Guardians (player_id INT, player_name VARCHAR(50), score INT); INSERT INTO Galactic_Guardians (player_id, player_name, score) VALUES (1, \u0027Sana Patel\u0027, 750), (2, \u0027Mohammed Khan\u0027, 1100), (3, \u0027Fatima Bhutto\u0027, 1400);", + "sql": "UPDATE Galactic_Guardians SET score \u003d 1200 WHERE player_name \u003d \u0027Sana Patel\u0027;", + "sql_explanation": "This query updates the score for the player \u0027Sana Patel\u0027 to 1200 in the 
\u0027Galactic_Guardians\u0027 table." +}, { + "id": "4656", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the maximum score achieved by user 3 in game \u0027D\u0027", + "sql_context": "CREATE TABLE game_scores (user_id INT, game_name VARCHAR(10), score INT); INSERT INTO game_scores (user_id, game_name, score) VALUES (1, \u0027A\u0027, 50), (2, \u0027B\u0027, 100), (3, \u0027D\u0027, 150), (3, \u0027D\u0027, 120);", + "sql": "SELECT MAX(score) FROM game_scores WHERE user_id \u003d 3 AND game_name \u003d \u0027D\u0027;", + "sql_explanation": "This query identifies the maximum score where the user_id is 3 and the game_name is \u0027D\u0027." +}, { + "id": "4846", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records with achievement_date before 2021-01-01", + "sql_context": "CREATE TABLE player_achievements (player_id INT, achievement_name VARCHAR(255), achievement_date DATE);", + "sql": "DELETE FROM player_achievements WHERE achievement_date \u003c \u00272021-01-01\u0027;", + "sql_explanation": "This SQL statement deletes records from the \u0027player_achievements\u0027 table where the achievement_date is before 2021-01-01." 
+}, { + "id": "4873", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total score of players whose name starts with \u0027B\u0027 or \u0027C\u0027?", + "sql_context": "CREATE TABLE Player (PlayerID INT, Name VARCHAR(50), Country VARCHAR(50), Score INT);", + "sql": "SELECT SUM(Score) FROM Player WHERE Name LIKE \u0027B%\u0027 OR Name LIKE \u0027C%\u0027;", + "sql_explanation": "This query calculates the total score of players whose name starts with \u0027B\u0027 or \u0027C\u0027 using the SUM aggregation function and filters the records based on the Name column using the LIKE keyword and OR operator." +}, { + "id": "5054", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average score of players from the United States and Canada?", + "sql_context": "CREATE TABLE Players (PlayerID int, PlayerName varchar(50), Country varchar(50), Score int); INSERT INTO Players (PlayerID, PlayerName, Country, Score) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 100), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 120);", + "sql": "SELECT AVG(Score) FROM Players WHERE Country IN (\u0027USA\u0027, \u0027Canada\u0027);", + "sql_explanation": "The SQL query calculates the average score of players from the United States and Canada by using the AVG function on 
the Score column and filtering the data with the IN operator for the specified countries." +}, { + "id": "5072", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \"session_length\" to 40 minutes for session_id 2 in the \"game_sessions\" table", + "sql_context": "CREATE TABLE game_sessions (session_id INT, player_id INT, session_length INT); INSERT INTO game_sessions (session_id, player_id, session_length) VALUES (1, 1, 20), (2, 2, 45), (3, 3, 35);", + "sql": "UPDATE game_sessions SET session_length \u003d 40 WHERE session_id \u003d 2;", + "sql_explanation": "This query updates the \"session_length\" to 40 minutes for session_id 2 in the \"game_sessions\" table. It uses the UPDATE statement to modify the existing record and the WHERE clause to filter records based on the specified condition." 
+}, { + "id": "5138", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of matches played in \"Virtual Reality Chess\"?", + "sql_context": "CREATE TABLE Matches (MatchID INT, PlayerID INT, Game VARCHAR(50), Wins INT); INSERT INTO Matches (MatchID, PlayerID, Game, Wins) VALUES (1, 1, \u0027Virtual Reality Chess\u0027, 10), (2, 1, \u0027Virtual Reality Chess\u0027, 12), (3, 2, \u0027Virtual Reality Chess\u0027, 15), (4, 3, \u0027Virtual Reality Chess\u0027, 18);", + "sql": "SELECT SUM(1) FROM Matches WHERE Game \u003d \u0027Virtual Reality Chess\u0027;", + "sql_explanation": "The SQL query calculates the total number of matches played in \"Virtual Reality Chess\" by selecting SUM function on the constant value 1, filtering the data by the Game column with the WHERE clause." 
+}, { + "id": "5316", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of headshots achieved by players who have more than 50 headshots in the \"SniperElite\" table?", + "sql_context": "CREATE TABLE SniperElite (PlayerID INT, Headshots INT, ShotsFired INT); INSERT INTO SniperElite (PlayerID, Headshots, ShotsFired) VALUES (1, 60, 200), (2, 55, 180), (3, 65, 220), (4, 62, 210), (5, 58, 190);", + "sql": "SELECT MAX(Headshots) FROM SniperElite WHERE Headshots \u003e 50;", + "sql_explanation": "This query finds the maximum number of headshots achieved by players who have more than 50 headshots in the \"SniperElite\" table. It first selects the maximum of the Headshots column, then filters the records where Headshots are greater than 50." +}, { + "id": "5376", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average score of players who joined after 2020 in game X?", + "sql_context": "CREATE TABLE game_X (player_id INT, join_date DATE, score INT);", + "sql": "SELECT AVG(score) FROM game_X WHERE YEAR(join_date) \u003e 2020;", + "sql_explanation": "This query calculates the average score of players who joined the game after 2020. 
It does this by selecting the average score from the game_X table, where the join_date is in the year 2021 or later." +}, { + "id": "5398", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total playtime of player \u0027Alice\u0027 in VR games?", + "sql_context": "CREATE TABLE vr_games (id INT, player TEXT, game TEXT, playtime INT); INSERT INTO vr_games (id, player, game, playtime) VALUES (1, \u0027Alice\u0027, \u0027Beat Saber\u0027, 120), (2, \u0027Bob\u0027, \u0027Job Simulator\u0027, 90), (3, \u0027Alice\u0027, \u0027Superhot VR\u0027, 150);", + "sql": "SELECT SUM(playtime) FROM vr_games WHERE player \u003d \u0027Alice\u0027;", + "sql_explanation": "This query calculates the total playtime of player \u0027Alice\u0027 in VR games by filtering the \u0027vr_games\u0027 table based on the \u0027player\u0027 column and then using the SUM function to find the total \u0027playtime\u0027." 
+}, { + "id": "5416", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of players who have played more than 100 minutes?", + "sql_context": "CREATE TABLE player_sessions (id INT, player_name TEXT, playtime INT); INSERT INTO player_sessions (id, player_name, playtime) VALUES (1, \u0027Olivia\u0027, 120); INSERT INTO player_sessions (id, player_name, playtime) VALUES (2, \u0027Olivia\u0027, 150); INSERT INTO player_sessions (id, player_name, playtime) VALUES (3, \u0027William\u0027, 200);", + "sql": "SELECT COUNT(*) FROM player_sessions WHERE playtime \u003e 100;", + "sql_explanation": "This SQL query counts the number of players who have played more than 100 minutes by using the COUNT function with no parameters (which counts all rows) and filtering for rows with \u0027playtime\u0027 values greater than 100." 
+}, { + "id": "5518", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all players who have not played any games yet", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), GameType VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, GameType) VALUES (1, \u0027John Doe\u0027, NULL); INSERT INTO Players (PlayerID, PlayerName, GameType) VALUES (2, \u0027Jane Smith\u0027, \u0027RPG\u0027);", + "sql": "SELECT PlayerName FROM Players WHERE GameType IS NULL;", + "sql_explanation": "This SQL query lists all players who have not played any games yet. It does this by selecting the PlayerName column, but only for the rows where the GameType column is null." +}, { + "id": "5667", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the game_servers table where the region is \u0027Asia\u0027", + "sql_context": "CREATE TABLE game_servers (server_id INT, region VARCHAR(10), player_capacity INT);", + "sql": "DELETE FROM game_servers WHERE region \u003d \u0027Asia\u0027;", + "sql_explanation": "This query deletes all records in the game_servers table where the region column has the value \u0027Asia\u0027." 
+}, { + "id": "5692", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \"game_revenue\" table where the \"revenue\" is less than 1000", + "sql_context": "CREATE TABLE game_revenue (game_id INT, revenue INT); INSERT INTO game_revenue (game_id, revenue) VALUES (1, 500), (2, 1500), (3, 800);", + "sql": "DELETE FROM game_revenue WHERE revenue \u003c 1000;", + "sql_explanation": "This query deletes records in the \"game_revenue\" table where the \"revenue\" is less than 1000. It uses the WHERE clause to filter records based on the specified condition." +}, { + "id": "5740", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average K/D ratio for all players in \u0027Call of Duty\u0027 games.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(50)); INSERT INTO Players (PlayerID, Name) VALUES (1, \u0027John Doe\u0027); INSERT INTO Players (PlayerID, Name) VALUES (2, \u0027Jane Smith\u0027); CREATE TABLE COD_Players (PlayerID INT, Kills INT, Deaths INT); INSERT INTO COD_Players (PlayerID, Kills, Deaths) VALUES (1, 50, 30); INSERT INTO COD_Players (PlayerID, Kills, Deaths) VALUES (2, 40, 45);", + "sql": "SELECT AVG(Kills/Deaths) FROM COD_Players;", + "sql_explanation": "The SQL query calculates the average K/D ratio by dividing the number of kills 
by the number of deaths for each player in the COD_Players table and then taking the average of those ratios." +}, { + "id": "393", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new employees into the DiverseEmployees table.", + "sql_context": "CREATE TABLE DiverseEmployees (id INT, name VARCHAR(100), department VARCHAR(50), country VARCHAR(50));", + "sql": "INSERT INTO DiverseEmployees (id, name, department, country) VALUES (7, \u0027Hamza Ahmed\u0027, \u0027Finance\u0027, \u0027Pakistan\u0027), (8, \u0027Xiuying Zhang\u0027, \u0027IT\u0027, \u0027China\u0027), (9, \u0027Amina Diop\u0027, \u0027Marketing\u0027, \u0027Senegal\u0027), (10, \u0027Santiago Rodriguez\u0027, \u0027HR\u0027, \u0027Brazil\u0027);", + "sql_explanation": "The SQL query inserts four new records into the DiverseEmployees table with diverse names, departments, and countries." 
+}, { + "id": "1724", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table to store training program data", + "sql_context": "CREATE TABLE training_programs (id INT PRIMARY KEY, program_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE, capacity INT);", + "sql": "CREATE TABLE training_programs (id INT PRIMARY KEY, program_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE, capacity INT);", + "sql_explanation": "A new table named \u0027training_programs\u0027 is being created with columns for program name, location, start date, end date, and capacity." +}, { + "id": "2644", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees in the engineering department have completed training in inclusion?", + "sql_context": "CREATE TABLE employee_database (id INT, department TEXT, training_completed TEXT); INSERT INTO employee_database (id, department, training_completed) VALUES (1, \u0027Engineering\u0027, \u0027Diversity\u0027), (2, \u0027Engineering\u0027, \u0027Inclusion\u0027), (3, \u0027Engineering\u0027, \u0027None\u0027);", + "sql": "SELECT COUNT(*) as count FROM employee_database WHERE department \u003d \u0027Engineering\u0027 AND 
training_completed \u003d \u0027Inclusion\u0027;", + "sql_explanation": "This query calculates the number of employees in the engineering department who have completed training in inclusion. It does this by filtering for employees in the engineering department who have completed training in inclusion and then calculating the count of those employees." +}, { + "id": "2794", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees who identify as non-binary, hired in 2021, and work in the Engineering department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(20), HireYear INT, Department VARCHAR(20), Salary FLOAT); INSERT INTO Employees (EmployeeID, Gender, HireYear, Department, Salary) VALUES (1, \u0027Female\u0027, 2020, \u0027IT\u0027, 70000.00), (2, \u0027Male\u0027, 2019, \u0027HR\u0027, 60000.00), (3, \u0027Non-binary\u0027, 2021, \u0027Engineering\u0027, 80000.00);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE Gender \u003d \u0027Non-binary\u0027 AND HireYear \u003d 2021 AND Department \u003d \u0027Engineering\u0027;", + "sql_explanation": "The SQL query calculates the average salary for employees who identify as non-binary, were hired in 2021, and work in the Engineering department. It does this by using the AVG function on the Salary column, filtering rows with the WHERE clause based on the specified conditions." 
+}, { + "id": "2956", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names, ethnicities, and job titles for employees in the Engineering department who have not completed diversity and inclusion training.", + "sql_context": "CREATE TABLE EmployeeData (EmployeeID int, Name varchar(30), Ethnicity varchar(20), JobTitle varchar(20), Department varchar(20), TrainingComplete int); INSERT INTO EmployeeData (EmployeeID, Name, Ethnicity, JobTitle, Department, TrainingComplete) VALUES (1, \u0027Sophia Gonzales\u0027, \u0027Latinx\u0027, \u0027Data Analyst\u0027, \u0027Engineering\u0027, 0), (2, \u0027Mohammad Ali\u0027, \u0027Asian\u0027, \u0027Software Engineer\u0027, \u0027IT\u0027, 1), (3, \u0027Leila Johnson\u0027, \u0027African American\u0027, \u0027Project Manager\u0027, \u0027Marketing\u0027, 0);", + "sql": "SELECT Name, Ethnicity, JobTitle FROM EmployeeData WHERE Department \u003d \u0027Engineering\u0027 AND TrainingComplete \u003d 0;", + "sql_explanation": "The SQL query selects the Name, Ethnicity, and JobTitle columns from the EmployeeData table, filtering for employees in the Engineering department who have not completed diversity and inclusion training." 
+}, { + "id": "2974", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of diversity and inclusion roles in the job postings database?", + "sql_context": "CREATE TABLE job_postings_database (id INT, role TEXT, posting_date DATE); INSERT INTO job_postings_database (id, role, posting_date) VALUES (1, \u0027Diversity Officer\u0027, \u00272022-01-01\u0027), (2, \u0027Inclusion Specialist\u0027, \u00272022-01-02\u0027), (3, \u0027Software Engineer\u0027, \u00272022-01-03\u0027);", + "sql": "SELECT COUNT(*) as total FROM job_postings_database WHERE role LIKE \u0027%diversity%\u0027 OR role LIKE \u0027%inclusion%\u0027;", + "sql_explanation": "This query calculates the total number of diversity and inclusion roles in the job postings database. It does this by filtering for roles in the job postings database that have the words \"diversity\" or \"inclusion\" in them and then calculating the count of those roles." 
+}, { + "id": "3223", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique job titles held by employees who identify as part of the LGBTQ+ community or as having a disability.", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), job_title VARCHAR(50), lgbtq_identified BOOLEAN, disability_status VARCHAR(10)); INSERT INTO employees (id, name, job_title, lgbtq_identified, disability_status) VALUES (1, \u0027John Doe\u0027, \u0027Software Engineer\u0027, FALSE, \u0027None\u0027), (2, \u0027Jane Smith\u0027, \u0027Marketing Manager\u0027, TRUE, \u0027None\u0027), (3, \u0027Mike Johnson\u0027, \u0027Data Analyst\u0027, FALSE, \u0027Physical Disability\u0027), (4, \u0027Sara Connor\u0027, \u0027Project Manager\u0027, FALSE, \u0027None\u0027);", + "sql": "SELECT DISTINCT job_title FROM employees WHERE lgbtq_identified \u003d TRUE OR disability_status \u003c\u003e \u0027None\u0027;", + "sql_explanation": "This SQL query first filters the employees table to only include records where lgbtq_identified is TRUE or disability_status is not \u0027None\u0027. It then uses the DISTINCT keyword to return only the unique values of the job_title column for these records." 
+}, { + "id": "3256", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many job applications were received for the Data Scientist position?", + "sql_context": "CREATE TABLE Job_Applications (Application_ID INT, Applicant_Name VARCHAR(50), Job_Title VARCHAR(50), Application_Date DATE, Interview_Date DATE, Hired BOOLEAN); CREATE TABLE Jobs (Job_ID INT, Job_Title VARCHAR(50), Department VARCHAR(50), Location VARCHAR(50), Salary DECIMAL(10,2));", + "sql": "SELECT COUNT(*) as \u0027Number of Applications\u0027 FROM Job_Applications WHERE Job_Title \u003d \u0027Data Scientist\u0027;", + "sql_explanation": "This query counts the number of job applications for the Data Scientist position." 
+}, { + "id": "3287", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees who have completed training programs in the IT and Marketing departments?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), completed_training BOOLEAN); INSERT INTO employees (id, name, department, completed_training) VALUES (1, \u0027John Doe\u0027, \u0027IT\u0027, TRUE), (2, \u0027Jane Smith\u0027, \u0027Marketing\u0027, FALSE), (3, \u0027Mike Johnson\u0027, \u0027IT\u0027, TRUE), (4, \u0027Sara Connor\u0027, \u0027Marketing\u0027, TRUE);", + "sql": "SELECT COUNT(*) FROM employees WHERE department IN (\u0027IT\u0027, \u0027Marketing\u0027) AND completed_training \u003d TRUE;", + "sql_explanation": "This SQL query first filters the employees table to only include records where the department is either \u0027IT\u0027 or \u0027Marketing\u0027. It then further filters the records to only include those where completed_training is TRUE. Finally, it uses the COUNT aggregate function to count the number of records that meet all of these conditions." 
+}, { + "id": "3332", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees who identify as a racial or ethnic minority in the human resources department?", + "sql_context": "CREATE TABLE EmployeeDiversity (EmployeeID INT, Identity VARCHAR(50), Department VARCHAR(50)); INSERT INTO EmployeeDiversity (EmployeeID, Identity, Department) VALUES (1, \u0027Asian\u0027, \u0027Human Resources\u0027), (2, \u0027White\u0027, \u0027Marketing\u0027);", + "sql": "SELECT COUNT(*) FROM EmployeeDiversity WHERE Identity \u003c\u003e \u0027White\u0027 AND Department \u003d \u0027Human Resources\u0027;", + "sql_explanation": "This query counts the total number of employees who identify as a racial or ethnic minority in the human resources department by filtering the EmployeeDiversity table where Identity is not \u0027White\u0027 and Department is \u0027Human Resources\u0027, then counting the number of records." 
+}, { + "id": "3349", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of employees who were hired in the last 30 days and have not received diversity and inclusion training.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, HireDate DATE, Training VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM Employees WHERE HireDate \u003e\u003d DATEADD(day, -30, GETDATE()) AND Training IS NULL;", + "sql_explanation": "This query calculates the number of employees who were hired in the last 30 days and have not received diversity and inclusion training." +}, { + "id": "3703", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the maximum and minimum salaries for employees who identify as male and work in the HR department?", + "sql_context": "SAME AS ABOVE", + "sql": "SELECT MAX(Salary), MIN(Salary) FROM Employees WHERE Gender \u003d \u0027Male\u0027 AND Department \u003d \u0027HR\u0027;", + "sql_explanation": "The SQL query selects the maximum and minimum Salary values from the Employees table, filtering for records where Gender is \u0027Male\u0027 and Department is \u0027HR\u0027. The result is a table displaying the highest and lowest salaries for the specified criteria." 
+}, { + "id": "3771", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of employees who identify as LGBTQ+ in the Engineering department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Gender VARCHAR(50), Sexuality VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name, Department, Gender, Sexuality) VALUES (1, \u0027Alex Brown\u0027, \u0027Engineering\u0027, \u0027Male\u0027, \u0027Gay\u0027), (2, \u0027Jamie Davis\u0027, \u0027Engineering\u0027, \u0027Non-binary\u0027, \u0027Queer\u0027), (3, \u0027Taylor Green\u0027, \u0027Marketing\u0027, \u0027Female\u0027, \u0027Straight\u0027);", + "sql": "SELECT COUNT(*) FROM Employees WHERE Department \u003d \u0027Engineering\u0027 AND Sexuality IS NOT NULL;", + "sql_explanation": "This SQL query counts the number of employees in the Engineering department who have a recorded sexuality. It does this by using the COUNT function with no parameters and filtering the data with a WHERE clause to only include employees in the Engineering department and whose Sexuality field is not NULL." 
+}, { + "id": "3886", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees in the Legal department have not undergone compliance training?", + "sql_context": "CREATE TABLE EmployeeTrainings2 (EmployeeID INT, Department TEXT, Training TEXT); INSERT INTO EmployeeTrainings2 (EmployeeID, Department, Training) VALUES (1, \u0027Legal\u0027, \u0027Compliance\u0027);", + "sql": "SELECT COUNT(*) FROM EmployeeTrainings2 WHERE Department \u003d \u0027Legal\u0027 AND Training IS NULL;", + "sql_explanation": "To find the number of employees in the Legal department who have not undergone compliance training, we count the number of rows with the WHERE clause, filtering employees who work in the Legal department and have not undergone compliance training (NULL value)." 
+}, { + "id": "4016", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of developers in the IT department?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), position VARCHAR(50), salary FLOAT); INSERT INTO employees (id, name, department, position, salary) VALUES (1, \u0027John Doe\u0027, \u0027IT\u0027, \u0027Developer\u0027, 75000.0), (2, \u0027Jane Smith\u0027, \u0027IT\u0027, \u0027Developer\u0027, 80000.0);", + "sql": "SELECT AVG(salary) FROM employees WHERE department \u003d \u0027IT\u0027 AND position \u003d \u0027Developer\u0027;", + "sql_explanation": "The SQL query calculates the average salary of developers in the IT department by using the AVG function on the salary column, filtering the records with a WHERE clause for the IT department and Developer position." 
+}, { + "id": "4017", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of developers who identify as female, across all departments and locations?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Location VARCHAR(50), Position VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Location, Position, Salary) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027IT\u0027, \u0027New York\u0027, \u0027Developer\u0027, 80000.00), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027HR\u0027, \u0027Los Angeles\u0027, \u0027Developer\u0027, 85000.00);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE Position \u003d \u0027Developer\u0027 AND Gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query calculates the average salary for developers who identify as female by selecting the salary column and calculating the average. It filters for the position developer and gender female." 
+}, { + "id": "4099", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many female applicants were there for the marketing position?", + "sql_context": "CREATE TABLE Applicants (ApplicantID INT, Gender VARCHAR(10), Position VARCHAR(20)); INSERT INTO Applicants (ApplicantID, Gender, Position) VALUES (1, \u0027Female\u0027, \u0027Marketing\u0027), (2, \u0027Male\u0027, \u0027Marketing\u0027), (3, \u0027Female\u0027, \u0027IT\u0027), (4, \u0027Non-binary\u0027, \u0027HR\u0027);", + "sql": "SELECT COUNT(*) FROM Applicants WHERE Gender \u003d \u0027Female\u0027 AND Position \u003d \u0027Marketing\u0027;", + "sql_explanation": "This query counts the number of female applicants for the marketing position." 
+}, { + "id": "4143", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many female candidates were interviewed for the marketing position?", + "sql_context": "CREATE TABLE Candidates (CandidateID INT, Gender VARCHAR(10), Position VARCHAR(20)); INSERT INTO Candidates (CandidateID, Gender, Position) VALUES (1, \u0027Female\u0027, \u0027Marketing\u0027), (2, \u0027Male\u0027, \u0027IT\u0027), (3, \u0027Non-binary\u0027, \u0027HR\u0027), (4, \u0027Female\u0027, \u0027Marketing\u0027), (5, \u0027Male\u0027, \u0027IT\u0027);", + "sql": "SELECT COUNT(*) FROM Candidates WHERE Gender \u003d \u0027Female\u0027 AND Position \u003d \u0027Marketing\u0027;", + "sql_explanation": "The SQL query counts the number of female candidates interviewed for the marketing position by using the COUNT function on the * wildcard, filtering the data using the WHERE clause to consider only female candidates and those who applied for the marketing position." 
+}, { + "id": "4223", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees hired from the LGBTQ+ community after 2020?", + "sql_context": "CREATE TABLE Hiring (HireID INT, EmployeeID INT, HireDate DATE, Community VARCHAR(50)); INSERT INTO Hiring (HireID, EmployeeID, HireDate, Community) VALUES (1, 5, \u00272022-01-15\u0027, \u0027LatinX\u0027), (2, 6, \u00272022-02-20\u0027, \u0027African American\u0027), (3, 7, \u00272022-03-05\u0027, \u0027LGBTQ+\u0027), (4, 8, \u00272022-04-12\u0027, \u0027Women in STEM\u0027), (5, 9, \u00272021-11-30\u0027, \u0027LGBTQ+\u0027), (6, 10, \u00272021-12-15\u0027, \u0027LGBTQ+\u0027);", + "sql": "SELECT COUNT(*) FROM Hiring WHERE YEAR(HireDate) \u003e 2020 AND Community \u003d \u0027LGBTQ+\u0027;", + "sql_explanation": "This query counts the number of employees hired from the LGBTQ+ community after 2020 by selecting all rows from the Hiring table where the YEAR() function applied to the HireDate column is greater than 2020 and filtering by the Community column." 
+}, { + "id": "4233", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees were hired in Q2 of 2022?", + "sql_context": "CREATE TABLE Hiring (HireID int, HireDate date); INSERT INTO Hiring (HireID, HireDate) VALUES (1, \u00272022-04-01\u0027), (2, \u00272022-07-15\u0027), (3, \u00272022-03-30\u0027);", + "sql": "SELECT COUNT(*) FROM Hiring WHERE HireDate BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "This query counts the number of employees hired in Q2 of 2022 by using the COUNT function and filtering for rows where the HireDate falls between the start and end dates of Q2." 
+}, { + "id": "4310", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the finance department?", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), department VARCHAR(50), salary DECIMAL(10,2)); INSERT INTO Employees (id, name, department, salary) VALUES (1, \u0027John Doe\u0027, \u0027Finance\u0027, 50000.00); INSERT INTO Employees (id, name, department, salary) VALUES (2, \u0027Jane Smith\u0027, \u0027IT\u0027, 60000.00); INSERT INTO Employees (id, name, department, salary) VALUES (3, \u0027Alice Johnson\u0027, \u0027Finance\u0027, 55000.00);", + "sql": "SELECT AVG(salary) AS avg_salary FROM Employees WHERE department \u003d \u0027Finance\u0027;", + "sql_explanation": "This query calculates the average salary of employees in the Finance department by selecting the salary field for the Finance department and applying the AVG function to the salary field." 
+}, { + "id": "4370", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees were hired in the HR department in 2021?", + "sql_context": "CREATE TABLE Hiring (HireID INT, EmployeeID INT, Department VARCHAR(20), HireDate DATE); INSERT INTO Hiring (HireID, EmployeeID, Department, HireDate) VALUES (1, 3, \u0027HR\u0027, \u00272021-02-15\u0027), (2, 4, \u0027IT\u0027, \u00272022-03-20\u0027);", + "sql": "SELECT COUNT(*) FROM Hiring WHERE Department \u003d \u0027HR\u0027 AND YEAR(HireDate) \u003d 2021;", + "sql_explanation": "This query calculates the number of employees hired in the HR department in 2021 by filtering the Hiring table based on the conditions \u0027Department \u003d HR\u0027 and \u0027YEAR(HireDate) \u003d 2021\u0027, and then counting the number of rows in the filtered table." 
+}, { + "id": "4445", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of employees who identify as non-binary and work in the finance department.", + "sql_context": "SAME AS ABOVE", + "sql": "DELETE FROM Employees WHERE Gender \u003d \u0027Non-binary\u0027 AND Department \u003d \u0027Finance\u0027;", + "sql_explanation": "The SQL query deletes records from the Employees table, filtering for records where Gender is \u0027Non-binary\u0027 and Department is \u0027Finance\u0027. The result is a reduced table, with the specified records removed." +}, { + "id": "4575", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique departments that have employees hired in 2021", + "sql_context": "CREATE TABLE hiring (id INT, employee_id INT, hire_date DATE, department VARCHAR(255)); INSERT INTO hiring (id, employee_id, hire_date, department) VALUES (1, 101, \u00272020-01-02\u0027, \u0027HR\u0027); INSERT INTO hiring (id, employee_id, hire_date, department) VALUES (2, 102, \u00272019-12-20\u0027, \u0027IT\u0027);", + "sql": "SELECT COUNT(DISTINCT department) FROM hiring WHERE YEAR(hire_date) \u003d 2021;", + "sql_explanation": "This query counts the number of unique records in the \u0027department\u0027 
column from the \u0027hiring\u0027 table where the hire_date\u0027s year is 2021." +}, { + "id": "4652", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the Sales department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary FLOAT); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, \u0027IT\u0027, 75000.00), (2, \u0027IT\u0027, 80000.00), (3, \u0027HR\u0027, 60000.00), (4, \u0027Sales\u0027, 68000.00), (5, \u0027Sales\u0027, 71000.00);", + "sql": "SELECT Department, AVG(Salary) FROM Employees WHERE Department \u003d \u0027Sales\u0027;", + "sql_explanation": "This query calculates the average salary for the Sales department by using the AVG function on the Salary column, and filtering for rows where the Department is \u0027Sales\u0027." 
+}, { + "id": "4760", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total salary cost for diversity training programs?", + "sql_context": "CREATE TABLE TrainingPrograms (ProgramID INT, ProgramName VARCHAR(30), ProgramType VARCHAR(20), Cost FLOAT); INSERT INTO TrainingPrograms (ProgramID, ProgramName, ProgramType, Cost) VALUES (1, \u0027Diversity Training\u0027, \u0027Diversity\u0027, 10000), (2, \u0027Leadership Training\u0027, \u0027Leadership\u0027, 15000), (3, \u0027Team Building\u0027, \u0027Teamwork\u0027, 12000);", + "sql": "SELECT SUM(Cost) FROM TrainingPrograms WHERE ProgramType \u003d \u0027Diversity\u0027;", + "sql_explanation": "This query filters the TrainingPrograms table based on the condition ProgramType \u003d \u0027Diversity\u0027, and then applies the SUM function to the Cost column of those records to find the total salary cost for diversity training programs." 
+}, { + "id": "4980", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum salary in the Engineering department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary FLOAT); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, \u0027IT\u0027, 75000.00), (2, \u0027IT\u0027, 70000.00), (3, \u0027Engineering\u0027, 95000.00), (4, \u0027Finance\u0027, 85000.00);", + "sql": "SELECT MAX(Salary) FROM Employees WHERE Department \u003d \u0027Engineering\u0027;", + "sql_explanation": "This SQL query calculates the maximum salary in the Engineering department by filtering the Employees table for rows where Department is \u0027Engineering\u0027, and then computing the maximum of the Salary column." +}, { + "id": "4984", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove a department from the \"departments\" table", + "sql_context": "CREATE TABLE departments (id INT, department VARCHAR(50));", + "sql": "DELETE FROM departments WHERE department \u003d \u0027Diversity \u0026 Inclusion\u0027;", + "sql_explanation": "This query removes the Diversity \u0026 Inclusion department record from the \"departments\" table." 
+}, { + "id": "5046", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum salary for employees in the management department?", + "sql_context": "CREATE TABLE Employees (EmployeeID int, Department varchar(20), Salary numeric(10,2)); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, \u0027IT\u0027, 75000.00), (2, \u0027Management\u0027, 90000.00), (3, \u0027HR\u0027, 60000.00);", + "sql": "SELECT MAX(Salary) FROM Employees WHERE Department \u003d \u0027Management\u0027;", + "sql_explanation": "This query calculates the maximum salary for employees in the management department by filtering the Employees table based on Department column and then calculating the maximum of Salary column values." 
+}, { + "id": "5095", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the Marketing department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (1, \u0027Jane\u0027, \u0027Smith\u0027, \u0027Marketing\u0027, 60000.00), (2, \u0027Bruce\u0027, \u0027Johnson\u0027, \u0027IT\u0027, 75000.00);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE Department \u003d \u0027Marketing\u0027;", + "sql_explanation": "This query calculates the average salary from the Employees table for the Marketing department." 
+}, { + "id": "5178", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of employees hired in 2022?", + "sql_context": "CREATE TABLE EmployeeHires (HireID INT, HireDate DATE); INSERT INTO EmployeeHires (HireID, HireDate) VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-02-14\u0027), (3, \u00272021-12-25\u0027), (4, \u00272022-03-03\u0027);", + "sql": "SELECT COUNT(*) FROM EmployeeHires WHERE YEAR(HireDate) \u003d 2022;", + "sql_explanation": "This SQL query counts the number of employees hired in 2022 by selecting all records with a HireDate in 2022 and then counting the number of records." +}, { + "id": "5217", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average salary of employees who have completed diversity and inclusion training.", + "sql_context": "CREATE TABLE salaries (id INT, salary DECIMAL(10, 2), training VARCHAR(255)); INSERT INTO salaries (id, salary, training) VALUES (1, 50000, \u0027completed\u0027), (2, 55000, \u0027in progress\u0027), (3, 60000, \u0027not started\u0027);", + "sql": "SELECT AVG(salary) FROM salaries WHERE training \u003d \u0027completed\u0027;", + "sql_explanation": "The SQL query calculates the average salary of employees who have 
completed diversity and inclusion training by filtering the records with the \u0027completed\u0027 training status and then applying the AVG function on the salary column." +}, { + "id": "5224", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average salary of employees who were hired in 2021.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, HireDate DATE, Salary FLOAT); INSERT INTO Employees (EmployeeID, HireDate, Salary) VALUES (1, \u00272018-01-01\u0027, 70000), (2, \u00272019-01-01\u0027, 65000), (3, \u00272020-01-01\u0027, 55000), (4, \u00272021-01-01\u0027, 80000), (5, \u00272022-01-01\u0027, 85000);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE YEAR(HireDate) \u003d 2021;", + "sql_explanation": "The SQL query calculates the average salary of employees who were hired in 2021 by using the AVG() function and a WHERE clause with the YEAR() function to filter the required data." 
+}, { + "id": "5405", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary for employees in the HR department?", + "sql_context": "CREATE TABLE Employees (EmployeeID int, Department varchar(20), Salary decimal(10,2)); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, \u0027IT\u0027, 75000.00), (2, \u0027IT\u0027, 80000.00), (3, \u0027HR\u0027, 60000.00);", + "sql": "SELECT MIN(Salary) FROM Employees WHERE Department \u003d \u0027HR\u0027;", + "sql_explanation": "This query calculates the minimum salary for employees in the HR department by using the MIN function on the Salary column, and filtering for rows where the Department is \u0027HR\u0027." 
+}, { + "id": "5439", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary for employees in the IT department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, \u0027IT\u0027, 75000.00); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (2, \u0027IT\u0027, 80000.00); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (3, \u0027HR\u0027, 65000.00);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE Department \u003d \u0027IT\u0027", + "sql_explanation": "Calculate the average salary for employees in the IT department." +}, { + "id": "5535", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique departments from \u0027employee_demographics\u0027", + "sql_context": "CREATE TABLE employee_demographics (id INT PRIMARY KEY, employee_id INT, name VARCHAR(255), department VARCHAR(255), region VARCHAR(255));", + "sql": "SELECT DISTINCT department FROM employee_demographics;", + "sql_explanation": "The query selects all unique department values from the \u0027employee_demographics\u0027 table." 
+}, { + "id": "5681", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the Employees table where the employee has been with the company for more than 2 years", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), StartDate DATE, YearsAtCompany INT); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, StartDate, YearsAtCompany) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027IT\u0027, \u00272020-01-01\u0027, 2); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, StartDate, YearsAtCompany) VALUES (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027HR\u0027, \u00272019-01-15\u0027, 3);", + "sql": "DELETE FROM Employees WHERE YearsAtCompany \u003e 2;", + "sql_explanation": "This DELETE statement removes all records from the Employees table where the employee has been with the company for more than 2 years." 
+}, { + "id": "5778", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Who is the youngest employee in the Sales department?", + "sql_context": "CREATE TABLE Employees (Employee_ID INT, First_Name VARCHAR(20), Last_Name VARCHAR(20), Department VARCHAR(20), Salary DECIMAL(10,2), Date_Hired DATE); CREATE VIEW Youngest_Employee AS SELECT Employee_ID, First_Name, Last_Name, Department, Salary, Date_Hired FROM Employees WHERE Date_Hired \u003d (SELECT MIN(Date_Hired) FROM Employees); CREATE VIEW Youngest_Sales_Employee AS SELECT * FROM Youngest_Employee WHERE Department \u003d \u0027Sales\u0027;", + "sql": "SELECT * FROM Youngest_Sales_Employee;", + "sql_explanation": "This query retrieves the information about the youngest employee in the Sales department. The query uses a subquery to determine the earliest date that an employee was hired, and then uses that subquery as a filter in a view to only include employees who were hired on that date. The query then uses another view to filter the results to only include employees who work in the Sales department. The final query returns all of the information about the youngest employee in the Sales department." 
+}, { + "id": "5815", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary for employees in the company?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(10), Department VARCHAR(20), Salary FLOAT); INSERT INTO Employees (EmployeeID, Gender, Department, Salary) VALUES (1, \u0027Male\u0027, \u0027IT\u0027, 75000), (2, \u0027Female\u0027, \u0027IT\u0027, 70000), (3, \u0027Non-binary\u0027, \u0027HR\u0027, 65000), (4, \u0027Male\u0027, \u0027HR\u0027, 70000);", + "sql": "SELECT MIN(Salary) FROM Employees;", + "sql_explanation": "This query calculates the minimum salary for employees in the company." +}, { + "id": "5826", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average training cost?", + "sql_context": "CREATE TABLE Trainings (TrainingID INT, Department VARCHAR(20), Cost FLOAT); INSERT INTO Trainings (TrainingID, Department, Cost) VALUES (1, \u0027Sales\u0027, 5000), (2, \u0027IT\u0027, 7000), (3, \u0027Sales\u0027, 6000), (4, \u0027HR\u0027, 4000);", + "sql": "SELECT AVG(Cost) FROM Trainings;", + "sql_explanation": "This SQL query calculates the average training cost. It does this by selecting the average cost from the Trainings table." 
+}, { + "id": "5835", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees in the company?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name) VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027), (3, \u0027Mike Johnson\u0027);", + "sql": "SELECT COUNT(*) FROM Employees;", + "sql_explanation": "This query calculates the total number of employees in the company by using the COUNT function to count the number of rows (COUNT(*)) in the Employees table." +}, { + "id": "1609", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many accidents happened at the mines in Australia in the last 12 months?", + "sql_context": "CREATE TABLE mines (id INT, name VARCHAR(255), location VARCHAR(255), last_accident_date DATE); INSERT INTO mines (id, name, location, last_accident_date) VALUES (1, \u0027Mine A\u0027, \u0027Australia\u0027, \u00272021-01-15\u0027), (2, \u0027Mine B\u0027, \u0027Canada\u0027, \u00272020-06-20\u0027), (3, \u0027Mine C\u0027, \u0027Australia\u0027, \u00272021-02-10\u0027), (4, \u0027Mine D\u0027, \u0027USA\u0027, NULL);", + "sql": "SELECT COUNT(m.id) as total_accidents FROM mines m WHERE m.location 
\u003d \u0027Australia\u0027 AND m.last_accident_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH);", + "sql_explanation": "The SQL query uses the mines table and filters the records for the mines located in \u0027Australia\u0027 and with a last_accident_date within the last 12 months. Then, it counts the number of records." +}, { + "id": "2054", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 mining sites with the highest annual water consumption and their respective environmental impact scores in the \u0027mining_operations\u0027 database.", + "sql_context": "CREATE TABLE mining_sites (site_id INT PRIMARY KEY, site_name VARCHAR(50), annual_water_consumption INT, environmental_impact_score INT); INSERT INTO mining_sites (site_id, site_name, annual_water_consumption, environmental_impact_score) VALUES (1, \u0027Site A\u0027, 1000000, 60), (2, \u0027Site B\u0027, 1500000, 70), (3, \u0027Site C\u0027, 800000, 50), (4, \u0027Site D\u0027, 1200000, 75);", + "sql": "SELECT site_name, annual_water_consumption, environmental_impact_score FROM mining_sites ORDER BY annual_water_consumption DESC LIMIT 3;", + "sql_explanation": "This query orders the \u0027mining_sites\u0027 table by the \u0027annual_water_consumption\u0027 column in descending order and limits the result to the top 3 rows. It then returns the site names, annual water consumption, and environmental impact scores." 
+}, { + "id": "2269", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total quantities of coal and iron ore mined in the first quarter of 2021?", + "sql_context": "CREATE TABLE mine_operations (operation_id INT, material VARCHAR(10), quantity INT, mining_date DATE); INSERT INTO mine_operations (operation_id, material, quantity, mining_date) VALUES (1, \u0027coal\u0027, 500, \u00272021-01-15\u0027), (2, \u0027iron_ore\u0027, 800, \u00272021-01-20\u0027), (3, \u0027coal\u0027, 700, \u00272021-03-01\u0027), (4, \u0027iron_ore\u0027, 950, \u00272021-03-14\u0027);", + "sql": "SELECT quantity FROM mine_operations WHERE material IN (\u0027coal\u0027, \u0027iron_ore\u0027) AND mining_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-03-31\u0027", + "sql_explanation": "This query selects the quantities of coal and iron ore from the mine_operations table for the first quarter of 2021 by filtering on material and mining_date." 
+}, { + "id": "2327", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of mines in Peru that mined silver in 2018", + "sql_context": "CREATE TABLE mining_operations (id INT, mine_name TEXT, location TEXT, material TEXT, quantity INT, date DATE); INSERT INTO mining_operations (id, mine_name, location, material, quantity, date) VALUES (12, \u0027Silver Summit\u0027, \u0027Peru\u0027, \u0027silver\u0027, 3000, \u00272018-01-01\u0027);", + "sql": "SELECT COUNT(DISTINCT mine_name) FROM mining_operations WHERE material \u003d \u0027silver\u0027 AND location \u003d \u0027Peru\u0027 AND date \u003d \u00272018-01-01\u0027;", + "sql_explanation": "This query finds the number of mines in Peru that mined silver in 2018 by counting the DISTINCT \u0027mine_name\u0027 for rows with \u0027material\u0027 \u003d \u0027silver\u0027, \u0027location\u0027 \u003d \u0027Peru\u0027, and \u0027date\u0027 \u003d \u00272018-01-01\u0027." 
+}, { + "id": "2664", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production of gold mines located in Colombia?", + "sql_context": "CREATE TABLE mining_operations (id INT, mine_name VARCHAR(255), location VARCHAR(255), extraction_type VARCHAR(255), production INT); INSERT INTO mining_operations (id, mine_name, location, extraction_type, production) VALUES (1, \u0027Copper Mine\u0027, \u0027Arizona, USA\u0027, \u0027Open Pit\u0027, 12000), (2, \u0027Gold Mine\u0027, \u0027Ontario, Canada\u0027, \u0027Underground\u0027, 5000), (3, \u0027Iron Mine\u0027, \u0027Minnesota, USA\u0027, \u0027Open Pit\u0027, 32000), (4, \u0027Gold Mine\u0027, \u0027Cauca, Colombia\u0027, \u0027Underground\u0027, 7000), (5, \u0027Emerald Mine\u0027, \u0027Boyaca, Colombia\u0027, \u0027Open Pit\u0027, 3000);", + "sql": "SELECT AVG(production) FROM mining_operations WHERE extraction_type \u003d \u0027Underground\u0027 AND location \u003d \u0027Cauca, Colombia\u0027;", + "sql_explanation": "This query calculates the average production of gold mines located in Colombia by filtering the mining_operations table to only include underground mines located in Cauca, Colombia and then calculating the average production for that group." 
+}, { + "id": "2712", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the mining operations in the \u0027Andes\u0027 mountain range with the highest CO2 emissions", + "sql_context": "CREATE TABLE MiningOperations (id INT, name VARCHAR(255), type VARCHAR(255), co2_emissions INT, location VARCHAR(255)); INSERT INTO MiningOperations (id, name, type, co2_emissions, location) VALUES (1, \u0027Open Pit Mining\u0027, \u0027Surface\u0027, 500, \u0027Andes\u0027), (2, \u0027Underground Mining\u0027, \u0027Underground\u0027, 300, \u0027Andes\u0027), (3, \u0027Mountaintop Removal Mining\u0027, \u0027Surface\u0027, 700, \u0027Appalachians\u0027), (4, \u0027Placer Mining\u0027, \u0027Surface\u0027, 200, \u0027Himalayas\u0027), (5, \u0027Hard Rock Mining\u0027, \u0027Underground\u0027, 800, \u0027Urals\u0027);", + "sql": "SELECT name, type, co2_emissions FROM MiningOperations WHERE location \u003d \u0027Andes\u0027 ORDER BY co2_emissions DESC LIMIT 1;", + "sql_explanation": "This SQL query filters the \u0027MiningOperations\u0027 table for the Andes location, then orders the results by the \u0027co2_emissions\u0027 column in descending order using the ORDER BY clause. The LIMIT 1 clause restricts the output to only the mining operation with the highest CO2 emissions in the Andes." 
+}, { + "id": "2802", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly resource depletion from diamond mining operations worldwide?", + "sql_context": "CREATE TABLE resource_depletion (id INT, location VARCHAR(50), operation_type VARCHAR(50), monthly_resource_depletion INT); INSERT INTO resource_depletion (id, location, operation_type, monthly_resource_depletion) VALUES (1, \u0027Australia\u0027, \u0027Gold\u0027, 500), (2, \u0027South Africa\u0027, \u0027Gold\u0027, 700), (3, \u0027Canada\u0027, \u0027Diamond\u0027, 600);", + "sql": "SELECT AVG(monthly_resource_depletion) as avg_depletion FROM resource_depletion WHERE operation_type \u003d \u0027Diamond\u0027;", + "sql_explanation": "This SQL query calculates the average monthly resource depletion from diamond mining operations worldwide. It uses the AVG function to find the average monthly resource depletion and filters the data for diamond mining operations." 
+}, { + "id": "3039", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of accidents in the diamond mines in the last 5 years?", + "sql_context": "CREATE TABLE Accidents (MineID INT, MineType VARCHAR(15), AccidentDate DATE);", + "sql": "SELECT COUNT(*) FROM Accidents WHERE MineType \u003d \u0027Diamond\u0027 AND AccidentDate \u003e\u003d DATEADD(year, -5, GETDATE());", + "sql_explanation": "The SQL query counts the number of accidents in diamond mines in the last 5 years. It uses the COUNT function to count the number of rows that meet the specified conditions and the WHERE clause to filter the results based on the mine type and accident date. The DATEADD function is used to subtract 5 years from the current date." 
+}, { + "id": "3226", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which departments have the most diverse workforces?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Ethnicity VARCHAR(20));CREATE VIEW DepartmentDiversity AS SELECT Department, COUNT(DISTINCT Ethnicity) as DiversityCount FROM Employees GROUP BY Department;", + "sql": "SELECT Department FROM DepartmentDiversity WHERE ROW_NUMBER() OVER(ORDER BY DiversityCount DESC) \u003c\u003d 3;", + "sql_explanation": "This query selects the departments with the most diverse workforces." +}, { + "id": "3539", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the resource depletion metrics for the \u0027Aquamarine Arch\u0027 mine in Himachal Pradesh, India", + "sql_context": "CREATE TABLE resource_depletion (mine_id INT, year INT, resource_depletion_rate FLOAT);", + "sql": "UPDATE resource_depletion SET resource_depletion_rate \u003d 0.08 WHERE mine_id \u003d 9 AND year \u003d 2021;", + "sql_explanation": "This query updates the resource depletion metrics for the \u0027Aquamarine Arch\u0027 mine in Himachal Pradesh, India by setting the resource depletion rate to 8% for the year 2021 in the \"resource_depletion\" table." 
+}, { + "id": "3607", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of mining engineers and geologists in the workforce?", + "sql_context": "CREATE TABLE workforce (id INT, name VARCHAR(50), position VARCHAR(50), age INT); INSERT INTO workforce (id, name, position, age) VALUES (1, \u0027John Doe\u0027, \u0027Mining Engineer\u0027, 35), (2, \u0027Jane Smith\u0027, \u0027Geologist\u0027, 32), (3, \u0027Alice Johnson\u0027, \u0027Mining Engineer\u0027, 38);", + "sql": "SELECT AVG(age) AS avg_age FROM workforce WHERE position IN (\u0027Mining Engineer\u0027, \u0027Geologist\u0027);", + "sql_explanation": "This query calculates the average age of mining engineers and geologists in the workforce. It uses the WHERE clause to filter the rows based on the \u0027position\u0027 column, and then calculates the average age of the filtered rows using the AVG function." 
+}, { + "id": "3698", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total \u0027cost\u0027 of \u0027mining_operations\u0027 in the \u0027OperationsData\u0027 table for \u00272021\u0027?", + "sql_context": "CREATE TABLE OperationsData (id INT, operation VARCHAR(255), year INT, cost INT); INSERT INTO OperationsData (id, operation, year, cost) VALUES (1, \u0027drilling\u0027, 2021, 1000), (2, \u0027mining\u0027, 2021, 2000), (3, \u0027excavation\u0027, 2022, 1500);", + "sql": "SELECT SUM(cost) FROM OperationsData WHERE operation \u003d \u0027mining_operations\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the total \u0027cost\u0027 of \u0027mining_operations\u0027 in the \u0027OperationsData\u0027 table for \u00272021\u0027. It sums the \u0027cost\u0027 column for rows where the \u0027operation\u0027 is \u0027mining_operations\u0027 and the \u0027year\u0027 is 2021." 
+}, { + "id": "3757", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of gold mined in a day from the mines in the Australian continent?", + "sql_context": "CREATE TABLE GoldMined (MineID INT, MineType VARCHAR(15), MinedDate DATE, GoldAmount INT);", + "sql": "SELECT MAX(GoldAmount) FROM GoldMined WHERE MineType \u003d \u0027Gold\u0027 AND Continent \u003d \u0027Australia\u0027;", + "sql_explanation": "The SQL query finds the maximum amount of gold mined in a day from the mines in the Australian continent. It uses the MAX function to find the highest gold amount and the WHERE clause to filter the results based on the mine type and continent." 
+}, { + "id": "3857", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all mining operations in \u0027south_american_mines\u0027 located in Colombia.", + "sql_context": "CREATE SCHEMA if not exists south_american_schema;CREATE TABLE south_american_schema.south_american_mines (id INT, name VARCHAR, location VARCHAR);INSERT INTO south_american_schema.south_american_mines (id, name, location) VALUES (1, \u0027Colombia mining\u0027, \u0027Colombia\u0027), (2, \u0027Peru mining\u0027, \u0027Peru\u0027);", + "sql": "SELECT name FROM south_american_schema.south_american_mines WHERE location \u003d \u0027Colombia\u0027;", + "sql_explanation": "This query lists the names of mining operations in the \u0027south_american_schema.south_american_mines\u0027 table located in Colombia." 
+}, { + "id": "3949", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of gold mined in the first quarter of 2021 from the \u0027GoldMine\u0027?", + "sql_context": "CREATE TABLE GoldMine (date DATE, quantity INT);INSERT INTO GoldMine (date, quantity) VALUES (\u00272021-01-01\u0027, 150), (\u00272021-01-05\u0027, 200), (\u00272021-02-10\u0027, 180), (\u00272021-03-20\u0027, 250);", + "sql": "SELECT SUM(quantity) FROM GoldMine WHERE date \u003c \u00272021-04-01\u0027 AND date \u003e\u003d \u00272021-01-01\u0027;", + "sql_explanation": "This query calculates the total quantity of gold mined in the first quarter of 2021 by summing the \u0027quantity\u0027 column values where the \u0027date\u0027 is within the first quarter of 2021." 
+}, { + "id": "4071", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of female workers in the Mining department?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary DECIMAL(10,2), Gender VARCHAR(10)); INSERT INTO Employees (EmployeeID, Department, Salary, Gender) VALUES (1, \u0027Mining\u0027, 75000.00, \u0027Female\u0027); INSERT INTO Employees (EmployeeID, Department, Salary, Gender) VALUES (2, \u0027Mining\u0027, 80000.00, \u0027Male\u0027);", + "sql": "SELECT AVG(Salary) FROM Employees WHERE Department \u003d \u0027Mining\u0027 AND Gender \u003d \u0027Female\u0027;", + "sql_explanation": "This SQL query calculates the average salary of female workers in the Mining department by selecting the salary column where department is Mining and gender is Female, and then calculating the average of those salaries." 
+}, { + "id": "4184", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of accidents in mining operations in the state of Wyoming in the year 2020?", + "sql_context": "CREATE TABLE accidents (id INT, state VARCHAR(50), year INT, accident_count INT); INSERT INTO accidents (id, state, year, accident_count) VALUES (4, \u0027Wyoming\u0027, 2020, 4); INSERT INTO accidents (id, state, year, accident_count) VALUES (5, \u0027Wyoming\u0027, 2020, 6); INSERT INTO accidents (id, state, year, accident_count) VALUES (6, \u0027Wyoming\u0027, 2020, 2);", + "sql": "SELECT MAX(accident_count) FROM accidents WHERE state \u003d \u0027Wyoming\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the maximum number of accidents in mining operations in the state of Wyoming in the year 2020. It does this by using the MAX function on the accident_count column and filtering rows by the state and year column with a WHERE clause." 
+}, { + "id": "4301", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of water used per day in the \u0027water_usage\u0027 table for the \u0027pump_station_1\u0027?", + "sql_context": "CREATE TABLE water_usage (id INT, station_name VARCHAR(50), water_amount INT, usage_date DATE); INSERT INTO water_usage (id, station_name, water_amount, usage_date) VALUES (1, \u0027pump_station_1\u0027, 5000, \u00272021-02-01\u0027); INSERT INTO water_usage (id, station_name, water_amount, usage_date) VALUES (2, \u0027pump_station_2\u0027, 6000, \u00272021-03-05\u0027);", + "sql": "SELECT MAX(water_amount) FROM water_usage WHERE station_name \u003d \u0027pump_station_1\u0027;", + "sql_explanation": "The SQL query selects the maximum amount of water used per day in the water_usage table for the \u0027pump_station_1\u0027. It selects the maximum of the water_amount column from the water_usage table where the station_name is equal to \u0027pump_station_1\u0027." 
+}, { + "id": "4626", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of employees per mining operation in Canada?", + "sql_context": "CREATE TABLE mining_operations(id INT, name VARCHAR, country VARCHAR, num_employees INT); INSERT INTO mining_operations(id, name, country, num_employees) VALUES (1, \u0027Alpha Mining\u0027, \u0027Canada\u0027, 50), (2, \u0027Bravo Mining\u0027, \u0027USA\u0027, 75), (3, \u0027Charlie Mining\u0027, \u0027Mexico\u0027, 100); CREATE TABLE employees(id INT, mining_operation_id INT, role VARCHAR); INSERT INTO employees(id, mining_operation_id, role) VALUES (1, 1, \u0027Engineer\u0027), (2, 1, \u0027Operator\u0027), (3, 2, \u0027Manager\u0027), (4, 2, \u0027Engineer\u0027), (5, 3, \u0027Operator\u0027), (6, 3, \u0027Manager\u0027);", + "sql": "SELECT AVG(num_employees) FROM mining_operations WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "This SQL query calculates the average number of employees per mining operation in Canada by using the AVG function to calculate the average value of the num_employees column for rows with a country of \u0027Canada\u0027. The resulting table displays the average number of employees per mining operation in Canada." 
+}, { + "id": "5172", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the Employee table to change the Department of EmployeeID 1 to \u0027Safety\u0027.", + "sql_context": "CREATE TABLE Employee (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50)); INSERT INTO Employee (EmployeeID, FirstName, LastName, Department) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Mining and Quarry\u0027); INSERT INTO Employee (EmployeeID, FirstName, LastName, Department) VALUES (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Environment\u0027);", + "sql": "UPDATE Employee SET Department \u003d \u0027Safety\u0027 WHERE EmployeeID \u003d 1;", + "sql_explanation": "This query updates the Department column of the Employee table for the record with EmployeeID 1 to the value \u0027Safety\u0027." 
+}, { + "id": "5508", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of resources extracted from each mining site?", + "sql_context": "CREATE TABLE MiningSites (site_id INT, site_name VARCHAR(50), location VARCHAR(50), resources_extracted DECIMAL(10, 2)); INSERT INTO MiningSites (site_id, site_name, location, resources_extracted) VALUES (1, \u0027Site A\u0027, \u0027California\u0027, 10000), (2, \u0027Site B\u0027, \u0027Nevada\u0027, 15000);", + "sql": "SELECT site_name, resources_extracted FROM MiningSites;", + "sql_explanation": "This query retrieves the total amount of resources extracted from each mining site by selecting the resources_extracted column and grouping the records based on site_name." 
+}, { + "id": "5672", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique nationalities represented in the workforce?", + "sql_context": "CREATE TABLE employee_info (id INT, name VARCHAR(50), position VARCHAR(50), age INT, nationality VARCHAR(50));", + "sql": "SELECT DISTINCT nationality FROM employee_info;", + "sql_explanation": "This query retrieves all unique nationalities represented in the workforce by using the DISTINCT keyword on the \u0027nationality\u0027 column in the \u0027employee_info\u0027 table." +}, { + "id": "5720", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of waste generated by the mining industry in South Africa?", + "sql_context": "CREATE TABLE mining_waste (id INT, operation_id INT, waste_amount FLOAT); INSERT INTO mining_waste (id, operation_id, waste_amount) VALUES (1, 1, 500), (2, 1, 700), (3, 2, 300);", + "sql": "SELECT SUM(waste_amount) FROM mining_waste;", + "sql_explanation": "This SQL query calculates the total amount of waste generated by the mining industry by using the SUM function to add up all the values in the waste_amount column." 
+}, { + "id": "5804", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of employees per site?", + "sql_context": "CREATE TABLE site (site_id INT, site_name VARCHAR(50), num_employees INT);", + "sql": "SELECT AVG(num_employees) FROM site;", + "sql_explanation": "This query calculates the average number of employees per site by selecting the num_employees column from the site table, and then calculating the average of that column using the AVG() function." +}, { + "id": "400", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of clinical trials conducted by Asian biotech companies in the oncology department that were successful but not yet approved by the FDA between 2017 and 2020, excluding trials from South Korea.", + "sql_context": "CREATE TABLE clinical_trials (id INT, company VARCHAR(255), department VARCHAR(255), trial_status VARCHAR(255), fda_approval_date DATE, company_location VARCHAR(255)); INSERT INTO clinical_trials (id, company, department, trial_status, fda_approval_date, company_location) VALUES (1, \u0027Asian BioTech 1\u0027, \u0027Oncology\u0027, \u0027Successful\u0027, NULL, \u0027Japan\u0027), (2, \u0027Asian BioTech 2\u0027, \u0027Oncology\u0027, 
\u0027Failed\u0027, \u00272018-04-02\u0027, \u0027China\u0027), (3, \u0027Asian BioTech 3\u0027, \u0027Neurology\u0027, \u0027Successful\u0027, \u00272019-09-10\u0027, \u0027South Korea\u0027);", + "sql": "SELECT COUNT(*) FROM clinical_trials WHERE department \u003d \u0027Oncology\u0027 AND trial_status \u003d \u0027Successful\u0027 AND fda_approval_date IS NULL AND company_location NOT IN (\u0027South Korea\u0027) AND fda_approval_date BETWEEN \u00272017-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "The SQL query calculates the number of clinical trials conducted by Asian biotech companies in the oncology department that were successful but not yet approved by the FDA between 2017 and 2020, excluding trials from South Korea, by using the COUNT function to count the number of rows in the clinical_trials table where the department is Oncology, the trial_status is Successful, the fda_approval_date is NULL (indicating that the trial has not yet been approved), the company_location is not South Korea, and the fda_approval_date is between 2017-01-01 and 2020-12-31." 
+}, { + "id": "1547", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Decrease R\u0026D expenditure of \u0027DrugK\u0027 by 15% in H1 2021.", + "sql_context": "CREATE TABLE rd_expenditures_3 (drug_name TEXT, expenditure DECIMAL(10, 2), expenditure_date DATE); INSERT INTO rd_expenditures_3 (drug_name, expenditure, expenditure_date) VALUES (\u0027DrugK\u0027, 300000.00, \u00272021-01-01\u0027), (\u0027DrugK\u0027, 325000.00, \u00272021-02-01\u0027), (\u0027DrugK\u0027, 350000.00, \u00272021-03-01\u0027), (\u0027DrugK\u0027, 375000.00, \u00272021-04-01\u0027), (\u0027DrugK\u0027, 400000.00, \u00272021-05-01\u0027), (\u0027DrugK\u0027, 425000.00, \u00272021-06-01\u0027);", + "sql": "UPDATE rd_expenditures_3 SET expenditure \u003d FLOOR(expenditure * 0.85) WHERE drug_name \u003d \u0027DrugK\u0027 AND expenditure_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-06-30\u0027;", + "sql_explanation": "The SQL query decreases R\u0026D expenditure of \u0027DrugK\u0027 by 15% in H1 2021. The WHERE clause filters the data by drug_name and expenditure_date to ensure only the desired records are updated." 
+}, { + "id": "2265", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many clinical trials were conducted by \u0027GlobalHealth\u0027 in 2021 in South America?", + "sql_context": "CREATE TABLE clinical_trials (company TEXT, continent TEXT, trial_year INT, trial_count INT); INSERT INTO clinical_trials (company, continent, trial_year, trial_count) VALUES (\u0027GlobalHealth\u0027, \u0027South America\u0027, 2021, 10);", + "sql": "SELECT SUM(trial_count) FROM clinical_trials WHERE company \u003d \u0027GlobalHealth\u0027 AND continent \u003d \u0027South America\u0027 AND trial_year \u003d 2021;", + "sql_explanation": "The SQL query calculates the number of clinical trials conducted by \u0027GlobalHealth\u0027 in South America in 2021 by summing the trial_count where the company is \u0027GlobalHealth\u0027, the continent is \u0027South America\u0027, and the trial_year is 2021." 
+}, { + "id": "2311", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales for product \u0027ProductG\u0027 in the second half of 2021?", + "sql_context": "CREATE TABLE product_sales_data_2 (product_id VARCHAR(10), sale_date DATE, revenue DECIMAL(10,2)); INSERT INTO product_sales_data_2 (product_id, sale_date, revenue) VALUES (\u0027ProductG\u0027, \u00272021-07-10\u0027, 600), (\u0027ProductG\u0027, \u00272021-10-20\u0027, 700);", + "sql": "SELECT SUM(revenue) FROM product_sales_data_2 WHERE product_id \u003d \u0027ProductG\u0027 AND sale_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This query calculates the total sales for product \u0027ProductG\u0027 in the second half of 2021 by summing the revenue for all records with a product_id of \u0027ProductG\u0027 and a sale_date within the specified date range." 
+}, { + "id": "2540", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most recently approved drug for Alzheimer\u0027s?", + "sql_context": "CREATE TABLE DrugApprovals (drug_name TEXT, approval_date DATE, indication TEXT); INSERT INTO DrugApprovals (drug_name, approval_date, indication) VALUES (\u0027DrugL\u0027, \u00272022-06-01\u0027, \u0027Alzheimer\u0027\u0027s\u0027);", + "sql": "SELECT drug_name, approval_date FROM DrugApprovals WHERE indication \u003d \u0027Alzheimer\u0027\u0027s\u0027 ORDER BY approval_date DESC LIMIT 1;", + "sql_explanation": "This query retrieves the most recently approved drug for Alzheimer\u0027s by filtering the DrugApprovals table data based on the indication and sorting the results in descending order by approval_date, then returning the first row." 
+}, { + "id": "2879", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sales of infectious diseases drugs in Singapore?", + "sql_context": "CREATE TABLE sales_data (drug_name VARCHAR(50), country VARCHAR(50), sales_amount NUMERIC(10, 2)); INSERT INTO sales_data (drug_name, country, sales_amount) VALUES (\u0027Drug1\u0027, \u0027Singapore\u0027, 6000000), (\u0027Drug2\u0027, \u0027Singapore\u0027, 7000000), (\u0027Drug3\u0027, \u0027Singapore\u0027, 8000000);", + "sql": "SELECT AVG(sales_amount) FROM sales_data WHERE drug_category \u003d \u0027Infectious Diseases\u0027 AND country \u003d \u0027Singapore\u0027;", + "sql_explanation": "This query calculates the average sales amount for infectious diseases drug_category in Singapore using the AVG function." 
+}, { + "id": "2903", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total sales revenue for \u0027DrugE\u0027 in the \u0027Canada\u0027 region in Q3 2021?", + "sql_context": "CREATE TABLE sales_data (drug VARCHAR(50), region VARCHAR(50), quarter INT, year INT, revenue FLOAT); INSERT INTO sales_data (drug, region, quarter, year, revenue) VALUES (\u0027DrugE\u0027, \u0027Canada\u0027, 3, 2021, 4000000);", + "sql": "SELECT SUM(revenue) FROM sales_data WHERE drug \u003d \u0027DrugE\u0027 AND region \u003d \u0027Canada\u0027 AND quarter \u003d 3 AND year \u003d 2021;", + "sql_explanation": "The SQL query filters the sales_data table by \u0027DrugE\u0027, the \u0027Canada\u0027 region, Q3, and 2021, and then sums the revenue for the records that meet the criteria." 
+}, { + "id": "2911", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of \u0027DrugF\u0027 in the \u0027Cardiology\u0027 therapeutic area in 2023?", + "sql_context": "CREATE TABLE market_share (drug VARCHAR(50), therapeutic_area VARCHAR(50), year INT, market_share FLOAT); INSERT INTO market_share (drug, therapeutic_area, year, market_share) VALUES (\u0027DrugF\u0027, \u0027Cardiology\u0027, 2023, 0.32);", + "sql": "SELECT market_share FROM market_share WHERE drug \u003d \u0027DrugF\u0027 AND therapeutic_area \u003d \u0027Cardiology\u0027 AND year \u003d 2023;", + "sql_explanation": "The SQL query selects the market share for \u0027DrugF\u0027 in the \u0027Cardiology\u0027 therapeutic area in 2023 from the market_share table." 
+}, { + "id": "3126", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the revenue for \u0027DrugD\u0027 in \u0027CountryE\u0027 in Q1 of 2018?", + "sql_context": "CREATE TABLE sales(drug_name TEXT, country TEXT, sales_quarter INT, revenue FLOAT); INSERT INTO sales (drug_name, country, sales_quarter, revenue) VALUES (\u0027DrugA\u0027, \u0027CountryY\u0027, 1, 1100000), (\u0027DrugB\u0027, \u0027CountryX\u0027, 1, 900000), (\u0027DrugD\u0027, \u0027CountryE\u0027, 1, 1300000);", + "sql": "SELECT SUM(revenue) FROM sales WHERE drug_name \u003d \u0027DrugD\u0027 AND country \u003d \u0027CountryE\u0027 AND sales_quarter \u003d 1;", + "sql_explanation": "This query calculates the total revenue for \u0027DrugD\u0027 in \u0027CountryE\u0027 in Q1 of 2018 by summing the revenue values in the sales table where drug_name is \u0027DrugD\u0027, country is \u0027CountryE\u0027, and sales_quarter is 1." 
+}, { + "id": "3231", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales of cardiovascular drugs in Japan?", + "sql_context": "CREATE TABLE sales_data (drug_name VARCHAR(50), country VARCHAR(50), sales_amount NUMERIC(10, 2)); INSERT INTO sales_data (drug_name, country, sales_amount) VALUES (\u0027DrugX\u0027, \u0027Japan\u0027, 5000000), (\u0027DrugY\u0027, \u0027Japan\u0027, 7000000), (\u0027DrugZ\u0027, \u0027Japan\u0027, 6000000);", + "sql": "SELECT SUM(sales_amount) FROM sales_data WHERE drug_category \u003d \u0027Cardiovascular\u0027 AND country \u003d \u0027Japan\u0027;", + "sql_explanation": "This query calculates the total sales amount for cardiovascular drug_category in Japan using the SUM function." 
+}, { + "id": "3415", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total sales quantity for DrugX in H1 2021?", + "sql_context": "CREATE TABLE sales_data (drug_name VARCHAR(100), sales_quantity INT, half INT, year INT); INSERT INTO sales_data (drug_name, sales_quantity, half, year) VALUES (\u0027DrugX\u0027, 1200, 1, 2021), (\u0027DrugY\u0027, 800, 1, 2021), (\u0027DrugX\u0027, 1500, 2, 2021), (\u0027DrugY\u0027, 900, 2, 2021);", + "sql": "SELECT SUM(sales_quantity) FROM sales_data WHERE drug_name \u003d \u0027DrugX\u0027 AND half \u003d 1 AND year \u003d 2021;", + "sql_explanation": "The SQL query calculates the total sales quantity for DrugX in H1 2021 by summing the sales_quantity values for that drug, half, and year in the sales_data table." 
+}, { + "id": "3419", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the clinical trials for drug \u0027DrugA\u0027 that were approved after 2018?", + "sql_context": "CREATE TABLE clinical_trials (drug_name VARCHAR(50), approval_date DATE); INSERT INTO clinical_trials (drug_name, approval_date) VALUES (\u0027DrugA\u0027, \u00272019-02-03\u0027), (\u0027DrugB\u0027, \u00272017-06-14\u0027), (\u0027DrugA\u0027, \u00272015-09-22\u0027);", + "sql": "SELECT drug_name FROM clinical_trials WHERE drug_name \u003d \u0027DrugA\u0027 AND approval_date \u003e \u00272018-12-31\u0027;", + "sql_explanation": "This query lists the clinical trials for drug \u0027DrugA\u0027 that were approved after 2018 by selecting all records with a drug_name of \u0027DrugA\u0027 and an approval_date greater than \u00272018-12-31\u0027." 
+}, { + "id": "3493", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many clinical trials were conducted in Asia for vaccines?", + "sql_context": "CREATE TABLE clinical_trials (country TEXT, drug_class TEXT, trial_count INTEGER);", + "sql": "SELECT SUM(trial_count) FROM clinical_trials WHERE country \u003d \u0027Asia\u0027 AND drug_class \u003d \u0027vaccines\u0027;", + "sql_explanation": "This query calculates the total number of clinical trials conducted in Asia for vaccines by selecting the sum (SUM) of the trial_count column, filtering for country equal to \u0027Asia\u0027 and drug_class equal to \u0027vaccines\u0027." +}, { + "id": "3646", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the R\u0026D expenditure for \u0027DrugG\u0027 to $3,000,000 in Q2 2020.", + "sql_context": "CREATE TABLE drug_g_rd (quarter INTEGER, year INTEGER, amount INTEGER); INSERT INTO drug_g_rd (quarter, year, amount) VALUES (2, 2020, 2500000);", + "sql": "UPDATE drug_g_rd SET amount \u003d 3000000 WHERE drug_g_rd.quarter \u003d 2 AND drug_g_rd.year \u003d 2020;", + "sql_explanation": "This query updates the R\u0026D expenditure for DrugG in Q2 2020 to the new amount of $3,000,000." 
+}, { + "id": "3701", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete clinical trial data for \u0027DrugE\u0027 that was approved after 2018.", + "sql_context": "CREATE TABLE clinical_trial_data (drug_name TEXT, trial_status TEXT, approval_date DATE); INSERT INTO clinical_trial_data (drug_name, trial_status, approval_date) VALUES (\u0027DrugE\u0027, \u0027Approved\u0027, \u00272019-01-01\u0027);", + "sql": "DELETE FROM clinical_trial_data WHERE drug_name \u003d \u0027DrugE\u0027 AND approval_date \u003e \u00272018-12-31\u0027;", + "sql_explanation": "The SQL query deletes clinical trial data for \u0027DrugE\u0027 that was approved after 2018. The WHERE clause filters the data by drug_name and approval_date to ensure only the desired records are deleted." 
+}, { + "id": "3774", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many drugs were approved in \u0027Asia\u0027 in 2021?", + "sql_context": "CREATE TABLE drug_approval (drug_name TEXT, year INT, region TEXT); INSERT INTO drug_approval (drug_name, year, region) VALUES (\u0027DrugX\u0027, 2019, \u0027Europe\u0027), (\u0027DrugX\u0027, 2020, \u0027Asia\u0027), (\u0027DrugY\u0027, 2018, \u0027Asia\u0027), (\u0027DrugY\u0027, 2020, \u0027Asia\u0027), (\u0027DrugZ\u0027, 2021, \u0027Asia\u0027), (\u0027DrugA\u0027, 2021, \u0027Asia\u0027);", + "sql": "SELECT COUNT(DISTINCT drug_name) FROM drug_approval WHERE year \u003d 2021 AND region \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query retrieves the number of distinct drugs approved in Asia in 2021." 
+}, { + "id": "3789", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales of drug \u0027D004\u0027 in the European Union?", + "sql_context": "CREATE TABLE sales (drug_id VARCHAR(10), region VARCHAR(10), sales_amount NUMERIC(12,2));", + "sql": "SELECT SUM(sales_amount) FROM sales WHERE drug_id \u003d \u0027D004\u0027 AND region \u003d \u0027European Union\u0027;", + "sql_explanation": "This query calculates the total sales of drug \u0027D004\u0027 in the European Union by summing the \u0027sales_amount\u0027 column where \u0027drug_id\u0027 is \u0027D004\u0027 and \u0027region\u0027 is \u0027European Union\u0027." 
+}, { + "id": "3798", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for \u0027PharmaAid\u0027 from drug sales in 2021?", + "sql_context": "CREATE TABLE PharmaAid_DrugSales(company VARCHAR(20), year INT, revenue DECIMAL(10,2)); INSERT INTO PharmaAid_DrugSales VALUES(\u0027PharmaAid\u0027, 2021, 18000000.00);", + "sql": "SELECT SUM(revenue) FROM PharmaAid_DrugSales WHERE company \u003d \u0027PharmaAid\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the total revenue for PharmaAid from drug sales in 2021 by summing up the revenue values in the PharmaAid_DrugSales table where the company is PharmaAid and the year is 2021." 
+}, { + "id": "3806", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which drugs were approved in the second half of 2021?", + "sql_context": "CREATE TABLE drug_approval (drug VARCHAR(255), approval_date DATE); INSERT INTO drug_approval (drug, approval_date) VALUES (\u0027DrugC\u0027, \u00272021-06-15\u0027), (\u0027DrugD\u0027, \u00272022-08-30\u0027), (\u0027DrugE\u0027, \u00272021-12-31\u0027), (\u0027DrugF\u0027, \u00272021-01-01\u0027);", + "sql": "SELECT drug FROM drug_approval WHERE approval_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "The SQL query filters the records based on the approval date to get the drugs that were approved in the second half of 2021." 
+}, { + "id": "3852", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average R\u0026D expenditure per clinical trial for the year 2020?", + "sql_context": "CREATE TABLE clinical_trials (id INT, year INT, rd_expenditure FLOAT); INSERT INTO clinical_trials (id, year, rd_expenditure) VALUES (1, 2018, 500000), (2, 2019, 700000), (3, 2020, 800000), (4, 2021, 900000);", + "sql": "SELECT AVG(rd_expenditure) as avg_rd_expenditure FROM clinical_trials WHERE year \u003d 2020;", + "sql_explanation": "This query calculates the average R\u0026D expenditure per clinical trial for the year 2020 by filtering clinical trials data for the year 2020 and calculating the average R\u0026D expenditure." 
+}, { + "id": "3873", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clinical trials that have been conducted in \u0027CountryA\u0027 or \u0027CountryB\u0027.", + "sql_context": "CREATE TABLE clinical_trials (trial_id INTEGER, country TEXT); INSERT INTO clinical_trials (trial_id, country) VALUES (1, \u0027CountryA\u0027), (2, \u0027CountryB\u0027), (3, \u0027CountryC\u0027);", + "sql": "SELECT DISTINCT trial_id FROM clinical_trials WHERE country IN (\u0027CountryA\u0027, \u0027CountryB\u0027);", + "sql_explanation": "This query lists all unique clinical trials conducted in \u0027CountryA\u0027 or \u0027CountryB\u0027 by selecting distinct \u0027trial_id\u0027 values where the \u0027country\u0027 is either \u0027CountryA\u0027 or \u0027CountryB\u0027." 
+}, { + "id": "3903", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the market access strategy for \u0027ProductZ\u0027 in \u0027CountryC\u0027?", + "sql_context": "CREATE TABLE market_access(product varchar(20), country varchar(20), strategy varchar(50));INSERT INTO market_access VALUES (\u0027ProductZ\u0027, \u0027CountryC\u0027, \u0027Exclusive distribution\u0027);", + "sql": "SELECT strategy FROM market_access WHERE product \u003d \u0027ProductZ\u0027 AND country \u003d \u0027CountryC\u0027;", + "sql_explanation": "The SQL query retrieves the market access strategy for \u0027ProductZ\u0027 in \u0027CountryC\u0027 by selecting the \u0027strategy\u0027 column value where the \u0027product\u0027 is \u0027ProductZ\u0027 and the \u0027country\u0027 is \u0027CountryC\u0027." 
+}, { + "id": "4076", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which clinical trials have a \u0027COMPLETED\u0027 status for drug \u0027D002\u0027?", + "sql_context": "CREATE TABLE clinical_trials (drug_id VARCHAR(10), trial_status VARCHAR(10));", + "sql": "SELECT * FROM clinical_trials WHERE drug_id \u003d \u0027D002\u0027 AND trial_status \u003d \u0027COMPLETED\u0027;", + "sql_explanation": "This query retrieves all clinical trials for drug \u0027D002\u0027 with a \u0027COMPLETED\u0027 status by selecting all rows where \u0027drug_id\u0027 is \u0027D002\u0027 and \u0027trial_status\u0027 is \u0027COMPLETED\u0027." +}, { + "id": "4256", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries participated in \u0027ClinicalTrial789\u0027?", + "sql_context": "CREATE TABLE clinical_trials (trial_id TEXT, country TEXT); INSERT INTO clinical_trials (trial_id, country) VALUES (\u0027ClinicalTrial123\u0027, \u0027USA\u0027), (\u0027ClinicalTrial123\u0027, \u0027Canada\u0027), (\u0027ClinicalTrial456\u0027, \u0027Mexico\u0027), (\u0027ClinicalTrial789\u0027, \u0027India\u0027), (\u0027ClinicalTrial789\u0027, \u0027Nepal\u0027);", + "sql": "SELECT DISTINCT country FROM clinical_trials WHERE trial_id \u003d \u0027ClinicalTrial789\u0027;", + 
"sql_explanation": "The SQL query retrieves unique country names where \u0027ClinicalTrial789\u0027 was conducted." +}, { + "id": "4528", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the clinical trial start dates for a specific drug?", + "sql_context": "CREATE TABLE clinical_trials (trial_id INT, drug_name VARCHAR(255), start_date DATE, status VARCHAR(255)); INSERT INTO clinical_trials (trial_id, drug_name, start_date, status) VALUES (1, \u0027DrugA\u0027, \u00272017-06-01\u0027, \u0027Completed\u0027), (2, \u0027DrugB\u0027, \u00272018-11-15\u0027, \u0027In Progress\u0027), (3, \u0027DrugC\u0027, \u00272020-02-20\u0027, \u0027Planned\u0027), (4, \u0027DrugA\u0027, \u00272021-09-01\u0027, \u0027Recruiting\u0027);", + "sql": "SELECT drug_name, start_date FROM clinical_trials WHERE drug_name \u003d \u0027DrugA\u0027;", + "sql_explanation": "This query retrieves the clinical trial start dates for a specific drug by filtering the clinical_trials table based on the drug_name column." 
+}, { + "id": "1503", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of wastewater not treated in the month of April across all treatment plants?", + "sql_context": "CREATE TABLE wastewater_treatment(plant_id INT, treated_volume FLOAT, not_treated_volume FLOAT, month DATE); INSERT INTO wastewater_treatment(plant_id, treated_volume, not_treated_volume, month) VALUES (1, 10000, 2000, \u00272022-04-01\u0027), (2, 15000, 3000, \u00272022-04-01\u0027), (3, 12000, 1500, \u00272022-04-01\u0027);", + "sql": "SELECT 100.0 * SUM(not_treated_volume) / (SUM(treated_volume) + SUM(not_treated_volume)) AS percentage FROM wastewater_treatment WHERE month \u003d \u00272022-04-01\u0027;", + "sql_explanation": "This query calculates the percentage of wastewater not treated in the month of April by dividing the sum of the not_treated_volume for April by the sum of the treated_volume and not_treated_volume for April and multiplying by 100." 
+}, { + "id": "1863", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of water that is recycled by wastewater treatment plants in the city of Los Angeles for the year 2018?", + "sql_context": "CREATE TABLE wastewater_treatment(plant_id INT, city VARCHAR(50), year INT, recycled_water_volume FLOAT, total_water_volume FLOAT); INSERT INTO wastewater_treatment(plant_id, city, year, recycled_water_volume, total_water_volume) VALUES (1, \u0027Los Angeles\u0027, 2018, 5000000, 8000000), (2, \u0027Los Angeles\u0027, 2018, 6000000, 9000000);", + "sql": "SELECT city, (SUM(recycled_water_volume) / SUM(total_water_volume)) * 100 FROM wastewater_treatment WHERE city \u003d \u0027Los Angeles\u0027 AND year \u003d 2018;", + "sql_explanation": "This query calculates the percentage of water that is recycled by wastewater treatment plants in the city of Los Angeles for the year 2018. It divides the total recycled water volume by the total water volume and multiplies the result by 100 to get the percentage." 
+}, { + "id": "1880", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum daily water consumption for the water treatment plant with ID 8 in the state of Washington in 2022?", + "sql_context": "CREATE TABLE water_treatment_plant (plant_id INT, state VARCHAR(50), year INT, month INT, day INT, water_consumption FLOAT); INSERT INTO water_treatment_plant (plant_id, state, year, month, day, water_consumption) VALUES (8, \u0027Washington\u0027, 2022, 1, 1, 12345.6), (8, \u0027Washington\u0027, 2022, 1, 2, 23456.7), (8, \u0027Washington\u0027, 2022, 1, 3, 34567.8);", + "sql": "SELECT MAX(water_consumption) as max_water_consumption FROM water_treatment_plant WHERE plant_id \u003d 8 AND state \u003d \u0027Washington\u0027 AND year \u003d 2022;", + "sql_explanation": "This SQL query calculates the maximum daily water consumption for the water treatment plant with ID 8 in the state of Washington in the year 2022. It does this by selecting the maximum water consumption in the water_treatment_plant table where the plant_id is 8, the state is Washington, and the year is 2022." 
+}, { + "id": "1998", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of water conservation initiatives in Australia in 2017?", + "sql_context": "CREATE TABLE WaterConservationInitiatives (Id INT, Initiative VARCHAR(100), Location VARCHAR(100), Cost INT, StartDate DATE, EndDate DATE); INSERT INTO WaterConservationInitiatives (Id, Initiative, Location, Cost, StartDate, EndDate) VALUES (1, \u0027Rainwater Harvesting\u0027, \u0027Australia\u0027, 15000, \u00272016-04-01\u0027, \u00272016-12-31\u0027); INSERT INTO WaterConservationInitiatives (Id, Initiative, Location, Cost, StartDate, EndDate) VALUES (2, \u0027Greywater Recycling\u0027, \u0027Australia\u0027, 20000, \u00272017-01-01\u0027, \u00272017-12-31\u0027); INSERT INTO WaterConservationInitiatives (Id, Initiative, Location, Cost, StartDate, EndDate) VALUES (3, \u0027Drip Irrigation\u0027, \u0027Canada\u0027, 25000, \u00272017-04-01\u0027, \u00272017-12-31\u0027); INSERT INTO WaterConservationInitiatives (Id, Initiative, Location, Cost, StartDate, EndDate) VALUES (4, \u0027Smart Landscaping\u0027, \u0027Australia\u0027, 30000, \u00272017-01-01\u0027, \u00272017-12-31\u0027);", + "sql": "SELECT SUM(Cost) FROM WaterConservationInitiatives WHERE Location \u003d \u0027Australia\u0027 AND StartDate \u003c\u003d \u00272017-12-31\u0027 AND EndDate \u003e\u003d \u00272017-01-01\u0027;", + "sql_explanation": "This query calculates the total cost of water conservation initiatives in Australia in 2017 by filtering the WaterConservationInitiatives table using the Location, StartDate, and EndDate columns, and then 
calculating the sum of the Cost column for those initiatives." +}, { + "id": "2276", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily wastewater discharge from municipal sources in New York in 2020?", + "sql_context": "CREATE TABLE wastewater_discharge (location VARCHAR(50), year INT, month INT, daily_discharge FLOAT); INSERT INTO wastewater_discharge (location, year, month, daily_discharge) VALUES (\u0027New York\u0027, 2020, 1, 1.2), (\u0027New York\u0027, 2020, 2, 1.3), (\u0027New York\u0027, 2020, 3, 1.4);", + "sql": "SELECT AVG(w.daily_discharge) AS avg_daily_discharge FROM wastewater_discharge w WHERE w.location \u003d \u0027New York\u0027 AND w.year \u003d 2020;", + "sql_explanation": "This query calculates the average daily wastewater discharge from municipal sources in New York in 2020. The calculation is based on the average of the daily discharge values for New York in 2020, using the AVG function." 
+}, { + "id": "2374", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total water usage for the \u0027Residential\u0027 category between the dates \u00272022-07-01\u0027 and \u00272022-07-05\u0027 in the water_usage table", + "sql_context": "CREATE TABLE water_usage ( date DATE, usage_category VARCHAR(20), region VARCHAR(20), usage_amount INT ); INSERT INTO water_usage (date, usage_category, region, usage_amount) VALUES ( \u00272022-07-01\u0027, \u0027Residential\u0027, \u0027Northeast\u0027, 15000), (\u00272022-07-02\u0027, \u0027Industrial\u0027, \u0027Midwest\u0027, 200000), (\u00272022-07-03\u0027, \u0027Agricultural\u0027, \u0027West\u0027, 800000);", + "sql": "SELECT SUM(usage_amount) FROM water_usage WHERE usage_category \u003d \u0027Residential\u0027 AND date BETWEEN \u00272022-07-01\u0027 AND \u00272022-07-05\u0027;", + "sql_explanation": "This query calculates the total water usage for the \u0027Residential\u0027 category between the dates \u00272022-07-01\u0027 and \u00272022-07-05\u0027 in the water_usage table. The SUM function is used to compute the total of the specified column, and the WHERE clause filters the records based on the given conditions using the BETWEEN keyword." 
+}, { + "id": "2718", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption for municipal usage in Texas and California in 2018?", + "sql_context": "CREATE TABLE water_usage ( id INT, state_id INT, year INT, usage_type VARCHAR(20), water_consumption FLOAT ); INSERT INTO water_usage (id, state_id, year, usage_type, water_consumption) VALUES (1, 1, 2017, \u0027domestic\u0027, 1200), (2, 1, 2018, \u0027domestic\u0027, 1500), (3, 1, 2019, \u0027domestic\u0027, 1700), (4, 2, 2017, \u0027domestic\u0027, 900), (5, 2, 2018, \u0027domestic\u0027, 1100), (6, 2, 2019, \u0027domestic\u0027, 1300), (7, 3, 2017, \u0027domestic\u0027, 1600), (8, 3, 2018, \u0027domestic\u0027, 1800), (9, 3, 2019, \u0027domestic\u0027, 2000);", + "sql": "SELECT SUM(water_consumption) FROM water_usage WHERE state_id IN (1, 2) AND year \u003d 2018 AND usage_type \u003d \u0027domestic\u0027;", + "sql_explanation": "This SQL query calculates the total water consumption for municipal usage in Texas and California in 2018. It filters the records based on state_id 1 and 2, year 2018 and usage_type \u0027domestic\u0027 and then calculates the sum of water consumption." 
+}, { + "id": "2830", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily water consumption for the state of Texas in the month of January?", + "sql_context": "CREATE TABLE DailyWaterUsage (Date DATE, State VARCHAR(20), Usage FLOAT); INSERT INTO DailyWaterUsage (Date, State, Usage) VALUES (\u00272022-01-01\u0027, \u0027Texas\u0027, 500), (\u00272022-01-02\u0027, \u0027Texas\u0027, 800), (\u00272022-01-03\u0027, \u0027Texas\u0027, 900);", + "sql": "SELECT Date, Usage FROM DailyWaterUsage WHERE State \u003d \u0027Texas\u0027 AND Date \u003e\u003d \u00272022-01-01\u0027 AND Date \u003c\u003d \u00272022-01-31\u0027;", + "sql_explanation": "The SQL query retrieves the daily water consumption for the state of Texas in the month of January by filtering the DailyWaterUsage table for rows with the state value \u0027Texas\u0027 and a date within the month of January." 
+}, { + "id": "2926", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How much water was used for agricultural purposes in California in the year 2019?", + "sql_context": "CREATE TABLE water_usage (id INT, state VARCHAR(20), year INT, usage_type VARCHAR(50), usage FLOAT);", + "sql": "SELECT SUM(usage) FROM water_usage WHERE state \u003d \u0027California\u0027 AND year \u003d 2019 AND usage_type \u003d \u0027agricultural\u0027;", + "sql_explanation": "This query calculates the total amount of water used for agricultural purposes in California in the year 2019. It does so by summing up the usage values in the water_usage table for California, 2019, and agricultural usage_type." 
+}, { + "id": "3045", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many droughts have been recorded in the state of California in the last 10 years?", + "sql_context": "CREATE TABLE Droughts (state VARCHAR(20), year INT, drought BOOLEAN); INSERT INTO Droughts (state, year, drought) VALUES (\u0027California\u0027, 2010, true), (\u0027Texas\u0027, 2010, false), (\u0027California\u0027, 2011, true), (\u0027Florida\u0027, 2011, false), (\u0027California\u0027, 2012, true), (\u0027New York\u0027, 2012, false), (\u0027California\u0027, 2013, false), (\u0027Texas\u0027, 2013, true), (\u0027California\u0027, 2014, true), (\u0027Florida\u0027, 2014, false);", + "sql": "SELECT COUNT(*) FROM Droughts WHERE state \u003d \u0027California\u0027 AND year BETWEEN 2010 AND 2020 AND drought \u003d true;", + "sql_explanation": "The SQL query calculates the number of droughts recorded in the state of California in the last 10 years by using the COUNT function on all the rows, filtering the data for the state \u0027California\u0027, and years between 2010 and 2020, and where drought is true." 
+}, { + "id": "3047", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of households with water consumption above 15000 liters per month in the city of Cape Town, South Africa for the year 2020?", + "sql_context": "CREATE TABLE monthly_water_usage (house_id INT, city VARCHAR(255), usage_liters INT, year INT, month INT); INSERT INTO monthly_water_usage (house_id, city, usage_liters, year, month) VALUES (1, \u0027Cape Town\u0027, 16000, 2020, 1), (2, \u0027Cape Town\u0027, 14000, 2020, 2), (3, \u0027Cape Town\u0027, 18000, 2020, 3);", + "sql": "SELECT COUNT(*) FROM monthly_water_usage WHERE city \u003d \u0027Cape Town\u0027 AND usage_liters \u003e 15000 AND year \u003d 2020;", + "sql_explanation": "Count the number of rows with city \u0027Cape Town\u0027, usage_liters greater than 15000, and year 2020." 
+}, { + "id": "3114", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which conservation efforts in Rio de Janeiro have a cost less than 5000?", + "sql_context": "CREATE TABLE ConservationEfforts (Location VARCHAR(100), Effort VARCHAR(100), Cost INT, Date DATE); INSERT INTO ConservationEfforts (Location, Effort, Cost, Date) VALUES (\u0027Rio de Janeiro\u0027, \u0027Water Recycling\u0027, 4000, \u00272022-01-01\u0027);", + "sql": "SELECT Location, Effort, Cost FROM ConservationEfforts WHERE Location \u003d \u0027Rio de Janeiro\u0027 AND Cost \u003c 5000;", + "sql_explanation": "This query selects the Location, Effort, and Cost from the ConservationEfforts table where the Location is \u0027Rio de Janeiro\u0027 and the Cost is less than 5000." 
+}, { + "id": "3366", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water consumption in California for the years 2017 and 2018?", + "sql_context": "CREATE TABLE water_consumption (id INT, state VARCHAR(20), year INT, consumption FLOAT); INSERT INTO water_consumption (id, state, year, consumption) VALUES (1, \u0027California\u0027, 2017, 120.5), (2, \u0027California\u0027, 2018, 130.3), (3, \u0027California\u0027, 2019, 140.0), (4, \u0027New York\u0027, 2017, 115.3), (5, \u0027New York\u0027, 2018, 120.0), (6, \u0027New York\u0027, 2019, 125.5);", + "sql": "SELECT MAX(consumption) FROM water_consumption WHERE state \u003d \u0027California\u0027 AND year IN (2017, 2018);", + "sql_explanation": "This query calculates the maximum water consumption in California for the years 2017 and 2018 by selecting the maximum consumption value where state is \u0027California\u0027 and year is either 2017 or 2018." 
+}, { + "id": "3443", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water usage by all agricultural customers in the month of July?", + "sql_context": "CREATE TABLE water_usage(customer_id INT, usage FLOAT, month DATE); INSERT INTO water_usage(customer_id, usage, month) VALUES (1, 500, \u00272022-07-01\u0027), (2, 350, \u00272022-07-01\u0027), (3, 700, \u00272022-07-01\u0027);", + "sql": "SELECT SUM(usage) FROM water_usage WHERE month \u003d \u00272022-07-01\u0027 AND customer_type \u003d \u0027agricultural\u0027;", + "sql_explanation": "This query calculates the total water usage by summing the usage column for rows with a customer_type of \u0027agricultural\u0027 and a month of July 1st, 2022." 
+}, { + "id": "3458", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027treatment_cost\u0027 column in the \u0027wastewater_treatment_plants\u0027 view where the plant_id is 7", + "sql_context": "CREATE VIEW wastewater_treatment_plants AS SELECT w.plant_id, w.location, w.capacity, t.cost AS treatment_cost FROM wastewater_treatment w INNER JOIN treatment_costs t ON w.plant_id \u003d t.plant_id;", + "sql": "UPDATE wastewater_treatment_plants SET treatment_cost \u003d treatment_cost * 1.1 WHERE plant_id \u003d 7;", + "sql_explanation": "This SQL query updates the \u0027treatment_cost\u0027 column in the \u0027wastewater_treatment_plants\u0027 view where the plant_id is 7. It uses the UPDATE statement followed by the view name and a SET clause to specify the new value. A WHERE clause is added to specify the condition for updating using the plant_id column." 
+}, { + "id": "3487", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption by industrial customers in Texas and Oklahoma?", + "sql_context": "CREATE TABLE industrial_customers (customer_id INT, location VARCHAR(255), water_consumption FLOAT); INSERT INTO industrial_customers (customer_id, location, water_consumption) VALUES (1, \u0027Dallas\u0027, 2000), (2, \u0027Houston\u0027, 2500), (3, \u0027Tulsa\u0027, 1800), (4, \u0027Oklahoma_City\u0027, 2200);", + "sql": "SELECT SUM(water_consumption) FROM industrial_customers WHERE location IN (\u0027Texas\u0027, \u0027Oklahoma\u0027);", + "sql_explanation": "This SQL query calculates the total water consumption by industrial customers in Texas and Oklahoma by using the SUM function on the water_consumption column and filtering the data where the location is either \u0027Texas\u0027 or \u0027Oklahoma\u0027." 
+}, { + "id": "3543", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How much water was conserved in the state of New York in 2019?", + "sql_context": "CREATE TABLE water_conservation(state VARCHAR(20), year INT, amount_conserved INT); INSERT INTO water_conservation(state, year, amount_conserved) VALUES (\u0027New York\u0027, 2015, 2000), (\u0027New York\u0027, 2016, 2500), (\u0027New York\u0027, 2017, 3000), (\u0027New York\u0027, 2018, 3500), (\u0027New York\u0027, 2019, 4000);", + "sql": "SELECT SUM(amount_conserved) FROM water_conservation WHERE state \u003d \u0027New York\u0027 AND year \u003d 2019;", + "sql_explanation": "This query calculates the total water conservation in the state of New York in 2019 by summing up the amount_conserved column where the state is New York and the year is 2019." 
+}, { + "id": "3671", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of drought-affected areas in each region?", + "sql_context": "CREATE TABLE drought_data (id INT, region VARCHAR(30), area_size INT, drought_impact INT); INSERT INTO drought_data (id, region, area_size, drought_impact) VALUES (1, \u0027RegionA\u0027, 5000, 2500), (2, \u0027RegionB\u0027, 6000, 4000), (3, \u0027RegionC\u0027, 7000, 3000);", + "sql": "SELECT region, (drought_impact * 100.0 / area_size) as drought_percentage FROM drought_data;", + "sql_explanation": "Calculate the drought-affected percentage for each region by dividing the drought_impact by the area_size." 
+}, { + "id": "3776", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of recycled water in the state of New York in 2019?", + "sql_context": "CREATE TABLE Water_Recycling (State VARCHAR(20), Year INT, Recycled_Volume INT); INSERT INTO Water_Recycling (State, Year, Recycled_Volume) VALUES (\u0027New York\u0027, 2019, 10000000), (\u0027New York\u0027, 2020, 12000000);", + "sql": "SELECT SUM(Recycled_Volume) FROM Water_Recycling WHERE State \u003d \u0027New York\u0027 AND Year \u003d 2019;", + "sql_explanation": "This query calculates the total volume of recycled water in the state of New York in 2019 by filtering the Water_Recycling table to only include rows with the state \u0027New York\u0027 and year 2019, then using the SUM function to compute the total recycled water volume." 
+}, { + "id": "3866", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water consumption in a single day for the city of Chicago?", + "sql_context": "CREATE TABLE Daily_Water_Consumption (city VARCHAR(20), water_consumption FLOAT, date DATE); INSERT INTO Daily_Water_Consumption (city, water_consumption, date) VALUES (\u0027Chicago\u0027, 1500000, \u00272022-01-01\u0027), (\u0027Los Angeles\u0027, 2000000, \u00272022-01-02\u0027), (\u0027Chicago\u0027, 1700000, \u00272022-01-03\u0027), (\u0027New York\u0027, 1600000, \u00272022-01-04\u0027);", + "sql": "SELECT city, MAX(water_consumption) FROM Daily_Water_Consumption WHERE city \u003d \u0027Chicago\u0027;", + "sql_explanation": "The SQL query calculates the maximum water consumption in a single day for the city of Chicago by using the MAX function on the water_consumption column, filtering the data for the city \u0027Chicago\u0027." 
+}, { + "id": "3950", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wastewater treatment plants are there in Texas and New York combined?", + "sql_context": "CREATE TABLE wastewater_treatment_plants (id INT, state VARCHAR(20)); INSERT INTO wastewater_treatment_plants (id, state) VALUES (1, \u0027Texas\u0027), (2, \u0027Texas\u0027), (3, \u0027Texas\u0027), (4, \u0027New York\u0027), (5, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM wastewater_treatment_plants WHERE state IN (\u0027Texas\u0027, \u0027New York\u0027);", + "sql_explanation": "The SQL query counts the number of wastewater treatment plants in Texas and New York by using the COUNT function on all rows (*), filtering the data for rows where the state is either \u0027Texas\u0027 or \u0027New York\u0027." 
+}, { + "id": "4020", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total wastewater treated in California and Texas?", + "sql_context": "CREATE TABLE us_states (state VARCHAR(255), wastewater_treated INT); INSERT INTO us_states (state, wastewater_treated) VALUES (\u0027California\u0027, 2000000), (\u0027Texas\u0027, 3000000);", + "sql": "SELECT SUM(wastewater_treated) FROM us_states WHERE state IN (\u0027California\u0027, \u0027Texas\u0027);", + "sql_explanation": "This query calculates the total wastewater treated in California and Texas by summing up the \u0027wastewater_treated\u0027 column values for rows with \u0027state\u0027 equal to \u0027California\u0027 or \u0027Texas\u0027 in the \u0027us_states\u0027 table." 
+}, { + "id": "4040", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many drought-affected regions are there in the state of Colorado?", + "sql_context": "CREATE TABLE regions (id INT, region_name VARCHAR(50), state VARCHAR(50), drought_status VARCHAR(50)); INSERT INTO regions VALUES (1, \u0027North Park\u0027, \u0027Colorado\u0027, \u0027Drought\u0027), (2, \u0027South Park\u0027, \u0027Colorado\u0027, \u0027No Drought\u0027), (3, \u0027San Luis Valley\u0027, \u0027Colorado\u0027, \u0027Drought\u0027), (4, \u0027Arkansas River Valley\u0027, \u0027Colorado\u0027, \u0027No Drought\u0027);", + "sql": "SELECT COUNT(*) FROM regions WHERE state \u003d \u0027Colorado\u0027 AND drought_status \u003d \u0027Drought\u0027;", + "sql_explanation": "This query simply counts the number of rows in the regions table where the state column is equal to \u0027Colorado\u0027 and the drought_status column is equal to \u0027Drought\u0027." 
+}, { + "id": "4320", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which water conservation initiatives have been implemented in the state of Texas?", + "sql_context": "CREATE TABLE water_conservation_initiatives (initiative_id INT, state VARCHAR(20)); INSERT INTO water_conservation_initiatives (initiative_id, state) VALUES (1, \u0027Texas\u0027), (2, \u0027California\u0027), (3, \u0027Florida\u0027);", + "sql": "SELECT initiative_id FROM water_conservation_initiatives WHERE state \u003d \u0027Texas\u0027;", + "sql_explanation": "This SQL query retrieves the water conservation initiatives that have been implemented in the state of Texas by selecting the initiative_id column in the water_conservation_initiatives table, and filtering for rows where the state is \u0027Texas\u0027." 
+}, { + "id": "4404", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption in the industrial sector in South Africa?", + "sql_context": "CREATE TABLE water_consumption_sa (id INT, sector VARCHAR(20), consumption FLOAT); INSERT INTO water_consumption_sa (id, sector, consumption) VALUES (1, \u0027Industrial\u0027, 350.0), (2, \u0027Residential\u0027, 150.0), (3, \u0027Agriculture\u0027, 500.0);", + "sql": "SELECT SUM(consumption) FROM water_consumption_sa WHERE sector \u003d \u0027Industrial\u0027;", + "sql_explanation": "This query calculates the total water consumption in the industrial sector in South Africa. It does this by summing (SUM) the consumption column, but only for rows where the sector is \u0027Industrial\u0027." 
+}, { + "id": "4527", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the wastewater treatment plants with a treatment capacity over 1 million cubic meters per day.", + "sql_context": "CREATE TABLE wastewater_plants (plant_name VARCHAR(50), treatment_capacity NUMERIC(12,2)); INSERT INTO wastewater_plants (plant_name, treatment_capacity) VALUES (\u0027Plant1\u0027, 1200000), (\u0027Plant2\u0027, 800000);", + "sql": "SELECT plant_name FROM wastewater_plants WHERE treatment_capacity \u003e 1000000;", + "sql_explanation": "This query identifies the wastewater treatment plants with a treatment capacity over 1 million cubic meters per day by selecting rows in the \u0027wastewater_plants\u0027 table with \u0027treatment_capacity\u0027 values greater than 1,000,000." 
+}, { + "id": "4541", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water usage by all residential users in the city of San Francisco?", + "sql_context": "CREATE TABLE residential_users (id INT, city VARCHAR(20), water_usage FLOAT); INSERT INTO residential_users (id, city, water_usage) VALUES (1, \u0027San Francisco\u0027, 15.5), (2, \u0027San Francisco\u0027, 12.3), (3, \u0027Oakland\u0027, 18.7);", + "sql": "SELECT SUM(water_usage) FROM residential_users WHERE city \u003d \u0027San Francisco\u0027;", + "sql_explanation": "This query calculates the total water usage by all residential users in the city of San Francisco by summing the water_usage column for all rows where the city is \u0027San Francisco\u0027." 
+}, { + "id": "4550", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the wastewater treatment facilities in Spain and their capacities?", + "sql_context": "CREATE TABLE treatment_facilities_ES (name VARCHAR(50), country VARCHAR(20), capacity INT); INSERT INTO treatment_facilities_ES (name, country, capacity) VALUES (\u0027Facility1\u0027, \u0027Spain\u0027, 5000), (\u0027Facility2\u0027, \u0027Spain\u0027, 7000);", + "sql": "SELECT name, capacity FROM treatment_facilities_ES WHERE country \u003d \u0027Spain\u0027;", + "sql_explanation": "The SQL query lists the wastewater treatment facilities in Spain and their capacities by selecting the \u0027name\u0027 and \u0027capacity\u0027 columns for the \u0027Spain\u0027 country." 
+}, { + "id": "4790", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the water usage for New York in 2020 to 5000.", + "sql_context": "CREATE TABLE water_usage(state VARCHAR(20), year INT, usage FLOAT);", + "sql": "UPDATE water_usage SET usage\u003d5000 WHERE state\u003d\u0027New York\u0027 AND year\u003d2020;", + "sql_explanation": "This query updates the \u0027water_usage\u0027 table and sets the \u0027usage\u0027 value to 5000 for all records where the \u0027state\u0027 is \u0027New York\u0027 and the \u0027year\u0027 is 2020." +}, { + "id": "4791", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the sensor with the minimum water level in the \u0027sensor_data\u0027 table", + "sql_context": "CREATE TABLE sensor_data (sensor_id INT, water_level FLOAT, timestamp TIMESTAMP);", + "sql": "SELECT sensor_id, MIN(water_level) as min_water_level FROM sensor_data;", + "sql_explanation": "The SQL query finds the sensor with the minimum water level by using the MIN() function on the water_level column and selecting the sensor_id column." 
+}, { + "id": "4954", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the water usage by month for the customer with id 5?", + "sql_context": "CREATE TABLE customer_water_usage (customer_id INT, month TEXT, usage FLOAT); INSERT INTO customer_water_usage (customer_id, month, usage) VALUES (5, \u0027Jan\u0027, 120.5), (5, \u0027Feb\u0027, 110.7);", + "sql": "SELECT month, usage FROM customer_water_usage WHERE customer_id \u003d 5;", + "sql_explanation": "This query retrieves the water usage by month for the customer with id 5. It filters the records based on the customer_id and returns the month and usage values for the matching records." 
+}, { + "id": "5187", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water usage by any user in the state of Florida?", + "sql_context": "CREATE TABLE all_users (id INT, state VARCHAR(20), water_usage FLOAT); INSERT INTO all_users (id, state, water_usage) VALUES (1, \u0027Florida\u0027, 20.5), (2, \u0027Florida\u0027, 25.6), (3, \u0027California\u0027, 30.2);", + "sql": "SELECT MAX(water_usage) FROM all_users WHERE state \u003d \u0027Florida\u0027;", + "sql_explanation": "This query calculates the maximum water usage by any user in the state of Florida by selecting the maximum value of the water_usage column for all rows where the state is \u0027Florida\u0027." 
+}, { + "id": "5254", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of water saved through water conservation initiatives in each region?", + "sql_context": "CREATE TABLE regional_water_savings (id INT, region VARCHAR(30), savings_percentage FLOAT); INSERT INTO regional_water_savings (id, region, savings_percentage) VALUES (1, \u0027RegionA\u0027, 0.15), (2, \u0027RegionB\u0027, 0.12), (3, \u0027RegionC\u0027, 0.18);", + "sql": "SELECT region, savings_percentage FROM regional_water_savings;", + "sql_explanation": "Directly present the percentage of water saved through water conservation initiatives in each region." +}, { + "id": "5446", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the minimum water usage in a single day from \u0027DailyWaterUsage\u0027 table for the year 2021", + "sql_context": "CREATE TABLE DailyWaterUsage (day DATE, usage INT, year INT);", + "sql": "SELECT MIN(usage) FROM DailyWaterUsage WHERE year \u003d 2021;", + "sql_explanation": "This SQL query calculates the minimum (MIN) value of the \u0027usage\u0027 column in the \u0027DailyWaterUsage\u0027 table for the year 2021." 
+}, { + "id": "5742", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the water_usage table where the usage is over 200 liters", + "sql_context": "CREATE TABLE water_usage (id INT, location VARCHAR(50), usage FLOAT);", + "sql": "DELETE FROM water_usage WHERE usage \u003e 200;", + "sql_explanation": "This query deletes records from the water_usage table where the usage is greater than 200 liters." +}, { + "id": "5808", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water usage in all sectors?", + "sql_context": "CREATE TABLE water_usage (sector VARCHAR(20), usage INT); INSERT INTO water_usage (sector, usage) VALUES (\u0027residential\u0027, 12000), (\u0027commercial\u0027, 15000), (\u0027industrial\u0027, 20000);", + "sql": "SELECT SUM(usage) FROM water_usage;", + "sql_explanation": "* The SQL query sums up the \u0027usage\u0027 column from the \u0027water_usage\u0027 table. This gives us the total water usage in all sectors." 
+}, { + "id": "903", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many attendees identified as part of the LGBTQ+ community attended events in New York in 2020?", + "sql_context": "CREATE TABLE Attendees (attendee_id INT, event_id INT, city VARCHAR(50), community_identity VARCHAR(50), attendance_year INT); INSERT INTO Attendees (attendee_id, event_id, city, community_identity, attendance_year) VALUES (1, 100, \u0027New York\u0027, \u0027LGBTQ+\u0027, 2020), (2, 101, \u0027Los Angeles\u0027, \u0027Straight\u0027, 2020), (3, 102, \u0027New York\u0027, \u0027Non-binary\u0027, 2020);", + "sql": "SELECT COUNT(*) FROM Attendees WHERE city \u003d \u0027New York\u0027 AND attendance_year \u003d 2020 AND community_identity IN (\u0027LGBTQ+\u0027, \u0027Queer\u0027, \u0027Gay\u0027, \u0027Lesbian\u0027, \u0027Bisexual\u0027, \u0027Transgender\u0027, \u0027Non-binary\u0027);", + "sql_explanation": "Count the number of attendees who identified as part of the LGBTQ+ community that attended events in New York in 2020. Include attendees who identified as \u0027Queer\u0027, \u0027Gay\u0027, \u0027Lesbian\u0027, \u0027Bisexual\u0027, \u0027Transgender\u0027, or \u0027Non-binary\u0027." 
+}, { + "id": "1656", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total funding received from government sources for literary programs in Q2 2023?", + "sql_context": "CREATE TABLE Funding (FundingID INT, FundingSource VARCHAR(255), Amount DECIMAL(10,2), Program VARCHAR(255), FundingDate DATE); INSERT INTO Funding (FundingID, FundingSource, Amount, Program, FundingDate) VALUES (1, \u0027Government\u0027, 50000, \u0027Literary\u0027, \u00272023-04-01\u0027), (2, \u0027Private\u0027, 35000, \u0027Visual Arts\u0027, \u00272023-05-01\u0027);", + "sql": "SELECT SUM(Amount) FROM Funding WHERE FundingSource \u003d \u0027Government\u0027 AND Program \u003d \u0027Literary\u0027 AND QUARTER(FundingDate) \u003d 2 AND YEAR(FundingDate) \u003d 2023;", + "sql_explanation": "The SQL query calculates the total funding received from government sources for literary programs in Q2 2023 by filtering the \u0027Funding\u0027 table based on the \u0027FundingSource\u0027, \u0027Program\u0027, \u0027FundingDate\u0027 columns and summing the \u0027Amount\u0027 using the WHERE clause." 
+}, { + "id": "2715", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of art exhibitions and dance events in the year 2020?", + "sql_context": "CREATE TABLE ArtExhibitions (id INT, year INT, visitors INT); INSERT INTO ArtExhibitions (id, year, visitors) VALUES (1, 2018, 500), (2, 2019, 700), (3, 2020, 600), (4, 2021, 800); CREATE TABLE DanceEvents (id INT, year INT, visitors INT); INSERT INTO DanceEvents (id, year, visitors) VALUES (1, 2019, 300), (2, 2020, 400), (3, 2021, 500);", + "sql": "SELECT SUM(visitors) FROM ArtExhibitions WHERE year \u003d 2020; SELECT SUM(visitors) FROM DanceEvents WHERE year \u003d 2020;", + "sql_explanation": "These queries calculate the total number of art exhibitions and dance events in the year 2020 by summing up the \u0027visitors\u0027 column where the \u0027year\u0027 is 2020 for each table separately." 
+}, { + "id": "3014", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027Theater Performances\u0027 table for the participant \u0027Zoe\u0027 who attended the \u0027Comedy\u0027 event.", + "sql_context": "CREATE TABLE theater_performances (performance_id INT, participant_name VARCHAR(50), event_type VARCHAR(50)); INSERT INTO theater_performances (performance_id, participant_name, event_type) VALUES (1, \u0027Ava\u0027, \u0027Drama\u0027), (2, \u0027Bella\u0027, \u0027Musical\u0027), (3, \u0027Chloe\u0027, \u0027Tragedy\u0027);", + "sql": "INSERT INTO theater_performances (performance_id, participant_name, event_type) VALUES (4, \u0027Zoe\u0027, \u0027Comedy\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027theater_performances\u0027 table for the participant \u0027Zoe\u0027 who attended the \u0027Comedy\u0027 event." 
+}, { + "id": "3023", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of students who attended the pottery workshop last week?", + "sql_context": "CREATE TABLE WorkshopAttendees (attendeeID INT, workshopDate DATE, age INT); INSERT INTO WorkshopAttendees (attendeeID, workshopDate, age) VALUES (13, \u00272022-05-01\u0027, 18), (14, \u00272022-05-02\u0027, 20), (15, \u00272022-05-03\u0027, 22);", + "sql": "SELECT MIN(age) FROM WorkshopAttendees WHERE workshopDate \u003e\u003d \u00272022-05-01\u0027 AND workshopDate \u003c\u003d \u00272022-05-07\u0027;", + "sql_explanation": "The SQL query calculates the minimum age of students who attended the pottery workshop last week by filtering the workshopDate between \u00272022-05-01\u0027 and \u00272022-05-07\u0027 and then using the MIN function to find the minimum age from the age column of the WorkshopAttendees table." 
+}, { + "id": "3314", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record of a volunteer who worked at a visual arts exhibition in New York.", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, name VARCHAR(50), event_id INT, hours_worked INT); CREATE TABLE events (event_id INT, name VARCHAR(50), city VARCHAR(50), event_type VARCHAR(50)); INSERT INTO events (event_id, name, city, event_type) VALUES (1, \u0027Visual Arts Exhibition\u0027, \u0027New York\u0027, \u0027Visual Arts\u0027);", + "sql": "INSERT INTO volunteers (volunteer_id, name, event_id, hours_worked) VALUES (1, \u0027Karen Brown\u0027, 1, 8);", + "sql_explanation": "This query inserts a new record into the volunteers table, representing a volunteer who worked at a visual arts exhibition in New York, by specifying a unique volunteer_id, name, event_id corresponding to the New York exhibition, and hours_worked." 
+}, { + "id": "3319", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many art_exhibit events had more than 30 attendees in Paris?", + "sql_context": "CREATE TABLE art_exhibit_attendance (id INT, num_attendees INT, exhibit_location VARCHAR(50)); INSERT INTO art_exhibit_attendance (id, num_attendees, exhibit_location) VALUES (1, 25, \u0027Paris\u0027), (2, 35, \u0027Paris\u0027), (3, 45, \u0027Paris\u0027), (4, 30, \u0027Berlin\u0027), (5, 50, \u0027Berlin\u0027), (6, 20, \u0027London\u0027);", + "sql": "SELECT COUNT(*) FROM art_exhibit_attendance WHERE num_attendees \u003e 30 AND exhibit_location \u003d \u0027Paris\u0027;", + "sql_explanation": "This query counts the number of art_exhibit events that had more than 30 attendees in Paris." 
+}, { + "id": "3326", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many visual art classes were taught in rural areas in 2021?", + "sql_context": "CREATE TABLE classes (id INT, class_type VARCHAR(50), location VARCHAR(50), year INT); INSERT INTO classes (id, class_type, location, year) VALUES (1, \u0027Visual Art\u0027, \u0027Rural\u0027, 2021), (2, \u0027Music\u0027, \u0027Urban\u0027), (3, \u0027Visual Art\u0027, \u0027Suburban\u0027, 2022);", + "sql": "SELECT COUNT(*) FROM classes WHERE class_type \u003d \u0027Visual Art\u0027 AND location \u003d \u0027Rural\u0027 AND year \u003d 2021;", + "sql_explanation": "The query counts the number of visual art classes taught in rural areas in 2021 by filtering the \u0027classes\u0027 table for visual art classes in rural areas and in the year 2021." 
+}, { + "id": "3431", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of visitors who attended dance performances in the last quarter?", + "sql_context": "CREATE TABLE DancePerformances (eventID INT, visitorAge INT, visitDate DATE); INSERT INTO DancePerformances (eventID, visitorAge, visitDate) VALUES (1, 32, \u00272022-01-05\u0027), (2, 45, \u00272022-02-10\u0027), (3, 30, \u00272022-03-20\u0027);", + "sql": "SELECT AVG(visitorAge) FROM DancePerformances WHERE visitDate \u003e\u003d DATEADD(quarter, -1, GETDATE());", + "sql_explanation": "This SQL query calculates the average age of visitors by finding the mean value of the visitorAge column in the DancePerformances table. The WHERE clause filters the records to only include those where the visitDate is within the last quarter." 
+}, { + "id": "3557", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many total donations were made in the month of June in the \"museum_donations\" table?", + "sql_context": "CREATE TABLE museum_donations (donation_id INT, donation_amount FLOAT, donation_date DATE); INSERT INTO museum_donations (donation_id, donation_amount, donation_date) VALUES (1, 250.00, \u00272021-06-01\u0027), (2, 300.00, \u00272021-06-15\u0027), (3, 150.00, \u00272021-07-01\u0027);", + "sql": "SELECT SUM(donation_amount) FROM museum_donations WHERE EXTRACT(MONTH FROM donation_date) \u003d 6;", + "sql_explanation": "Calculate the total donation amount in June by extracting the month from the donation_date and comparing it to June (6)." 
+}, { + "id": "3681", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding_amount for art_exhibit events in 2020 and 2021?", + "sql_context": "CREATE TABLE art_exhibit_funding (id INT, funding_amount INT, event_date DATE); INSERT INTO art_exhibit_funding (id, funding_amount, event_date) VALUES (1, 10000, \u00272020-01-01\u0027), (2, 15000, \u00272021-01-01\u0027), (3, 12000, \u00272020-01-02\u0027), (4, 13000, \u00272021-01-02\u0027);", + "sql": "SELECT SUM(funding_amount) FROM art_exhibit_funding WHERE YEAR(event_date) IN (2020, 2021);", + "sql_explanation": "This query calculates the total funding_amount for art_exhibit events that took place in 2020 and 2021." 
+}, { + "id": "3858", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of attendees for events in the \u0027Midwest\u0027 region with an attendance of over 500?", + "sql_context": "CREATE TABLE Events (event_id INT, region VARCHAR(20), attendee_count INT); INSERT INTO Events (event_id, region, attendee_count) VALUES (1, \u0027Midwest\u0027, 600), (2, \u0027Southeast\u0027, 400), (3, \u0027Northeast\u0027, 350);", + "sql": "SELECT SUM(attendee_count) FROM Events WHERE region \u003d \u0027Midwest\u0027 AND attendee_count \u003e 500", + "sql_explanation": "Filter records in the \u0027Events\u0027 table to only include those with a region of \u0027Midwest\u0027 and an attendee count greater than 500, then calculate and return the total sum of attendee counts." 
+}, { + "id": "3874", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average attendance for arts and culture events in \u0027Toronto\u0027 for events with an attendance of over 1000?", + "sql_context": "CREATE TABLE Events (event_id INT, city VARCHAR(20), attendee_count INT); INSERT INTO Events (event_id, city, attendee_count) VALUES (1, \u0027Toronto\u0027, 1200), (2, \u0027Toronto\u0027, 1500), (3, \u0027Toronto\u0027, 900);", + "sql": "SELECT AVG(attendee_count) FROM Events WHERE city \u003d \u0027Toronto\u0027 AND attendee_count \u003e 1000;", + "sql_explanation": "Filter records in the \u0027Events\u0027 table to only include those with a city of \u0027Toronto\u0027 and attendee_count greater than 1000. Then calculate and return the average attendee_count." 
+}, { + "id": "3879", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average attendance for events in the \u0027Midwest\u0027 region with an attendance of over 400?", + "sql_context": "CREATE TABLE Events (event_id INT, region VARCHAR(20), attendee_count INT); INSERT INTO Events (event_id, region, attendee_count) VALUES (1, \u0027Midwest\u0027, 600), (2, \u0027Southeast\u0027, 400), (3, \u0027Northeast\u0027, 350);", + "sql": "SELECT AVG(attendee_count) FROM Events WHERE region \u003d \u0027Midwest\u0027 AND attendee_count \u003e 400", + "sql_explanation": "Filter records in the \u0027Events\u0027 table to only include those with a region of \u0027Midwest\u0027 and an attendee count greater than 400, then calculate and return the average attendee count." 
+}, { + "id": "4210", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of attendees who participated in the Theater Program in 2023?", + "sql_context": "CREATE TABLE Attendees (id INT PRIMARY KEY, age INT, program VARCHAR(20), year INT); INSERT INTO Attendees (id, age, program, year) VALUES (1, 25, \u0027Theater Program\u0027, 2023); INSERT INTO Attendees (id, age, program, year) VALUES (2, 40, \u0027Dance Program\u0027, 2022);", + "sql": "SELECT MIN(age) FROM Attendees WHERE program \u003d \u0027Theater Program\u0027 AND year \u003d 2023;", + "sql_explanation": "This SQL query calculates the minimum age of attendees who participated in the Theater Program in 2023 by selecting the age column and applying the MIN function, filtering the records with the WHERE clause to only include attendees of the specified program and year." 
+}, { + "id": "4229", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Add a new view \u0027TopDonors\u0027 that shows the names and donations of donors who have donated over 1000", + "sql_context": "CREATE TABLE Donors (ID INT PRIMARY KEY, Name TEXT, Donation INT);", + "sql": "CREATE VIEW TopDonors AS SELECT Name, Donation FROM Donors WHERE Donation \u003e 1000;", + "sql_explanation": "This creates a new view \u0027TopDonors\u0027 that shows the names and donations of donors who have donated over 1000." +}, { + "id": "5378", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the event name and its corresponding city for all events with attendance greater than 250", + "sql_context": "CREATE TABLE events (event_id INT, event_name VARCHAR(50), city VARCHAR(30), attendance INT); INSERT INTO events (event_id, event_name, city, attendance) VALUES (1, \u0027Theater Play\u0027, \u0027New York\u0027, 200), (2, \u0027Art Exhibit\u0027, \u0027Los Angeles\u0027, 300), (3, \u0027Music Festival\u0027, \u0027New York\u0027, 400);", + "sql": "SELECT event_name, city FROM events WHERE attendance \u003e 250;", + "sql_explanation": "This query filters events with attendance greater than 250 and displays the corresponding event name and city." 
+}, { + "id": "5385", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by the \u0027Theater\u0027 program in the \u0027Funding\u0027 table?", + "sql_context": "CREATE TABLE Funding (id INT, program VARCHAR(50), location VARCHAR(50), date DATE, amount DECIMAL(10,2)); INSERT INTO Funding (id, program, location, date, amount) VALUES (1, \u0027Theater\u0027, \u0027California\u0027, \u00272022-01-01\u0027, 10000);", + "sql": "SELECT SUM(amount) FROM Funding WHERE program \u003d \u0027Theater\u0027;", + "sql_explanation": "This query sums the \u0027amount\u0027 column for all rows in the \u0027Funding\u0027 table where the program is \u0027Theater\u0027." +}, { + "id": "5568", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of the audience members who attended the \"Dance\" event?", + "sql_context": "CREATE TABLE Audience (AudienceID INT, Age INT, Event TEXT); INSERT INTO Audience (AudienceID, Age, Event) VALUES (1, 35, \u0027Dance\u0027), (2, 28, \u0027Dance\u0027), (3, 42, \u0027Theater\u0027);", + "sql": "SELECT AVG(Age) FROM Audience WHERE Event \u003d \u0027Dance\u0027;", + "sql_explanation": "This SQL query calculates the average age of the audience members who attended the \"Dance\" event. 
It does this by using the AVG() function on the \"Age\" column, and filtering the data by the \"Event\" column with a WHERE clause." +}, { + "id": "5615", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of the audience members who attended the \"Art\" event?", + "sql_context": "CREATE TABLE Audience (AudienceID INT, Age INT, Event TEXT); INSERT INTO Audience (AudienceID, Age, Event) VALUES (1, 30, \u0027Art\u0027), (2, 25, \u0027Art\u0027), (3, 40, \u0027Art\u0027);", + "sql": "SELECT MIN(Age) FROM Audience WHERE Event \u003d \u0027Art\u0027;", + "sql_explanation": "This SQL query retrieves the minimum age of the audience members who attended the \"Art\" event. It does this by using the MIN() function on the \"Age\" column, and filtering the data by the \"Event\" column with a WHERE clause." 
+}, { + "id": "1299", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of defense contracts awarded to companies in the Asia-Pacific region, for military communication systems, in 2022?", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, contract_value FLOAT, contract_date DATE, company_region VARCHAR(255), system_type VARCHAR(255)); INSERT INTO defense_contracts (contract_id, contract_value, contract_date, company_region, system_type) VALUES (4, 800000, \u00272022-03-03\u0027, \u0027Asia-Pacific\u0027, \u0027communication\u0027), (5, 900000, \u00272022-05-05\u0027, \u0027Europe\u0027, \u0027surveillance\u0027), (6, 700000, \u00272022-11-11\u0027, \u0027Asia-Pacific\u0027, \u0027communication\u0027);", + "sql": "SELECT SUM(contract_value) FROM defense_contracts WHERE company_region \u003d \u0027Asia-Pacific\u0027 AND system_type \u003d \u0027communication\u0027 AND EXTRACT(YEAR FROM contract_date) \u003d 2022;", + "sql_explanation": "The SQL query calculates the total value of defense contracts awarded to companies in the Asia-Pacific region, for military communication systems, in 2022 by summing the contract_value column, and filtering rows by the company_region, system_type, and contract_date columns. It uses the SUM and WHERE clauses, and the EXTRACT function." 
+}, { + "id": "1352", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many military aircraft maintenance requests were recorded for the Air Force in Q4 2019?", + "sql_context": "CREATE TABLE maintenance_requests (request_id INT, service_branch VARCHAR(255), request_date DATE); INSERT INTO maintenance_requests (request_id, service_branch, request_date) VALUES (1, \u0027Air Force\u0027, \u00272019-10-01\u0027), (2, \u0027Navy\u0027, \u00272019-12-02\u0027), (3, \u0027Air Force\u0027, \u00272019-11-03\u0027);", + "sql": "SELECT COUNT(*) FROM maintenance_requests WHERE service_branch \u003d \u0027Air Force\u0027 AND EXTRACT(QUARTER FROM request_date) \u003d 4 AND EXTRACT(YEAR FROM request_date) \u003d 2019;", + "sql_explanation": "This query counts the number of military aircraft maintenance requests recorded for the Air Force in Q4 2019. It uses the COUNT function to count the number of records where service_branch is \u0027Air Force\u0027 and the request_date is in Q4 2019." 
+}, { + "id": "1374", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the defense contracts table for a contract with the US Government?", + "sql_context": "CREATE TABLE defense_contracts (id INT, contract_name VARCHAR(50), contract_value DECIMAL(10,2), contract_date DATE, contract_party VARCHAR(50)); INSERT INTO defense_contracts (id, contract_name, contract_value, contract_date, contract_party) VALUES (1, \u0027Contract A\u0027, 1000000, \u00272021-01-01\u0027, \u0027US Government\u0027), (2, \u0027Contract B\u0027, 2000000, \u00272021-02-01\u0027, \u0027Foreign Government\u0027);", + "sql": "INSERT INTO defense_contracts (id, contract_name, contract_value, contract_date, contract_party) VALUES (3, \u0027Contract C\u0027, 1500000, \u00272021-03-01\u0027, \u0027US Government\u0027);", + "sql_explanation": "This query inserts a new record into the defense contracts table for a contract with the US Government by using the INSERT INTO statement. The new record has an id of 3, a contract name of \u0027Contract C\u0027, a contract value of 1500000, a contract date of \u00272021-03-01\u0027, and a contract party of \u0027US Government\u0027." 
+}, { + "id": "1709", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cybersecurity incidents were reported by defense contractors in California in 2021?", + "sql_context": "CREATE TABLE CyberIncidents (company TEXT, incident_date DATE, state TEXT); INSERT INTO CyberIncidents (company, incident_date, state) VALUES (\u0027Contractor X\u0027, \u00272021-02-01\u0027, \u0027California\u0027), (\u0027Contractor Y\u0027, \u00272021-10-15\u0027, \u0027California\u0027), (\u0027Contractor Z\u0027, \u00272021-12-30\u0027, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM CyberIncidents WHERE company LIKE \u0027%defense%\u0027 AND state \u003d \u0027California\u0027 AND incident_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "The SQL query counts the number of cybersecurity incidents reported by defense contractors in California in 2021 using the COUNT function, filtering for defense contractors, California, and the relevant date range with the WHERE and BETWEEN clauses." 
+}, { + "id": "1787", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the unemployment rate for veterans in California as of March 2022?", + "sql_context": "CREATE TABLE veteran_unemployment (state varchar(255), unemployment_date date, unemployment_rate decimal(5,2));", + "sql": "SELECT unemployment_rate FROM veteran_unemployment WHERE state \u003d \u0027California\u0027 AND MONTH(unemployment_date) \u003d 3 AND YEAR(unemployment_date) \u003d 2022;", + "sql_explanation": "This query retrieves the unemployment rate for veterans in California as of March 2022 by selecting the \u0027unemployment_rate\u0027 column from the \u0027veteran_unemployment\u0027 table, filtering for rows with \u0027California\u0027 in the \u0027state\u0027 column and the month 3 and year 2022 in the \u0027unemployment_date\u0027 column." 
+}, { + "id": "1994", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show defense contracts for \u0027Blue Skies Inc.\u0027 and \u0027Green Horizons Inc.\u0027 in Q3 2021", + "sql_context": "CREATE TABLE defense_contracts (company VARCHAR(255), quarter VARCHAR(10), value DECIMAL(10,2));", + "sql": "SELECT company, quarter, value FROM defense_contracts WHERE company IN (\u0027Blue Skies Inc.\u0027, \u0027Green Horizons Inc.\u0027) AND quarter \u003d \u0027Q3 2021\u0027;", + "sql_explanation": "This query returns defense contracts for \u0027Blue Skies Inc.\u0027 and \u0027Green Horizons Inc.\u0027 in Q3 2021 using the IN operator and the AND condition." 
+}, { + "id": "2094", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of defense contracts awarded to companies in California in Q1 2022?", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, company_name TEXT, state TEXT, value FLOAT, contract_date DATE); INSERT INTO defense_contracts (contract_id, company_name, state, value, contract_date) VALUES (1, \u0027ABC Corp\u0027, \u0027California\u0027, 500000, \u00272022-01-05\u0027), (2, \u0027XYZ Inc\u0027, \u0027California\u0027, 750000, \u00272022-03-15\u0027);", + "sql": "SELECT SUM(value) FROM defense_contracts WHERE state \u003d \u0027California\u0027 AND contract_date \u003e\u003d \u00272022-01-01\u0027 AND contract_date \u003c \u00272022-04-01\u0027;", + "sql_explanation": "This query calculates the total value of defense contracts awarded to companies in California in Q1 2022 by summing the \u0027value\u0027 column where \u0027state\u0027 is \u0027California\u0027 and \u0027contract_date\u0027 is within Q1 2022." 
+}, { + "id": "2275", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of Defense contracts awarded to women-owned businesses in 2019?", + "sql_context": "CREATE TABLE DefenseContracts (contract_id INT, contractor_name VARCHAR(255), gender VARCHAR(255), contract_date DATE, contract_value DECIMAL(10,2)); INSERT INTO DefenseContracts (contract_id, contractor_name, gender, contract_date, contract_value) VALUES (1, \u0027TechCo\u0027, \u0027Female\u0027, \u00272019-01-15\u0027, 50000), (2, \u0027GreenTech\u0027, \u0027Male\u0027, \u00272019-02-20\u0027, 80000);", + "sql": "SELECT SUM(contract_value) FROM DefenseContracts WHERE gender \u003d \u0027Female\u0027 AND contract_date BETWEEN \u00272019-01-01\u0027 AND \u00272019-12-31\u0027;", + "sql_explanation": "The SQL query calculates the total value of Defense contracts awarded to women-owned businesses in 2019 by summing the contract_value column where the gender is \u0027Female\u0027 and the contract_date is within 2019." 
+}, { + "id": "2300", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the military equipment table for a helicopter in the Atlantic region?", + "sql_context": "CREATE TABLE military_equipment (id INT, equipment_name VARCHAR(50), equipment_type VARCHAR(50), region VARCHAR(50)); INSERT INTO military_equipment (id, equipment_name, equipment_type, region) VALUES (1, \u0027Helicopter A\u0027, \u0027Helicopter\u0027, \u0027Atlantic\u0027), (2, \u0027Tank B\u0027, \u0027Tank\u0027, \u0027Pacific\u0027);", + "sql": "INSERT INTO military_equipment (id, equipment_name, equipment_type, region) VALUES (3, \u0027Helicopter C\u0027, \u0027Helicopter\u0027, \u0027Atlantic\u0027);", + "sql_explanation": "This query inserts a new record into the military equipment table for a helicopter in the Atlantic region by using the INSERT INTO statement. The new record has an id of 3, an equipment name of \u0027Helicopter C\u0027, an equipment type of \u0027Helicopter\u0027, and a region of \u0027Atlantic\u0027." 
+}, { + "id": "2364", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military equipment maintenance requests in Germany in the last 12 months?", + "sql_context": "CREATE TABLE MaintenanceRequests (id INT, country VARCHAR(50), request_date DATE); INSERT INTO MaintenanceRequests (id, country, request_date) VALUES (1, \u0027Germany\u0027, \u00272021-01-05\u0027), (2, \u0027France\u0027, \u00272021-02-18\u0027), (3, \u0027Germany\u0027, \u00272021-03-24\u0027);", + "sql": "SELECT COUNT(*) FROM MaintenanceRequests WHERE country \u003d \u0027Germany\u0027 AND request_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 12 MONTH);", + "sql_explanation": "This query counts the total number of military equipment maintenance requests in Germany in the last 12 months. It filters the MaintenanceRequests table to only include rows with a country of \u0027Germany\u0027 and a request_date within the last 12 months using the WHERE clause. Then, it counts the number of rows using the COUNT function." 
+}, { + "id": "2389", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average veteran unemployment rate for the last 12 months, rounded to the nearest integer?", + "sql_context": "CREATE TABLE veteran_unemployment (unemployment_rate FLOAT, report_date DATE); INSERT INTO veteran_unemployment (unemployment_rate, report_date) VALUES (4.1, \u00272021-12-01\u0027), (4.3, \u00272021-11-01\u0027), (4.5, \u00272021-10-01\u0027);", + "sql": "SELECT ROUND(AVG(unemployment_rate)) FROM veteran_unemployment WHERE report_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH);", + "sql_explanation": "This query calculates the average veteran unemployment rate for the last 12 months, rounded to the nearest integer, by selecting the round of average of unemployment_rate from the veteran_unemployment table where the report_date is within the last 12 months." 
+}, { + "id": "2504", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which military equipment has not been maintained for at least six months and is located in Europe?", + "sql_context": "CREATE TABLE Equipment (id INT, name VARCHAR(255), type VARCHAR(255), last_maintenance DATE, location VARCHAR(255)); INSERT INTO Equipment (id, name, type, last_maintenance, location) VALUES (1, \u0027Tank A\u0027, \u0027Armor\u0027, \u00272021-09-01\u0027, \u0027North America\u0027); INSERT INTO Equipment (id, name, type, last_maintenance, location) VALUES (2, \u0027Helicopter X\u0027, \u0027Air Support\u0027, \u00272021-08-15\u0027, \u0027Europe\u0027);", + "sql": "SELECT name, type, location FROM Equipment WHERE last_maintenance \u003c DATEADD(month, -6, GETDATE()) AND location \u003d \u0027Europe\u0027;", + "sql_explanation": "This query selects the name, type, and location of equipment that hasn\u0027t been maintained in the last 6 months and is located in Europe." 
+}, { + "id": "2517", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027veteran_employment\u0027 table for a veteran hired as a \u0027data analyst\u0027 on 2022-02-20 with a salary of $70,000", + "sql_context": "CREATE TABLE veteran_employment (veteran_id INT, job_title VARCHAR(30), hire_date DATE, salary FLOAT);", + "sql": "INSERT INTO veteran_employment (veteran_id, job_title, hire_date, salary) VALUES (2, \u0027data analyst\u0027, \u00272022-02-20\u0027, 70000);", + "sql_explanation": "This query inserts a new record into the \u0027veteran_employment\u0027 table for a veteran hired as a \u0027data analyst\u0027 on February 20th, 2022 with a salary of $70,000." 
+}, { + "id": "2641", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the employment rate for veterans in the defense industry in New York as of 2020?", + "sql_context": "CREATE TABLE EmploymentStats (state VARCHAR(255), year INT, industry VARCHAR(255), veteran_employment_rate FLOAT); INSERT INTO EmploymentStats (state, year, industry, veteran_employment_rate) VALUES (\u0027California\u0027, 2021, \u0027Defense\u0027, 0.15), (\u0027New York\u0027, 2020, \u0027Defense\u0027, 0.13);", + "sql": "SELECT veteran_employment_rate FROM EmploymentStats WHERE state \u003d \u0027New York\u0027 AND year \u003d 2020 AND industry \u003d \u0027Defense\u0027;", + "sql_explanation": "This query selects the veteran employment rate from the EmploymentStats table where the state is New York, the year is 2020, and the industry is \u0027Defense\u0027." 
+}, { + "id": "2666", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of defense contracts awarded in H1 2020 to companies in Texas?", + "sql_context": "CREATE TABLE DefenseContracts (ContractID INT, CompanyName TEXT, State TEXT, ContractDate DATE); INSERT INTO DefenseContracts (ContractID, CompanyName, State, ContractDate) VALUES (1, \u0027ABC Corporation\u0027, \u0027Texas\u0027, \u00272020-01-10\u0027), (2, \u0027XYZ Incorporated\u0027, \u0027California\u0027, \u00272020-02-15\u0027), (3, \u0027DEF Enterprises\u0027, \u0027Texas\u0027, \u00272020-03-20\u0027), (4, \u0027LMN Industries\u0027, \u0027New York\u0027, \u00272020-04-25\u0027);", + "sql": "SELECT ContractID FROM DefenseContracts WHERE State \u003d \u0027Texas\u0027 AND ContractDate BETWEEN \u00272020-01-01\u0027 AND \u00272020-06-30\u0027;", + "sql_explanation": "This SQL query selects ContractID column from the DefenseContracts table, filters the records for companies in Texas and contracts awarded between January 1, 2020, and June 30, 2020, to find the number of defense contracts awarded in H1 2020 to companies in Texas." 
+}, { + "id": "2818", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new threat intelligence metrics into the threat_intelligence table with IDs 4 and 5, names \u0027Physical Attacks\u0027 and \u0027Insider Threats\u0027, and values 20 and 30", + "sql_context": "CREATE TABLE threat_intelligence (id INT, name VARCHAR(50), value INT);", + "sql": "INSERT INTO threat_intelligence (id, name, value) VALUES (4, \u0027Physical Attacks\u0027, 20), (5, \u0027Insider Threats\u0027, 30);", + "sql_explanation": "This query inserts two new records into the threat_intelligence table with IDs 4 and 5, names \u0027Physical Attacks\u0027 and \u0027Insider Threats\u0027, and values 20 and 30." 
+}, { + "id": "3030", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military equipment in service in California as of Q2 2022?", + "sql_context": "CREATE TABLE Equipment_Inventory (inventory_id INT, equipment_type TEXT, state TEXT, in_service INT, quarter INT, year INT); INSERT INTO Equipment_Inventory (inventory_id, equipment_type, state, in_service, quarter, year) VALUES (1, \u0027Helicopter\u0027, \u0027California\u0027, 50, 2, 2022), (2, \u0027Tank\u0027, \u0027California\u0027, 30, 2, 2022);", + "sql": "SELECT SUM(in_service) FROM Equipment_Inventory WHERE state \u003d \u0027California\u0027 AND quarter \u003d 2 AND year \u003d 2022;", + "sql_explanation": "The SQL query calculates the total number of military equipment in service in California as of Q2 2022 by summing the in_service where the state is California, quarter is 2 and year is 2022." 
+}, { + "id": "3128", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all equipment maintenance records for the month of June 2021", + "sql_context": "CREATE TABLE equipment_maintenance (equipment_id int, maintenance_date date, maintenance_type varchar(255), maintenance_cost int);", + "sql": "SELECT * FROM equipment_maintenance WHERE MONTH(maintenance_date) \u003d 6 AND YEAR(maintenance_date) \u003d 2021;", + "sql_explanation": "This query retrieves all equipment maintenance records for June 2021 by selecting all columns from the \u0027equipment_maintenance\u0027 table, filtering for rows with the month 6 and year 2021 in the \u0027maintenance_date\u0027 column." 
+}, { + "id": "3273", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 defense contractors by awarded contract value?", + "sql_context": "CREATE TABLE defense_contractors (contractor_id INT, contractor_name VARCHAR(255), contract_value FLOAT); INSERT INTO defense_contractors (contractor_id, contractor_name, contract_value) VALUES (1, \u0027Lockheed Martin\u0027, 52000000000), (2, \u0027Boeing\u0027, 41000000000), (3, \u0027Raytheon Technologies\u0027, 28000000000), (4, \u0027Northrop Grumman\u0027, 27000000000), (5, \u0027General Dynamics\u0027, 25000000000);", + "sql": "SELECT contractor_name, contract_value FROM defense_contractors ORDER BY contract_value DESC LIMIT 5;", + "sql_explanation": "This query orders the defense contractors by contract value in descending order and limits the results to the top 5, returning the names and contract values of the top 5 defense contractors by awarded contract value." 
+}, { + "id": "3282", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of threat intelligence reports created in Q1 2022", + "sql_context": "CREATE TABLE threat_intelligence (report_id int, report_date date, report_type varchar(255));", + "sql": "SELECT COUNT(*) FROM threat_intelligence WHERE QUARTER(report_date) \u003d 1 AND YEAR(report_date) \u003d 2022;", + "sql_explanation": "This query counts the number of threat intelligence reports created in Q1 2022 by counting all rows in the \u0027threat_intelligence\u0027 table, filtering for rows with Q1 and year 2022 in the \u0027report_date\u0027 column." 
+}, { + "id": "3373", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average threat level for the last 30 days", + "sql_context": "CREATE TABLE threat_intelligence (id INT, threat_level INT, threat_date DATE); INSERT INTO threat_intelligence (id, threat_level, threat_date) VALUES (1, 5, \u00272022-01-01\u0027), (2, 3, \u00272022-01-02\u0027), (3, 7, \u00272022-01-03\u0027);", + "sql": "SELECT AVG(threat_level) FROM threat_intelligence WHERE threat_date \u003e\u003d CURDATE() - INTERVAL 30 DAY;", + "sql_explanation": "This SQL query calculates the average threat level for the last 30 days by selecting the threat level for all records with a threat date within the last 30 days and calculating the average of those values." 
+}, { + "id": "3427", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military equipment maintenance requests in Q1 2022?", + "sql_context": "CREATE TABLE MaintenanceRequests (RequestID INT, RequestDate DATE); INSERT INTO MaintenanceRequests (RequestID, RequestDate) VALUES (1, \u00272022-01-05\u0027), (2, \u00272022-02-12\u0027), (3, \u00272022-03-20\u0027), (4, \u00272022-04-25\u0027), (5, \u00272022-05-10\u0027);", + "sql": "SELECT COUNT(*) FROM MaintenanceRequests WHERE RequestDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "This SQL query counts all records in the MaintenanceRequests table for Q1 2022 to find the total number of military equipment maintenance requests in Q1 2022." 
+}, { + "id": "3781", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the defense contractors in California with contracts worth over 1 million", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, company_name TEXT, state TEXT, value FLOAT); INSERT INTO defense_contracts (contract_id, company_name, state, value) VALUES (1, \u0027XYZ Corp\u0027, \u0027California\u0027, 1500000), (2, \u0027ABC Inc\u0027, \u0027Texas\u0027, 500000), (3, \u0027DEF Ltd\u0027, \u0027California\u0027, 800000);", + "sql": "SELECT company_name FROM defense_contracts WHERE state \u003d \u0027California\u0027 AND value \u003e 1000000;", + "sql_explanation": "* Queries the defense_contracts table for records with state\u003d\u0027California\u0027 and value greater than 1 million, returning the company_name column." 
+}, { + "id": "3824", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many veterans are employed in the defense industry in Texas?", + "sql_context": "CREATE TABLE veteran_employment (industry VARCHAR(255), state VARCHAR(255), employment NUMERIC); INSERT INTO veteran_employment (industry, state, employment) VALUES (\u0027Defense\u0027, \u0027Texas\u0027, 15000), (\u0027Aerospace\u0027, \u0027Texas\u0027, 10000);", + "sql": "SELECT employment FROM veteran_employment WHERE industry \u003d \u0027Defense\u0027 AND state \u003d \u0027Texas\u0027;", + "sql_explanation": "This SQL query determines the number of veterans employed in the defense industry in Texas by selecting the \u0027employment\u0027 column where the \u0027industry\u0027 is \u0027Defense\u0027 and the \u0027state\u0027 is \u0027Texas\u0027." 
+}, { + "id": "3997", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of defense contracts awarded to companies in California in Q1 2020?", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, company_name VARCHAR(255), contract_value DECIMAL(10,2), state VARCHAR(2), quarter INT);", + "sql": "SELECT SUM(contract_value) FROM defense_contracts WHERE state \u003d \u0027CA\u0027 AND quarter \u003d 1;", + "sql_explanation": "This query calculates the total value of defense contracts by summing the contract_value column, filtering for contracts awarded to companies in California (CA) and in Q1 (quarter 1)." 
+}, { + "id": "4144", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of military equipment maintenance contracts in Texas since 2020?", + "sql_context": "CREATE TABLE maintenance (id INT, location VARCHAR(255), date DATE, value FLOAT); INSERT INTO maintenance (id, location, date, value) VALUES (1, \u0027Texas\u0027, \u00272020-01-01\u0027, 5000), (2, \u0027California\u0027, \u00272021-01-01\u0027, 7000);", + "sql": "SELECT SUM(value) FROM maintenance WHERE location \u003d \u0027Texas\u0027 AND year(date) \u003e\u003d 2020;", + "sql_explanation": "This query sums the value of all maintenance contracts in Texas since 2020." +}, { + "id": "4152", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new vendor named \u0027XYZ Defense\u0027 to the vendor_contracts table with a vendor_id of 123", + "sql_context": "CREATE TABLE vendor_contracts (vendor_id INT, vendor_name VARCHAR(50), contract_id INT, contract_value DECIMAL(10,2));", + "sql": "INSERT INTO vendor_contracts (vendor_id, vendor_name) VALUES (123, \u0027XYZ Defense\u0027);", + "sql_explanation": "This query inserts a new record into the vendor_contracts table for a vendor named \u0027XYZ Defense\u0027 with a vendor_id of 123. 
The contract_id and contract_value fields are left NULL." +}, { + "id": "4160", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the threat level of all records in the Middle East region to \u0027High\u0027?", + "sql_context": "CREATE TABLE threat_intelligence (id INT, threat_type VARCHAR(50), threat_level VARCHAR(50), region VARCHAR(50)); INSERT INTO threat_intelligence (id, threat_type, threat_level, region) VALUES (1, \u0027Cyber\u0027, \u0027Medium\u0027, \u0027Asia\u0027), (2, \u0027Physical\u0027, \u0027Low\u0027, \u0027Middle East\u0027);", + "sql": "UPDATE threat_intelligence SET threat_level \u003d \u0027High\u0027 WHERE region \u003d \u0027Middle East\u0027;", + "sql_explanation": "This query updates the threat level of all records in the Middle East region to \u0027High\u0027 by using the UPDATE statement with a WHERE clause. The WHERE clause checks if the region is \u0027Middle East\u0027." 
+}, { + "id": "4330", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cybersecurity contract value?", + "sql_context": "CREATE TABLE contracts (id INT, category VARCHAR(255), value DECIMAL(10,2));INSERT INTO contracts (id, category, value) VALUES (1, \u0027Aircraft\u0027, 5000000.00), (2, \u0027Missiles\u0027, 2000000.00), (3, \u0027Shipbuilding\u0027, 8000000.00), (4, \u0027Cybersecurity\u0027, 3000000.00), (5, \u0027Aircraft\u0027, 6000000.00), (6, \u0027Shipbuilding\u0027, 9000000.00);", + "sql": "SELECT AVG(value) as avg_value FROM contracts WHERE category \u003d \u0027Cybersecurity\u0027;", + "sql_explanation": "The SQL query calculates the average contract value for the \u0027Cybersecurity\u0027 category. It uses the WHERE clause to filter records where the \u0027category\u0027 column is equal to \u0027Cybersecurity\u0027, and the AVG function to find the average \u0027value\u0027 for those records." 
+}, { + "id": "4388", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum maintenance cost for military helicopters?", + "sql_context": "CREATE TABLE helicopters (id INT, model VARCHAR(50), maintenance_cost FLOAT); INSERT INTO helicopters (id, model, maintenance_cost) VALUES (1, \u0027AH-64\u0027, 45000), (2, \u0027CH-47\u0027, 50000), (3, \u0027UH-60\u0027, 38000);", + "sql": "SELECT MAX(maintenance_cost) FROM helicopters WHERE model LIKE \u0027%helicopter%\u0027;", + "sql_explanation": "The SQL query calculates the maximum maintenance cost for military helicopters by using the MAX() function on the maintenance_cost column and filtering on the model using the LIKE keyword." 
+}, { + "id": "4512", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time to resolve low-priority threat intelligence incidents?", + "sql_context": "CREATE TABLE threat_intelligence (incident_id INT, priority VARCHAR(255), resolution_time FLOAT, date DATE); INSERT INTO threat_intelligence (incident_id, priority, resolution_time, date) VALUES (1, \u0027high\u0027, 5.3, \u00272020-01-01\u0027); INSERT INTO threat_intelligence (incident_id, priority, resolution_time, date) VALUES (2, \u0027low\u0027, 2.1, \u00272020-01-05\u0027);", + "sql": "SELECT AVG(resolution_time) FROM threat_intelligence WHERE priority \u003d \u0027low\u0027;", + "sql_explanation": "This SQL query calculates the average time to resolve low-priority threat intelligence incidents by averaging the \u0027resolution_time\u0027 column where the \u0027priority\u0027 is \u0027low\u0027." 
+}, { + "id": "4526", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average maintenance cost for military equipment in the Pacific region?", + "sql_context": "CREATE TABLE MilitaryEquipment (Id INT, EquipmentName VARCHAR(50), MaintenanceCost DECIMAL(10,2), Region VARCHAR(50)); INSERT INTO MilitaryEquipment (Id, EquipmentName, MaintenanceCost, Region) VALUES (1, \u0027Tank\u0027, 5000, \u0027Pacific\u0027), (2, \u0027Helicopter\u0027, 8000, \u0027Europe\u0027);", + "sql": "SELECT AVG(MaintenanceCost) FROM MilitaryEquipment WHERE Region \u003d \u0027Pacific\u0027;", + "sql_explanation": "This SQL query calculates the average maintenance cost for military equipment in the Pacific region. It uses the AVG function to find the mean value of the MaintenanceCost column, where the Region column is equal to \u0027Pacific\u0027." 
+}, { + "id": "4581", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense contracts awarded to Raytheon in 2019?", + "sql_context": "CREATE TABLE contract (id INT, company VARCHAR(255), value FLOAT, year INT); INSERT INTO contract (id, company, value, year) VALUES (1, \u0027Raytheon\u0027, 12000000, 2019), (2, \u0027Raytheon\u0027, 18000000, 2018);", + "sql": "SELECT SUM(value) FROM contract WHERE company \u003d \u0027Raytheon\u0027 AND year \u003d 2019;", + "sql_explanation": "This query sums the value of all contracts awarded to Raytheon in the year 2019." +}, { + "id": "4624", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total amount spent on defense contracts in Q1 of 2019?", + "sql_context": "CREATE TABLE Defense_Contracts (ID INT, Quarter VARCHAR(50), Year INT, Amount INT); INSERT INTO Defense_Contracts (ID, Quarter, Year, Amount) VALUES (1, \u0027Q1\u0027, 2017, 1500000), (2, \u0027Q1\u0027, 2019, 2000000), (3, \u0027Q2\u0027, 2018, 1750000);", + "sql": "SELECT Amount FROM Defense_Contracts WHERE Quarter \u003d \u0027Q1\u0027 AND Year \u003d 2019;", + "sql_explanation": "The SQL query filters the Defense_Contracts table by Quarter (\u0027Q1\u0027) and 
Year (2019), and returns the Amount spent on defense contracts in Q1 of 2019." +}, { + "id": "4740", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total value of defense contracts awarded to companies in Texas", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, company_name VARCHAR(100), state VARCHAR(50), contract_value FLOAT);", + "sql": "SELECT SUM(contract_value) FROM defense_contracts WHERE state \u003d \u0027Texas\u0027;", + "sql_explanation": "This query calculates the total value of defense contracts awarded to companies in Texas. It does this by summing up the contract_value column, but only for rows where the state column is equal to \u0027Texas\u0027." 
+}, { + "id": "4912", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of military personnel employed by a defense contractor in India?", + "sql_context": "CREATE TABLE MilitaryPersonnel (id INT, contractor VARCHAR(50), country VARCHAR(50), personnel INT); INSERT INTO MilitaryPersonnel (id, contractor, country, personnel) VALUES (1, \u0027Larsen \u0026 Toubro\u0027, \u0027India\u0027, 10000), (2, \u0027Tata Advanced Systems\u0027, \u0027India\u0027, 8000), (3, \u0027Bharat Electronics\u0027, \u0027India\u0027, 9000);", + "sql": "SELECT MAX(personnel) FROM MilitaryPersonnel WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the maximum number of military personnel employed by a defense contractor in India. It filters the MilitaryPersonnel table to only include rows with a country of \u0027India\u0027 using the WHERE clause. Then, it calculates the maximum value of the personnel column using the MAX function." 
+}, { + "id": "4947", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of contracts awarded to Women-Owned Small Businesses (WOSB) in the Defense industry", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, contract_value FLOAT, business_type VARCHAR(20)); INSERT INTO defense_contracts (contract_id, contract_value, business_type) VALUES (1, 500000, \u0027WOSB\u0027);", + "sql": "SELECT COUNT(*) FROM defense_contracts WHERE business_type \u003d \u0027WOSB\u0027;", + "sql_explanation": "This query counts the number of records in the defense_contracts table where the business_type is \u0027WOSB\u0027, effectively finding the number of contracts awarded to Women-Owned Small Businesses in the Defense industry." 
+}, { + "id": "5179", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of military equipment maintenance in the US?", + "sql_context": "CREATE TABLE military_equipment (id INT, country VARCHAR(50), cost FLOAT); INSERT INTO military_equipment (id, country, cost) VALUES (1, \u0027USA\u0027, 1500000); INSERT INTO military_equipment (id, country, cost) VALUES (2, \u0027USA\u0027, 1800000);", + "sql": "SELECT AVG(cost) FROM military_equipment WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the average cost of military equipment maintenance in the USA by selecting the cost column and applying the AVG function for rows with the country set to \u0027USA\u0027." 
+}, { + "id": "5467", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Select all fighter aircraft from \u0027military_equipment\u0027 table", + "sql_context": "CREATE TABLE military_equipment (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), manufacturer VARCHAR(255), year INT, country VARCHAR(255)); INSERT INTO military_equipment (id, name, type, manufacturer, year, country) VALUES (1, \u0027M1 Abrams\u0027, \u0027Tank\u0027, \u0027General Dynamics\u0027, 1980, \u0027USA\u0027), (2, \u0027F-15 Eagle\u0027, \u0027Fighter\u0027, \u0027McDonnell Douglas\u0027, 1976, \u0027USA\u0027);", + "sql": "SELECT * FROM military_equipment WHERE type \u003d \u0027Fighter\u0027;", + "sql_explanation": "This selects all records from the \u0027military_equipment\u0027 table where the type is \u0027Fighter\u0027, returning information about the F-15 Eagle." 
+}, { + "id": "5721", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average maintenance cost for military aircraft?", + "sql_context": "CREATE TABLE aircraft (id INT, model VARCHAR(50), maintenance_cost FLOAT); INSERT INTO aircraft (id, model, maintenance_cost) VALUES (1, \u0027F-16\u0027, 35000), (2, \u0027F-35\u0027, 42000), (3, \u0027A-10\u0027, 28000);", + "sql": "SELECT AVG(maintenance_cost) FROM aircraft;", + "sql_explanation": "The SQL query calculates the average maintenance cost for all military aircraft by using the AVG() function on the maintenance_cost column." +}, { + "id": "1402", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of intelligence operations conducted by the Russian government in the last 10 years?", + "sql_context": "CREATE TABLE IntelligenceOperations (id INT, country VARCHAR(255), operation VARCHAR(255), operation_date DATE); INSERT INTO IntelligenceOperations (id, country, operation, operation_date) VALUES (1, \u0027Russian Federation\u0027, \u0027Operation Grom\u0027, \u00272012-01-01\u0027); INSERT INTO IntelligenceOperations (id, country, operation, operation_date) VALUES (2, \u0027Russian Federation\u0027, \u0027Operation 
Parnas\u0027, \u00272019-01-01\u0027);", + "sql": "SELECT COUNT(*) as Total_Operations FROM IntelligenceOperations WHERE country \u003d \u0027Russian Federation\u0027 AND operation_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 10 YEAR);", + "sql_explanation": "This SQL query calculates the total number of intelligence operations conducted by the Russian government in the last 10 years. It does this by selecting the count of all rows in the \u0027IntelligenceOperations\u0027 table and filtering the results based on the \u0027country\u0027 and \u0027operation_date\u0027 columns." +}, { + "id": "1441", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cybersecurity vulnerabilities were discovered in the last 6 months by the Canadian Cybersecurity Agency?", + "sql_context": "CREATE TABLE CybersecurityVulnerabilities (ID INT, DiscoveryDate DATE, Agency TEXT); INSERT INTO CybersecurityVulnerabilities (ID, DiscoveryDate, Agency) VALUES (1, \u00272022-04-01\u0027, \u0027Canadian Cybersecurity Agency\u0027), (2, \u00272022-03-15\u0027, \u0027US Cybersecurity Agency\u0027), (3, \u00272022-02-01\u0027, \u0027Canadian Cybersecurity Agency\u0027);", + "sql": "SELECT COUNT(*) FROM CybersecurityVulnerabilities WHERE DiscoveryDate \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND Agency \u003d \u0027Canadian Cybersecurity Agency\u0027;", + "sql_explanation": "The query calculates the number of cybersecurity vulnerabilities discovered in the last 6 months by the Canadian Cybersecurity Agency by selecting the count of all records in the CybersecurityVulnerabilities table that have a 
DiscoveryDate within the last 6 months and an Agency set to \u0027Canadian Cybersecurity Agency\u0027." +}, { + "id": "2878", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Provide the names and locations of all national security agencies in Africa", + "sql_context": "CREATE TABLE national_security_agencies (id INT, name TEXT, location TEXT); INSERT INTO national_security_agencies (id, name, location) VALUES (1, \u0027State Security Agency\u0027, \u0027South Africa\u0027), (2, \u0027National Intelligence and Security Service\u0027, \u0027Algeria\u0027), (3, \u0027National Security Agency\u0027, \u0027Nigeria\u0027);", + "sql": "SELECT name, location FROM national_security_agencies WHERE location IN (\u0027South Africa\u0027, \u0027Algeria\u0027, \u0027Nigeria\u0027);", + "sql_explanation": "This query lists the names and locations of all national security agencies in Africa by selecting the name and location of all records in the national_security_agencies table where the location is either South Africa, Algeria or Nigeria." 
+}, { + "id": "2908", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all cybersecurity incidents with a severity level above 7 in 2019, ordered by date.", + "sql_context": "CREATE TABLE cybersecurity_incidents(incident_id INT, incident_date DATE, severity INT); INSERT INTO cybersecurity_incidents(incident_id, incident_date, severity) VALUES (1, \u00272019-01-01\u0027, 8), (2, \u00272019-05-15\u0027, 5), (3, \u00272019-12-31\u0027, 9);", + "sql": "SELECT * FROM cybersecurity_incidents WHERE YEAR(incident_date) \u003d 2019 AND severity \u003e 7 ORDER BY incident_date;", + "sql_explanation": "Retrieve all cybersecurity incidents with a severity level greater than 7 in 2019, ordered by date, by querying the cybersecurity_incidents table." 
+}, { + "id": "2945", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of intelligence operations conducted in the first quarter of 2022.", + "sql_context": "CREATE TABLE intelligence_ops (id INT, operation_date DATE); INSERT INTO intelligence_ops (id, operation_date) VALUES (1, \u00272022-01-01\u0027); INSERT INTO intelligence_ops (id, operation_date) VALUES (2, \u00272022-02-03\u0027);", + "sql": "SELECT COUNT(*) FROM intelligence_ops WHERE operation_date \u003e\u003d \u00272022-01-01\u0027 AND operation_date \u003c\u003d \u00272022-03-31\u0027;", + "sql_explanation": "This query displays the number of intelligence operations conducted in the first quarter of 2022 by selecting all records with an \u0027operation_date\u0027 within the first quarter of 2022 and then calculating the count of those records." 
+}, { + "id": "3456", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of military technology programs in the North American region, excluding programs with a budget above $1 billion?", + "sql_context": "CREATE TABLE MilitaryPrograms (region VARCHAR(255), program VARCHAR(255), budget DECIMAL(10,2)); INSERT INTO MilitaryPrograms (region, program, budget) VALUES (\u0027North America\u0027, \u0027ProgramA\u0027, 400000000.00), (\u0027North America\u0027, \u0027ProgramB\u0027, 600000000.00), (\u0027North America\u0027, \u0027ProgramC\u0027, 50000000.00), (\u0027Asia\u0027, \u0027ProgramD\u0027, 300000000.00), (\u0027Asia\u0027, \u0027ProgramE\u0027, 200000000.00);", + "sql": "SELECT AVG(budget) FROM MilitaryPrograms WHERE region \u003d \u0027North America\u0027 AND budget \u003c 1000000000;", + "sql_explanation": "This query calculates the average number of military technology programs in the North American region, excluding programs with a budget above $1 billion. It does this by using the AVG function to calculate the average value of the budget column, while also using a WHERE clause to filter for rows where the region is \u0027North America\u0027 and the budget is less than $1 billion." 
+}, { + "id": "3618", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of military technology programs in the Asia-Pacific region, excluding programs with a budget over $500 million?", + "sql_context": "CREATE TABLE MilitaryBudget (region VARCHAR(255), program VARCHAR(255), budget DECIMAL(10,2)); INSERT INTO MilitaryBudget (region, program, budget) VALUES (\u0027Asia-Pacific\u0027, \u0027ProgramA\u0027, 400000000.00), (\u0027Asia-Pacific\u0027, \u0027ProgramB\u0027, 600000000.00), (\u0027Europe\u0027, \u0027ProgramC\u0027, 300000000.00);", + "sql": "SELECT AVG(budget) FROM MilitaryBudget WHERE region \u003d \u0027Asia-Pacific\u0027 AND budget \u003c\u003d 500000000;", + "sql_explanation": "This query calculates the average budget of military technology programs in the Asia-Pacific region, excluding programs with a budget over $500 million. It does this by using the AVG function to find the average of the budget column, while also using a WHERE clause to filter for rows where the region is \u0027Asia-Pacific\u0027 and the budget is less than or equal to $500 million." 
+}, { + "id": "3794", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show the intelligence operations that were conducted by the USA in the last decade.", + "sql_context": "CREATE TABLE intelligence_operations (id INT, operation_name VARCHAR(255), country VARCHAR(255), start_date DATE, end_date DATE);INSERT INTO intelligence_operations (id, operation_name, country, start_date, end_date) VALUES (1, \u0027Operation Joint\u0027, \u0027USA\u0027, \u00272011-01-01\u0027, \u00272011-12-31\u0027), (2, \u0027Operation Coalition\u0027, \u0027USA\u0027, \u00272015-01-01\u0027, \u00272015-12-31\u0027);", + "sql": "SELECT * FROM intelligence_operations WHERE country \u003d \u0027USA\u0027 AND YEAR(start_date) \u003e\u003d 2010;", + "sql_explanation": "This query returns all the intelligence operations that were conducted by the USA in the last decade by using the YEAR() function to filter the start_date and the country filter." 
+}, { + "id": "4064", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of military technology developed in the \u0027military_technology\u0027 table for the year 2019?", + "sql_context": "CREATE TABLE military_technology (id INT, technology_name TEXT, type TEXT, development_cost FLOAT, development_year INT); INSERT INTO military_technology (id, technology_name, type, development_cost, development_year) VALUES (1, \u0027Stealth Bomber\u0027, \u0027Aircraft\u0027, 50000000, 2019), (2, \u0027Submarine\u0027, \u0027Naval\u0027, 300000000, 2018), (3, \u0027Cybersecurity Software\u0027, \u0027Software\u0027, 5000000, 2019);", + "sql": "SELECT SUM(development_cost) FROM military_technology WHERE development_year \u003d 2019;", + "sql_explanation": "This query calculates the total cost of military technology developed in the year 2019 by summing up all the \u0027development_cost\u0027 values in the \u0027military_technology\u0027 table where the \u0027development_year\u0027 column is equal to 2019." 
+}, { + "id": "4127", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which intelligence operations were conducted before the year 2000 in the \u0027intelligence_agency\u0027 table?", + "sql_context": "CREATE TABLE intelligence_agency (agency VARCHAR(50), operation_name VARCHAR(50), operation_year INT); INSERT INTO intelligence_agency (agency, operation_name, operation_year) VALUES (\u0027CIA\u0027, \u0027Operation Bluebird\u0027, 1950), (\u0027CIA\u0027, \u0027Operation Mockingbird\u0027, 1960), (\u0027MI6\u0027, \u0027Operation Gold\u0027, 1956), (\u0027MI6\u0027, \u0027Operation Silver\u0027, 1962), (\u0027NSA\u0027, \u0027Operation Shamrock\u0027, 1945);", + "sql": "SELECT agency, operation_name FROM intelligence_agency WHERE operation_year \u003c 2000;", + "sql_explanation": "This query retrieves the names of the intelligence operations that were conducted before the year 2000 from the \u0027intelligence_agency\u0027 table by filtering the records based on the \u0027operation_year\u0027 column." 
+}, { + "id": "4134", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military personnel in the \u0027Air Force\u0027 and \u0027Navy\u0027 branches?", + "sql_context": "CREATE TABLE MilitaryPersonnel (ID INT, Branch VARCHAR(10), Personnel INT); INSERT INTO MilitaryPersonnel (ID, Branch, Personnel) VALUES (1, \u0027Army\u0027, 500000), (2, \u0027Navy\u0027, 350000), (3, \u0027Air Force\u0027, 400000);", + "sql": "SELECT SUM(Personnel) FROM MilitaryPersonnel WHERE Branch IN (\u0027Air Force\u0027, \u0027Navy\u0027);", + "sql_explanation": "This query calculates the total number of personnel in the \u0027Air Force\u0027 and \u0027Navy\u0027 branches by summing the \u0027Personnel\u0027 column values where the \u0027Branch\u0027 is either \u0027Air Force\u0027 or \u0027Navy\u0027." 
+}, { + "id": "4186", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of personnel that can be deployed by each military technology?", + "sql_context": "CREATE TABLE Military_Technologies (Name VARCHAR(255), Max_Personnel INT); INSERT INTO Military_Technologies (Name, Max_Personnel) VALUES (\u0027M1 Abrams\u0027, 4), (\u0027AH-64 Apache\u0027, 2), (\u0027M2 Bradley\u0027, 3);", + "sql": "SELECT Name, Max_Personnel FROM Military_Technologies ORDER BY Max_Personnel DESC;", + "sql_explanation": "The SQL query sorts the Military_Technologies table by the maximum number of personnel in descending order and returns all the rows that meet this condition." 
+}, { + "id": "4381", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 countries with the most military bases?", + "sql_context": "CREATE TABLE military_bases (id INT, country VARCHAR(255), num_bases INT); INSERT INTO military_bases (id, country, num_bases) VALUES (1, \u0027United States\u0027, 800), (2, \u0027China\u0027, 600), (3, \u0027Russia\u0027, 500), (4, \u0027United Kingdom\u0027, 300);", + "sql": "SELECT country, num_bases FROM military_bases ORDER BY num_bases DESC LIMIT 3;", + "sql_explanation": "The SQL query selects the top 3 countries with the most military bases, ordering them by the number of bases in descending order." 
+}, { + "id": "4453", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all military exercises that took place in the Arctic region since 2017.", + "sql_context": "CREATE TABLE military_exercises (id INT, name TEXT, region TEXT, year INT);INSERT INTO military_exercises (id, name, region, year) VALUES (1, \u0027Arctic Edge\u0027, \u0027Arctic\u0027, 2017);INSERT INTO military_exercises (id, name, region, year) VALUES (2, \u0027Northern Edge\u0027, \u0027Arctic\u0027, 2019);", + "sql": "SELECT name FROM military_exercises WHERE region \u003d \u0027Arctic\u0027 AND year \u003e\u003d 2017;", + "sql_explanation": "This query lists all military exercises that took place in the Arctic region since 2017 by filtering the military_exercises table for the Arctic region and year greater than or equal to 2017 and then returning the name column." 
+}, { + "id": "4603", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military vehicles produced by companies based in the Asia-Pacific region in the \u0027military_vehicles\u0027 table?", + "sql_context": "CREATE TABLE military_vehicles (company VARCHAR(50), region VARCHAR(50), production_year INT, quantity INT); INSERT INTO military_vehicles (company, region, production_year, quantity) VALUES (\u0027Company A\u0027, \u0027Asia-Pacific\u0027, 2010, 500), (\u0027Company B\u0027, \u0027Asia-Pacific\u0027, 2015, 700), (\u0027Company C\u0027, \u0027Europe\u0027, 2012, 600), (\u0027Company D\u0027, \u0027Americas\u0027, 2018, 800);", + "sql": "SELECT SUM(quantity) FROM military_vehicles WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query calculates the total number of military vehicles produced by companies based in the Asia-Pacific region by summing the \u0027quantity\u0027 column for records that have a \u0027region\u0027 value of \u0027Asia-Pacific\u0027." 
+}, { + "id": "4625", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average implementation year of cybersecurity strategies in the \u0027cybersecurity_strategies\u0027 table?", + "sql_context": "CREATE TABLE cybersecurity_strategies (strategy_name VARCHAR(50), implementation_year INT); INSERT INTO cybersecurity_strategies (strategy_name, implementation_year) VALUES (\u0027Firewall\u0027, 2018), (\u0027Intrusion Detection System\u0027, 2019), (\u0027Multi-Factor Authentication\u0027, 2020), (\u0027Zero Trust\u0027, 2021), (\u0027Encryption\u0027, 2017);", + "sql": "SELECT AVG(implementation_year) as avg_year FROM cybersecurity_strategies;", + "sql_explanation": "This query calculates the average implementation year of cybersecurity strategies in the \u0027cybersecurity_strategies\u0027 table by calculating the average of the \u0027implementation_year\u0027 column." 
+}, { + "id": "4722", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the name and type of military cyber commands in the \u0027Military_Cyber_Commands\u0027 table.", + "sql_context": "CREATE SCHEMA IF NOT EXISTS defense_security;CREATE TABLE IF NOT EXISTS defense_security.Military_Cyber_Commands (id INT PRIMARY KEY, command_name VARCHAR(255), type VARCHAR(255));INSERT INTO defense_security.Military_Cyber_Commands (id, command_name, type) VALUES (1, \u0027USCYBERCOM\u0027, \u0027Defensive Cyber Operations\u0027), (2, \u0027JTF-CND\u0027, \u0027Offensive Cyber Operations\u0027), (3, \u002710th Fleet\u0027, \u0027Network Warfare\u0027);", + "sql": "SELECT command_name, type FROM defense_security.Military_Cyber_Commands;", + "sql_explanation": "This SQL query lists the name and type of military cyber commands in the \u0027Military_Cyber_Commands\u0027 table by selecting both the \u0027command_name\u0027 and \u0027type\u0027 columns." 
+}, { + "id": "4744", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of military technology patents filed by China in a single year?", + "sql_context": "CREATE TABLE tech_patents_china (country VARCHAR(255), year INT, num_patents INT); INSERT INTO tech_patents_china (country, year, num_patents) VALUES (\u0027China\u0027, 2015, 1000), (\u0027China\u0027, 2016, 1200), (\u0027China\u0027, 2017, 1400);", + "sql": "SELECT MAX(num_patents) FROM tech_patents_china WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "This query finds the maximum number of military technology patents filed by China in a single year. It does this by selecting the maximum (MAX) of the num_patents column, where the country is China." 
+}, { + "id": "4786", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all intelligence operations in \u0027Asia-Pacific\u0027 for the \u0027IntelligenceOperations\u0027 schema.", + "sql_context": "CREATE SCHEMA IF NOT EXISTS IntelligenceOperations; CREATE TABLE IF NOT EXISTS IntelligenceOperations.Ops (op_id INT, op_name VARCHAR(255), region VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO IntelligenceOperations.Ops (op_id, op_name, region, start_date, end_date) VALUES (1, \u0027Operation Red Sparrow\u0027, \u0027Asia-Pacific\u0027, \u00272018-01-01\u0027, \u00272018-12-31\u0027), (2, \u0027Operation Silent Storm\u0027, \u0027Europe\u0027, \u00272019-01-01\u0027, \u00272019-12-31\u0027);", + "sql": "SELECT * FROM IntelligenceOperations.Ops WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "The SQL query filters records in the \u0027Ops\u0027 table based on the region field, looking for records that have the value \u0027Asia-Pacific\u0027." 
+}, { + "id": "4935", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the intelligence officers involved in Operation Red Lotus?", + "sql_context": "CREATE TABLE IntelligenceOfficers (ID INT, Name TEXT, Operation TEXT); INSERT INTO IntelligenceOfficers (ID, Name, Operation) VALUES (1, \u0027Alice\u0027, \u0027Red Lotus\u0027), (2, \u0027Bob\u0027, \u0027Blue Tide\u0027), (3, \u0027Charlie\u0027, \u0027Red Lotus\u0027);", + "sql": "SELECT Name FROM IntelligenceOfficers WHERE Operation \u003d \u0027Red Lotus\u0027;", + "sql_explanation": "The SQL query filters the IntelligenceOfficers table to only include rows with an Operation set to \u0027Red Lotus\u0027. It then selects the Name column from these filtered rows." 
+}, { + "id": "4936", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all cybersecurity incidents that occurred in the Asia-Pacific region, including their details and the countermeasures taken.", + "sql_context": "CREATE TABLE cybersecurity_incidents (id INT, title VARCHAR(255), description TEXT, region VARCHAR(255), countermeasure TEXT);INSERT INTO cybersecurity_incidents (id, title, description, region, countermeasure) VALUES (1, \u0027Incident A\u0027, \u0027Details about Incident A\u0027, \u0027Asia-Pacific\u0027, \u0027Countermeasure A\u0027), (2, \u0027Incident B\u0027, \u0027Details about Incident B\u0027, \u0027Europe\u0027, \u0027Countermeasure B\u0027);", + "sql": "SELECT * FROM cybersecurity_incidents WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This query filters the cybersecurity incidents based on the region and returns all the incidents that occurred in the Asia-Pacific region." 
+}, { + "id": "5104", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Military tech budget changes in 2019", + "sql_context": "CREATE TABLE budget_changes (year INT, budget_change FLOAT);", + "sql": "SELECT year, budget_change FROM budget_changes WHERE year \u003d 2019;", + "sql_explanation": "This query retrieves military tech budget changes in 2019 by selecting rows from the budget_changes table where the year is 2019." +}, { + "id": "5169", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of successful cybersecurity incidents in the \u0027Incident_Report\u0027 table?", + "sql_context": "CREATE TABLE Incident_Report (id INT, incident VARCHAR(50), status VARCHAR(50), date DATE); INSERT INTO Incident_Report (id, incident, status, date) VALUES (1, \u0027Malware Attack\u0027, \u0027Resolved\u0027, \u00272021-03-01\u0027); INSERT INTO Incident_Report (id, incident, status, date) VALUES (2, \u0027Phishing Attempt\u0027, \u0027Unresolved\u0027, \u00272021-03-02\u0027);", + "sql": "SELECT COUNT(*) FROM Incident_Report WHERE status \u003d \u0027Resolved\u0027;", + "sql_explanation": "This SQL query counts all records in the \u0027Incident_Report\u0027 table with the status \u0027Resolved\u0027 
to find the number of successful cybersecurity incidents." +}, { + "id": "5215", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of military personnel in the Middle East?", + "sql_context": "CREATE TABLE military_personnel_middle_east (country VARCHAR(255), num_personnel INT); INSERT INTO military_personnel_middle_east (country, num_personnel) VALUES (\u0027Israel\u0027, 170000), (\u0027Saudi Arabia\u0027, 225000), (\u0027Iran\u0027, 525000);", + "sql": "SELECT AVG(num_personnel) FROM military_personnel_middle_east;", + "sql_explanation": "This query calculates the average number of military personnel in the Middle East. It does this by selecting the average (AVG) of the num_personnel column from the military_personnel_middle_east table." 
+}, { + "id": "5367", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest implementation date for military intelligence operations?", + "sql_context": "CREATE TABLE IntelligenceOperations (Id INT, Name VARCHAR(50), ImplementationDate DATE); INSERT INTO IntelligenceOperations VALUES (1, \u0027SIGINT\u0027, \u00271960-01-01\u0027); INSERT INTO IntelligenceOperations VALUES (2, \u0027HUMINT\u0027, \u00271945-05-07\u0027); INSERT INTO IntelligenceOperations VALUES (3, \u0027GEOINT\u0027, \u00272001-09-11\u0027);", + "sql": "SELECT MIN(ImplementationDate) FROM IntelligenceOperations;", + "sql_explanation": "We select the minimum implementation date from the IntelligenceOperations table to find the earliest date for military intelligence operations." 
+}, { + "id": "5422", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records of a specific type of intelligence operation in the \"intelligence_ops\" table", + "sql_context": "CREATE TABLE intelligence_ops (id INT, year INT, location VARCHAR(255), type VARCHAR(255), result VARCHAR(255)); INSERT INTO intelligence_ops (id, year, location, type, result) VALUES (1, 2015, \u0027Russia\u0027, \u0027Surveillance\u0027, \u0027Success\u0027), (2, 2015, \u0027Germany\u0027, \u0027Infiltration\u0027, \u0027Failure\u0027);", + "sql": "DELETE FROM intelligence_ops WHERE type \u003d \u0027Infiltration\u0027;", + "sql_explanation": "This query deletes all records of infiltration operations from the \"intelligence_ops\" table." 
+}, { + "id": "5463", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military aircraft in the \u0027NorthAmerica\u0027 schema?", + "sql_context": "CREATE SCHEMA NorthAmerica; CREATE TABLE MilitaryAircraft (id INT, name VARCHAR(255), type VARCHAR(255), quantity INT); INSERT INTO MilitaryAircraft (id, name, type, quantity) VALUES (1, \u0027F-16\u0027, \u0027Fighter Jet\u0027, 50); INSERT INTO MilitaryAircraft (id, name, type, quantity) VALUES (2, \u0027B-52\u0027, \u0027Bomber\u0027, 20);", + "sql": "SELECT SUM(quantity) FROM NorthAmerica.MilitaryAircraft;", + "sql_explanation": "This query sums the \u0027quantity\u0027 column in the \u0027MilitaryAircraft\u0027 table within the \u0027NorthAmerica\u0027 schema, providing the total number of military aircraft." 
+}, { + "id": "5559", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cybersecurity strategies were implemented in \u0027Year\u0027 2020?", + "sql_context": "CREATE TABLE Cybersecurity (id INT, strategy VARCHAR(50), year INT, description VARCHAR(255)); INSERT INTO Cybersecurity (id, strategy, year, description) VALUES (1, \u0027Next-Gen Firewall\u0027, 2020, \u0027Deployed next-generation firewall for network security. \u0027); INSERT INTO Cybersecurity (id, strategy, year, description) VALUES (2, \u0027Intrusion Detection System\u0027, 2019, \u0027Implemented Intrusion Detection System in the network. \u0027);", + "sql": "SELECT strategy FROM Cybersecurity WHERE year \u003d 2020;", + "sql_explanation": "This SQL query selects all cybersecurity strategies implemented in the year 2020 from the \u0027Cybersecurity\u0027 table." 
+}, { + "id": "5574", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List military bases with a budget over 10 million dollars from the \u0027MilitaryBases\u0027 table", + "sql_context": "CREATE TABLE MilitaryBases (Base_ID INT, Base_Name VARCHAR(255), Base_Location VARCHAR(255), Budget INT); INSERT INTO MilitaryBases (Base_ID, Base_Name, Base_Location, Budget) VALUES (1, \u0027Fort Hood\u0027, \u0027Texas\u0027, 9000000); INSERT INTO MilitaryBases (Base_ID, Base_Name, Base_Location, Budget) VALUES (2, \u0027Peterson AFB\u0027, \u0027Colorado\u0027, 12000000);", + "sql": "SELECT * FROM MilitaryBases WHERE Budget \u003e 10000000;", + "sql_explanation": "This query retrieves all records from the \u0027MilitaryBases\u0027 table where the budget is over 10 million dollars. By specifying the WHERE clause with the Budget column and a value greater than 10000000, the query lists all bases that meet the budget criteria." 
+}, { + "id": "5607", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the military operations in the \u0027Military_Operations\u0027 table?", + "sql_context": "CREATE TABLE Military_Operations (id INT, operation VARCHAR(50)); INSERT INTO Military_Operations (id, operation) VALUES (1, \u0027Operation Enduring Freedom\u0027); INSERT INTO Military_Operations (id, operation) VALUES (2, \u0027Operation Iraqi Freedom\u0027);", + "sql": "SELECT DISTINCT operation FROM Military_Operations;", + "sql_explanation": "This SQL query selects all distinct military operations from the \u0027Military_Operations\u0027 table." 
+}, { + "id": "5625", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and year of all national security strategies implemented before 2015 in the \u0027Strategy\u0027 table?", + "sql_context": "CREATE TABLE Strategy (id INT, name VARCHAR(50), year INT, continent VARCHAR(50)); INSERT INTO Strategy (id, name, year, continent) VALUES (1, \u0027National Security Strategy 2010\u0027, 2010, \u0027North America\u0027); INSERT INTO Strategy (id, name, year, continent) VALUES (2, \u0027National Security Strategy 2015\u0027, 2015, \u0027Europe\u0027);", + "sql": "SELECT name, year FROM Strategy WHERE year \u003c 2015;", + "sql_explanation": "This SQL query selects the \u0027name\u0027 and \u0027year\u0027 columns from the \u0027Strategy\u0027 table where the \u0027year\u0027 column is less than 2015." 
+}, { + "id": "5709", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all intelligence operations in the \u0027Africa\u0027 schema.", + "sql_context": "CREATE SCHEMA Africa; CREATE TABLE IntelligenceOperations (id INT, name VARCHAR(255), location VARCHAR(255), date DATE); INSERT INTO IntelligenceOperations (id, name, location, date) VALUES (1, \u0027Operation Flintlock\u0027, \u0027Niger\u0027, \u00272023-02-01\u0027); INSERT INTO IntelligenceOperations (id, name, location, date) VALUES (2, \u0027Operation Juniper Shield\u0027, \u0027Somalia\u0027, \u00272022-10-15\u0027);", + "sql": "SELECT * FROM Africa.IntelligenceOperations;", + "sql_explanation": "This query retrieves all records from the \u0027IntelligenceOperations\u0027 table within the \u0027Africa\u0027 schema." 
+}, { + "id": "5755", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List military bases and their respective countries.", + "sql_context": "CREATE TABLE military_bases (id INT, base VARCHAR(255), country VARCHAR(255)); INSERT INTO military_bases (id, base, country) VALUES (1, \u0027Fort Bragg\u0027, \u0027United States\u0027), (2, \u0027Camp Basra\u0027, \u0027Iraq\u0027), (3, \u0027Falls Station\u0027, \u0027United Kingdom\u0027), (4, \u0027Garrison Petawawa\u0027, \u0027Canada\u0027);", + "sql": "SELECT base, country FROM military_bases;", + "sql_explanation": "The SQL query lists military bases along with their respective countries." 
+}, { + "id": "5756", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of cybersecurity strategies in the \u0027Cyber_Strategies\u0027 table?", + "sql_context": "CREATE TABLE Cyber_Strategies (id INT, name VARCHAR(50), location VARCHAR(20), type VARCHAR(20), budget INT); INSERT INTO Cyber_Strategies (id, name, location, type, budget) VALUES (1, \u0027Cyber Shield\u0027, \u0027North America\u0027, \u0027Defense\u0027, 5000000);", + "sql": "SELECT AVG(budget) FROM Cyber_Strategies;", + "sql_explanation": "This query calculates the average budget of cybersecurity strategies in the \u0027Cyber_Strategies\u0027 table by averaging the \u0027budget\u0027 column." 
+}, { + "id": "5757", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all intelligence operations in the operations table that were conducted before the year 2000.", + "sql_context": "CREATE TABLE operations (name TEXT, description TEXT, year INT); INSERT INTO operations (name, description, year) VALUES (\u0027Operation Desert Storm\u0027, \u0027Military intervention in Iraq.\u0027, 1991), (\u0027Operation Enduring Freedom\u0027, \u0027Military intervention in Afghanistan.\u0027, 2001), (\u0027Operation Just Cause\u0027, \u0027Military intervention in Panama.\u0027, 1989);", + "sql": "DELETE FROM operations WHERE year \u003c 2000;", + "sql_explanation": "This query deletes all rows from the operations table where the year is less than 2000." 
+}, { + "id": "5792", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What are the details of intelligence operations in the \u0027Intelligence\u0027 schema?", + "sql_context": "CREATE SCHEMA IF NOT EXISTS Intelligence; CREATE TABLE IF NOT EXISTS Intelligence.Intel_Ops (operation_id INT, operation_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO Intelligence.Intel_Ops (operation_id, operation_name, location, start_date, end_date) VALUES (1, \u0027Operation Blue Star\u0027, \u0027India\u0027, \u00271984-06-01\u0027, \u00271984-06-06\u0027), (2, \u0027Operation Just Cause\u0027, \u0027Panama\u0027, \u00271989-12-20\u0027, \u00271990-01-03\u0027);", + "sql": "SELECT * FROM Intelligence.Intel_Ops;", + "sql_explanation": "The SQL query retrieves all records from the \u0027Intel_Ops\u0027 table within the \u0027Intelligence\u0027 schema, providing details of intelligence operations." 
+}, { + "id": "5844", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average size of military equipment in the \u0027armory\u0027 table?", + "sql_context": "CREATE TABLE armory (id INT, equipment_name TEXT, size FLOAT);", + "sql": "SELECT AVG(size) FROM armory;", + "sql_explanation": "This query calculates the average size of military equipment by summing up all the \u0027size\u0027 values in the \u0027armory\u0027 table and then dividing by the count of \u0027size\u0027 values." +}, { + "id": "3140", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have a heart rate above 150 during evening workouts?", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, heart_rate INT, workout_time TIME); INSERT INTO workouts (id, user_id, heart_rate, workout_time) VALUES (1, 1, 160, \u002718:00:00\u0027);", + "sql": "SELECT COUNT(*) FROM workouts WHERE heart_rate \u003e 150 AND workout_time BETWEEN \u002718:00:00\u0027 AND \u002723:59:59\u0027;", + "sql_explanation": "Count the number of users with a heart rate above 150 during evening workouts. The query filters workouts between 6:00 PM and 11:59 PM and calculates the number of users with a heart rate above 150." 
+}, { + "id": "3269", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have a \u0027Basic\u0027 membership in the \u0027North\u0027 region?", + "sql_context": "CREATE TABLE memberships (id INT, user_id INT, membership_type VARCHAR(255), region VARCHAR(255));", + "sql": "SELECT COUNT(DISTINCT user_id) FROM memberships WHERE membership_type \u003d \u0027Basic\u0027 AND region \u003d \u0027North\u0027;", + "sql_explanation": "The query selects the distinct user_id\u0027s from the memberships table that have a \u0027Basic\u0027 membership in the \u0027North\u0027 region." +}, { + "id": "3594", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of all Pilates classes in the last month?", + "sql_context": "CREATE TABLE pilates_classes (id INT, class_id INT, duration FLOAT, class_date DATE);", + "sql": "SELECT SUM(duration) FROM pilates_classes WHERE class_date \u003e\u003d DATE(NOW()) - INTERVAL 1 MONTH;", + "sql_explanation": "The query calculates the total duration of all Pilates classes in the last month by summing the duration column in the pilates_classes table." 
+}, { + "id": "3775", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum heart rate for users during evening workouts?", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, heart_rate INT, workout_time TIME); INSERT INTO workouts (id, user_id, heart_rate, workout_time) VALUES (1, 1, 120, \u002718:00:00\u0027);", + "sql": "SELECT MIN(heart_rate) FROM workouts WHERE workout_time BETWEEN \u002718:00:00\u0027 AND \u002723:59:59\u0027;", + "sql_explanation": "Determine the minimum heart rate for users during their evening workouts. The query filters workouts between 6:00 PM and 11:59 PM and calculates the minimum heart rate." +}, { + "id": "4041", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record for a user with id 20, name \u0027John Doe\u0027 and email \u0027johndoe@example.com\u0027 to the members table", + "sql_context": "CREATE TABLE members (id INT, name VARCHAR(50), email VARCHAR(50));", + "sql": "INSERT INTO members (id, name, email) VALUES (20, \u0027John Doe\u0027, \u0027johndoe@example.com\u0027);", + "sql_explanation": "This query inserts a new record into the members table with the specified id, name, and email values." 
+}, { + "id": "4280", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique members have a membership type of \"premium\"?", + "sql_context": "CREATE TABLE members (member_id INT, membership_type VARCHAR(10)); INSERT INTO members VALUES (1,\u0027Premium\u0027),(2,\u0027Basic\u0027),(3,\u0027Premium\u0027),(4,\u0027Standard\u0027);", + "sql": "SELECT COUNT(DISTINCT member_id) FROM members WHERE membership_type \u003d \u0027Premium\u0027;", + "sql_explanation": "The SQL query selects the count of distinct member_id records where the membership_type is \u0027Premium\u0027. It returns the number of unique members who have a premium membership type." 
+}, { + "id": "4297", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have a heart rate monitor and live in Texas?", + "sql_context": "CREATE TABLE Users (user_id INT, has_heart_rate_monitor BOOLEAN, state VARCHAR(2)); INSERT INTO Users (user_id, has_heart_rate_monitor, state) VALUES (1, true, \u0027TX\u0027); INSERT INTO Users (user_id, has_heart_rate_monitor, state) VALUES (2, false, \u0027NY\u0027);", + "sql": "SELECT COUNT(*) FROM Users WHERE has_heart_rate_monitor \u003d true AND state \u003d \u0027TX\u0027;", + "sql_explanation": "This query counts the number of users who have a heart rate monitor and live in Texas by filtering the Users table on the has_heart_rate_monitor and state columns." +}, { + "id": "4416", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete users who have not logged in for over a year from the users table", + "sql_context": "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(255), last_login DATETIME);", + "sql": "DELETE FROM users WHERE last_login \u003c\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "This query deletes users who haven\u0027t logged in for over a year by using the DELETE statement and specifying the users table. 
It filters the records based on the last_login column, which is compared to a date calculated by subtracting one year from the current date using the DATE_SUB function." +}, { + "id": "4515", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all members who participated in a workout activity on a specific date (e.g., 2021-02-20).", + "sql_context": "CREATE TABLE member_workout_date (member_id INT, activity VARCHAR(50), workout_date DATE); INSERT INTO member_workout_date (member_id, activity, workout_date) VALUES (1, \u0027Running\u0027, \u00272021-02-20\u0027); INSERT INTO member_workout_date (member_id, activity, workout_date) VALUES (2, \u0027Yoga\u0027, \u00272021-02-20\u0027); INSERT INTO member_workout_date (member_id, activity, workout_date) VALUES (3, \u0027Running\u0027, \u00272021-02-20\u0027);", + "sql": "SELECT member_id FROM member_workout_date WHERE workout_date \u003d \u00272021-02-20\u0027;", + "sql_explanation": "This query lists the member id for all members who participated in a workout activity on a specific date (2021-02-20) from the member_workout_date table." 
+}, { + "id": "4925", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of all workouts offered in the \u0027Central\u0027 region?", + "sql_context": "CREATE SCHEMA fitness; CREATE TABLE workouts (id INT, workout_type VARCHAR(50), region VARCHAR(50), duration INT, calories_burnt INT); INSERT INTO workouts (id, workout_type, region, duration, calories_burnt) VALUES (1, \u0027Yoga\u0027, \u0027Central\u0027, 60, 150); INSERT INTO workouts (id, workout_type, region, duration, calories_burnt) VALUES (2, \u0027Pilates\u0027, \u0027Central\u0027, 45, 120);", + "sql": "SELECT SUM(duration) FROM fitness.workouts WHERE region \u003d \u0027Central\u0027;", + "sql_explanation": "This SQL query calculates the total duration of all workouts offered in the \u0027Central\u0027 region. It uses the SUM() function to add up all the durations of the workouts and filters the data for the Central region using the WHERE clause." 
+}, { + "id": "5025", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum heart rate recorded for users while doing yoga?", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, duration INT, date DATE, heart_rate INT, exercise_type VARCHAR(50)); INSERT INTO workouts (id, user_id, duration, date, heart_rate, exercise_type) VALUES (1, 101, 60, \u00272022-06-01\u0027, 120, \u0027yoga\u0027), (2, 102, 45, \u00272022-06-01\u0027, 130, \u0027running\u0027), (3, 101, 75, \u00272022-06-02\u0027, 115, \u0027yoga\u0027);", + "sql": "SELECT MAX(heart_rate) FROM workouts WHERE exercise_type \u003d \u0027yoga\u0027;", + "sql_explanation": "This query selects the maximum heart rate recorded for users while doing yoga." 
+}, { + "id": "5087", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum weight lifted by users who have achieved a specific strength milestone?", + "sql_context": "CREATE TABLE Strength (id INT, user_id INT, milestone TEXT, weight INT); INSERT INTO Strength (id, user_id, milestone, weight) VALUES (1, 1, \u0027squat 100kg\u0027, 105), (2, 2, \u0027deadlift 150kg\u0027, 160);", + "sql": "SELECT MAX(weight) FROM Strength WHERE milestone \u003d \u0027squat 100kg\u0027;", + "sql_explanation": "The SQL query calculates the maximum weight lifted by users who have achieved a specific strength milestone by filtering the data using the WHERE clause to only consider users who have achieved the specified milestone and using the MAX function to find the maximum weight value in the filtered data." 
+}, { + "id": "5132", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have a maximum heart rate over 180?", + "sql_context": "CREATE TABLE member_workouts (member_id INT, max_heart_rate INT); INSERT INTO member_workouts (member_id, max_heart_rate) VALUES (1, 190), (2, 170), (3, 185), (4, 160), (5, 200);", + "sql": "SELECT COUNT(*) FROM member_workouts WHERE max_heart_rate \u003e 180;", + "sql_explanation": "The SQL query counts the number of members with a maximum heart rate over 180 using the COUNT function and filters the records using the WHERE clause." +}, { + "id": "5135", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of members who have used wearable technology to track their workouts", + "sql_context": "CREATE TABLE member_workouts (member_id INT, wearable_tech BOOLEAN);", + "sql": "SELECT COUNT(*) FROM member_workouts WHERE wearable_tech \u003d TRUE;", + "sql_explanation": "This query counts the number of members who have used wearable technology to track their workouts." 
+}, { + "id": "5136", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration (in minutes) of all yoga classes taken by users with the first name \u0027Amy\u0027?", + "sql_context": "CREATE TABLE yoga_classes (class_id INT, user_id INT, duration INT, first_name VARCHAR(10));", + "sql": "SELECT SUM(duration) FROM yoga_classes WHERE first_name \u003d \u0027Amy\u0027;", + "sql_explanation": "This query sums the duration of all yoga classes taken by users with the first name \u0027Amy\u0027 from the yoga_classes table." +}, { + "id": "5336", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of minutes spent doing a specific activity?", + "sql_context": "CREATE TABLE Activities (id INT, user_id INT, minutes INT, activity TEXT); INSERT INTO Activities (id, user_id, minutes, activity) VALUES (1, 1, 60, \u0027yoga\u0027), (2, 2, 90, \u0027meditation\u0027);", + "sql": "SELECT SUM(minutes) FROM Activities WHERE activity \u003d \u0027yoga\u0027;", + "sql_explanation": "The SQL query calculates the total number of minutes spent doing a specific activity by filtering the data using the WHERE clause to only consider minutes spent doing the specified activity and using the SUM function to sum the minutes values in the filtered 
data." +}, { + "id": "5345", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of push-ups performed by users on a given day?", + "sql_context": "CREATE TABLE Exercises (UserID INT, Exercise VARCHAR(20), Reps INT, Date DATE); INSERT INTO Exercises (UserID, Exercise, Reps, Date) VALUES (1, \u0027Push-ups\u0027, 25, \u00272022-04-01\u0027), (1, \u0027Sit-ups\u0027, 30, \u00272022-04-01\u0027), (2, \u0027Push-ups\u0027, 30, \u00272022-04-01\u0027), (2, \u0027Pull-ups\u0027, 15, \u00272022-04-01\u0027);", + "sql": "SELECT MIN(Reps) FROM Exercises WHERE Exercise \u003d \u0027Push-ups\u0027;", + "sql_explanation": "The SQL query finds the minimum number of push-ups performed by users on a given day by using the MIN function on the Reps column, and filtering for rows with Exercise equal to \u0027Push-ups\u0027." 
+}, { + "id": "5383", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of calories burned in a single workout by users from India?", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, workout_date DATE, calories INT, country VARCHAR(50)); INSERT INTO workouts (id, user_id, workout_date, calories, country) VALUES (1, 123, \u00272022-01-01\u0027, 300, \u0027USA\u0027); INSERT INTO workouts (id, user_id, workout_date, calories, country) VALUES (2, 456, \u00272022-01-02\u0027, 400, \u0027Canada\u0027);", + "sql": "SELECT MAX(calories) FROM workouts WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the maximum number of calories burned in a single workout by users from India. It does so by using the MAX function to find the highest value in the calories column where the country column equals \u0027India\u0027." 
+}, { + "id": "5389", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of male members from the FitnessMembers table?", + "sql_context": "CREATE TABLE FitnessMembers (member_id INT, name VARCHAR(50), age INT, gender VARCHAR(10)); INSERT INTO FitnessMembers (member_id, name, age, gender) VALUES (1, \u0027John Doe\u0027, 25, \u0027Male\u0027); INSERT INTO FitnessMembers (member_id, name, age, gender) VALUES (2, \u0027Jane Smith\u0027, 30, \u0027Female\u0027); INSERT INTO FitnessMembers (member_id, name, age, gender) VALUES (3, \u0027Alice Johnson\u0027, 35, \u0027Female\u0027); INSERT INTO FitnessMembers (member_id, name, age, gender) VALUES (4, \u0027Bob Brown\u0027, 40, \u0027Male\u0027);", + "sql": "SELECT AVG(age) FROM FitnessMembers WHERE gender \u003d \u0027Male\u0027;", + "sql_explanation": "The query calculates the average age of male members in the FitnessMembers table by filtering the records based on gender." 
+}, { + "id": "5424", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the last name of member with ID 1 to \u0027Smith\u0027.", + "sql_context": "CREATE TABLE Members (MemberID INT, FirstName VARCHAR(50), LastName VARCHAR(50)); INSERT INTO Members (MemberID, FirstName, LastName) VALUES (1, \u0027John\u0027, \u0027Doe\u0027); INSERT INTO Members (MemberID, FirstName, LastName) VALUES (2, \u0027Jane\u0027, \u0027Doe\u0027);", + "sql": "UPDATE Members SET LastName \u003d \u0027Smith\u0027 WHERE MemberID \u003d 1;", + "sql_explanation": "This query updates the last name of member with ID 1 to \u0027Smith\u0027 by performing an UPDATE statement on the Members table with the specified WHERE clause." 
+}, { + "id": "5471", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest join date for members from the USA?", + "sql_context": "CREATE TABLE Members (MemberID INT, JoinDate DATE, Country VARCHAR(20)); INSERT INTO Members (MemberID, JoinDate, Country) VALUES (1, \u00272022-04-05\u0027, \u0027Canada\u0027), (2, \u00272022-03-12\u0027, \u0027USA\u0027), (3, \u00272022-06-20\u0027, \u0027Canada\u0027), (4, \u00272022-05-01\u0027, \u0027USA\u0027);", + "sql": "SELECT MIN(JoinDate) FROM Members WHERE Country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query determines the earliest join date for members from the USA by filtering the \u0027Members\u0027 table for members from the USA and then applying the MIN function to the \u0027JoinDate\u0027 column." 
+}, { + "id": "5513", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have a gold membership?", + "sql_context": "CREATE SCHEMA fitness; USE fitness; CREATE TABLE members (member_id INT PRIMARY KEY, name VARCHAR(50), age INT, membership VARCHAR(20)); INSERT INTO members (member_id, name, age, membership) VALUES (1, \u0027John Doe\u0027, 30, \u0027gold\u0027), (2, \u0027Jane Smith\u0027, 40, \u0027silver\u0027), (3, \u0027Mike Johnson\u0027, 50, \u0027platinum\u0027), (4, \u0027Alice Davis\u0027, 35, NULL), (5, \u0027Bob Brown\u0027, 45, \u0027gold\u0027);", + "sql": "SELECT COUNT(*) FROM members WHERE membership \u003d \u0027gold\u0027;", + "sql_explanation": "This query counts the number of rows in the \u0027members\u0027 table where the membership is \u0027gold\u0027, which means that it counts only members who have a gold membership." 
+}, { + "id": "5618", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have a body fat percentage above 20%?", + "sql_context": "CREATE TABLE BodyStats (UserID INT, BodyFat INT); INSERT INTO BodyStats (UserID, BodyFat) VALUES (1, 18), (1, 22), (2, 15), (2, 25);", + "sql": "SELECT COUNT(*) FROM BodyStats WHERE BodyFat \u003e 20;", + "sql_explanation": "This query counts the number of users who have a body fat percentage above 20% by using the COUNT function on all rows, and filtering for rows with BodyFat \u003e 20." +}, { + "id": "5711", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete equipment data for the item with EquipmentID 3, which has been discontinued.", + "sql_context": "CREATE TABLE Equipment (EquipmentID INT, EquipmentName VARCHAR(50), PurchaseDate DATE, PurchaseLocation VARCHAR(50)); INSERT INTO Equipment (EquipmentID, EquipmentName, PurchaseDate, PurchaseLocation) VALUES (1, \u0027Treadmill\u0027, \u00272021-10-01\u0027, \u0027Canada\u0027); INSERT INTO Equipment (EquipmentID, EquipmentName, PurchaseDate, PurchaseLocation) VALUES (2, \u0027Elliptical\u0027, \u00272022-01-20\u0027, \u0027Germany\u0027); INSERT INTO Equipment (EquipmentID, EquipmentName, PurchaseDate, PurchaseLocation) VALUES (3, \u0027Stationary Bike\u0027, 
\u00272022-02-05\u0027, \u0027Brazil\u0027);", + "sql": "DELETE FROM Equipment WHERE EquipmentID \u003d 3;", + "sql_explanation": "This query deletes equipment data for the item with EquipmentID 3, which has been discontinued, by performing a DELETE statement on the Equipment table with the specified WHERE clause." +}, { + "id": "471", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many graduate students in the Physics department are from underrepresented racial or ethnic backgrounds and have a GPA of at least 3.5?", + "sql_context": "CREATE SCHEMA if not exists higher_ed;CREATE TABLE if not exists higher_ed.students(id INT, name VARCHAR(255), department VARCHAR(255), gpa DECIMAL(3,2), race VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM higher_ed.students WHERE department \u003d \u0027Physics\u0027 AND gpa \u003e\u003d 3.5 AND race IN (\u0027Black or African American\u0027, \u0027Hispanic or Latinx\u0027, \u0027Native American or Alaska Native\u0027, \u0027Native Hawaiian or Pacific Islander\u0027);", + "sql_explanation": "This query calculates the number of graduate students in the Physics department who are from underrepresented racial or ethnic backgrounds and have a GPA of at least 3.5. It does so by selecting the entire students table and applying the COUNT function to it, while filtering the table for rows with department equal to \u0027Physics\u0027, gpa greater than or equal to 3.5, and race in the set of underrepresented racial or ethnic backgrounds." 
+}, { + "id": "1135", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of academic publications by authors from Africa in the last 5 years?", + "sql_context": "CREATE TABLE academic_publications (id INT, author_name TEXT, author_region TEXT, publication_date DATE); INSERT INTO academic_publications (id, author_name, author_region, publication_date) VALUES (1, \u0027Grace\u0027, \u0027Africa\u0027, \u00272021-05-01\u0027); INSERT INTO academic_publications (id, author_name, author_region, publication_date) VALUES (2, \u0027Hugo\u0027, \u0027Europe\u0027, \u00272022-09-15\u0027); INSERT INTO academic_publications (id, author_name, author_region, publication_date) VALUES (3, \u0027Ike\u0027, \u0027Africa\u0027, \u00272023-02-10\u0027);", + "sql": "SELECT author_region, COUNT(*) FROM academic_publications WHERE author_region \u003d \u0027Africa\u0027 AND publication_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) AND CURRENT_DATE", + "sql_explanation": "This query calculates the total number of academic publications (COUNT(*)) by authors (FROM academic_publications) from Africa (author_region \u003d \u0027Africa\u0027) in the last 5 years (BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) AND CURRENT_DATE)." 
+}, { + "id": "1408", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of graduate students from rural areas to the total number of graduate students in the field of Agriculture?", + "sql_context": "CREATE TABLE grad_students (id INT, name VARCHAR(50), rural VARCHAR(50), field VARCHAR(50));", + "sql": "SELECT ROUND(COUNT(*) * 100.0 / NULLIF(SUM(CASE WHEN rural \u003d \u0027Yes\u0027 THEN 1 ELSE 0 END), 0), 2) AS rural_percentage FROM grad_students WHERE field \u003d \u0027Agriculture\u0027;", + "sql_explanation": "1. Groups the records by rural. 2. Calculates the percentage of graduate students from rural areas in the field of Agriculture. 3. Returns the percentage as a ratio." 
+}, { + "id": "1546", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 graduate students with the highest number of academic publications in the College of Arts and Humanities?", + "sql_context": "CREATE TABLE academic_publications (id INT, student_name VARCHAR(50), student_major VARCHAR(50), publication_count INT); INSERT INTO academic_publications (id, student_name, student_major, publication_count) VALUES (1, \u0027Hannah Nguyen\u0027, \u0027English\u0027, 15), (2, \u0027Ali Ahmed\u0027, \u0027History\u0027, 20), (3, \u0027Sofia Rodriguez\u0027, \u0027Philosophy\u0027, 18), (4, \u0027Taro Tanaka\u0027, \u0027Music\u0027, 12), (5, \u0027Xiao Wang\u0027, \u0027Art\u0027, 14);", + "sql": "SELECT student_name, publication_count FROM academic_publications WHERE student_major LIKE \u0027%Arts and Humanities%\u0027 ORDER BY publication_count DESC LIMIT 3;", + "sql_explanation": "This SQL query identifies the top 3 graduate students with the highest number of academic publications in the College of Arts and Humanities. It does this by using the SELECT statement to retrieve the student name and publication count from the academic_publications table. The query filters the results to only include rows where the student major is in the College of Arts and Humanities, orders the results by the publication count in descending order, and then limits the results to the top 3 rows." 
+}, { + "id": "2290", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 graduate students with the highest number of research publications in the Mathematics department.", + "sql_context": "CREATE TABLE graduate_students (id INT, name VARCHAR(100), department VARCHAR(50), publications INT); INSERT INTO graduate_students (id, name, department, publications) VALUES (1, \u0027Bob\u0027, \u0027Mathematics\u0027, 20);", + "sql": "SELECT name, department, publications FROM graduate_students WHERE department \u003d \u0027Mathematics\u0027 ORDER BY publications DESC LIMIT 5;", + "sql_explanation": "This query lists the top 5 graduate students with the highest number of research publications in the Mathematics department. It does this by using the ORDER BY clause to sort the publications column in descending order, and then using the LIMIT clause to limit the results to the top 5 rows." 
+}, { + "id": "2483", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of research grants received by the College of Engineering in the last 5 years?", + "sql_context": "CREATE TABLE grants (id INT, department VARCHAR(50), amount DECIMAL(10,2), grant_date DATE); INSERT INTO grants (id, department, amount, grant_date) VALUES (1, \u0027Mechanical Engineering\u0027, 25000.00, \u00272018-01-01\u0027), (2, \u0027Electrical Engineering\u0027, 30000.00, \u00272019-05-15\u0027), (3, \u0027Chemical Engineering\u0027, 22000.00, \u00272020-12-31\u0027), (4, \u0027Mechanical Engineering\u0027, 18000.00, \u00272021-03-05\u0027);", + "sql": "SELECT SUM(amount) FROM grants WHERE department LIKE \u0027Engineering%\u0027 AND grant_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 5 YEAR);", + "sql_explanation": "This query calculates the total amount of grants received by the College of Engineering in the last 5 years." 
+}, { + "id": "2487", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new research grant to the grants table for the Computer Science department", + "sql_context": "CREATE TABLE grants (id INT, title VARCHAR(50), amount DECIMAL(10,2), department VARCHAR(50));", + "sql": "INSERT INTO grants (id, title, amount, department) VALUES (40, \u0027New Computer Science Grant\u0027, 75000.00, \u0027Computer Science\u0027);", + "sql_explanation": "This query adds a new research grant to the grants table for the Computer Science department. The grants table includes a column for the department associated with each grant." +}, { + "id": "2488", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many female professors work in the Humanities division, and what percentage of the total faculty does this represent?", + "sql_context": "CREATE TABLE divisions (id INT, name VARCHAR(255)); INSERT INTO divisions (id, name) VALUES (1, \u0027Humanities\u0027); CREATE TABLE faculty (id INT, name VARCHAR(255), gender VARCHAR(6), division_id INT, PRIMARY KEY (id), FOREIGN KEY (division_id) REFERENCES divisions(id)); INSERT INTO faculty (id, name, gender, division_id) VALUES (1, \u0027Alice\u0027, \u0027Female\u0027, 1), (2, \u0027Bob\u0027, 
\u0027Male\u0027, 1), (3, \u0027Charlie\u0027, \u0027Non-binary\u0027, 1);", + "sql": "SELECT (COUNT(f.id) FILTER (WHERE f.gender \u003d \u0027Female\u0027)) / COUNT(f.id) AS percentage FROM faculty f WHERE f.division_id \u003d 1;", + "sql_explanation": "This query calculates the percentage of female professors in the Humanities division. It does so by counting the number of female faculty members in the Humanities division and dividing this by the total number of faculty members in the division. The result is expressed as a percentage." +}, { + "id": "3187", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the authors of publications in the Journal of Computer Science in the year 2018?", + "sql_context": "CREATE TABLE publications (id INT, author VARCHAR(50), year INT, journal VARCHAR(50)); INSERT INTO publications (id, author, year, journal) VALUES (1, \u0027Alice\u0027, 2019, \u0027Journal of Computer Science\u0027), (2, \u0027Bob\u0027, 2018, \u0027Journal of Physics\u0027), (3, \u0027Eve\u0027, 2019, \u0027Journal of Mathematics\u0027), (4, \u0027Alice\u0027, 2018, \u0027Journal of Computer Science\u0027);", + "sql": "SELECT DISTINCT author FROM publications WHERE journal \u003d \u0027Journal of Computer Science\u0027 AND year \u003d 2018;", + "sql_explanation": "This query retrieves the names of all authors who published in the Journal of Computer Science in the year 2018 by selecting all records with the specified conditions and then computing the distinct values of the author column." 
+}, { + "id": "3242", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the student_records table where the student_name is \u0027John Doe\u0027 and the department is \u0027Electrical Engineering\u0027", + "sql_context": "CREATE TABLE student_records (student_id INT, student_name VARCHAR(255), department VARCHAR(255)); INSERT INTO student_records (student_id, student_name, department) VALUES (1, \u0027Alice Smith\u0027, \u0027Computer Science\u0027), (2, \u0027John Doe\u0027, \u0027Electrical Engineering\u0027), (3, \u0027Bob Johnson\u0027, \u0027Mechanical Engineering\u0027);", + "sql": "DELETE FROM student_records WHERE student_name \u003d \u0027John Doe\u0027 AND department \u003d \u0027Electrical Engineering\u0027;", + "sql_explanation": "The SQL query deletes all records from the student_records table where the student_name is \u0027John Doe\u0027 and the department is \u0027Electrical Engineering\u0027. It does this by filtering the student_records table to only include records where the student_name is \u0027John Doe\u0027 and the department is \u0027Electrical Engineering\u0027. Then, it deletes these records from the table." 
+}, { + "id": "3392", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many research grants were awarded to faculty members in the College of Engineering in 2018?", + "sql_context": "CREATE TABLE if not exists FACULTY(id INT, name TEXT, department TEXT, position TEXT, salary INT);CREATE TABLE if not exists GRANTS(id INT, faculty_id INT, grant_name TEXT, grant_amount INT, grant_date DATE, college TEXT);", + "sql": "SELECT COUNT(*) FROM GRANTS WHERE college \u003d \u0027College of Engineering\u0027 AND grant_date LIKE \u00272018-%\u0027;", + "sql_explanation": "The SQL query counts (COUNT(*)) the number of records in the GRANTS table where college is \u0027College of Engineering\u0027 and grant_date starts with \u00272018-\u0027 indicating the year 2018." 
+}, { + "id": "3402", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of female professors in the College of Arts and Humanities?", + "sql_context": "CREATE TABLE professors(id INT, name VARCHAR(50), department VARCHAR(50), salary FLOAT, gender VARCHAR(10)); INSERT INTO professors VALUES (1, \u0027Alice\u0027, \u0027Arts and Humanities\u0027, 80000.0, \u0027Female\u0027); INSERT INTO professors VALUES (2, \u0027Bob\u0027, \u0027Science\u0027, 85000.0, \u0027Male\u0027); INSERT INTO professors VALUES (3, \u0027Charlie\u0027, \u0027Arts and Humanities\u0027, 78000.0, \u0027Female\u0027);", + "sql": "SELECT AVG(salary) FROM professors WHERE department \u003d \u0027Arts and Humanities\u0027 AND gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query calculates the average salary for female professors in the College of Arts and Humanities by filtering the professors table for the relevant department and gender, and then computing the average salary." 
+}, { + "id": "3412", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of research grants awarded to women principal investigators in the last 3 years?", + "sql_context": "CREATE TABLE research_grants (id INT, pi_gender VARCHAR(10), year INT, amount INT); INSERT INTO research_grants (id, pi_gender, year, amount) VALUES (1, \u0027Female\u0027, 2020, 50000); INSERT INTO research_grants (id, pi_gender, year, amount) VALUES (2, \u0027Male\u0027, 2019, 75000);", + "sql": "SELECT SUM(amount) FROM research_grants WHERE pi_gender \u003d \u0027Female\u0027 AND year BETWEEN 2019 AND 2021;", + "sql_explanation": "This query calculates the total amount of research grants awarded to women principal investigators in the last 3 years. It filters the results by pi_gender and year using the WHERE clause and the BETWEEN operator. Then, it calculates the total amount of grants awarded to female PIs using the SUM function." 
+}, { + "id": "3491", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \u0027Graduate_Students\u0027 table where \u0027Gender\u0027 is \u0027Not Specified\u0027 and \u0027Enrollment_Status\u0027 is \u0027Inactive\u0027", + "sql_context": "CREATE TABLE Graduate_Students (Student_ID INT, First_Name VARCHAR(50), Last_Name VARCHAR(50), Gender VARCHAR(20), Enrollment_Status VARCHAR(20));", + "sql": "DELETE FROM Graduate_Students WHERE Gender \u003d \u0027Not Specified\u0027 AND Enrollment_Status \u003d \u0027Inactive\u0027;", + "sql_explanation": "This query deletes records from the \u0027Graduate_Students\u0027 table where \u0027Gender\u0027 is \u0027Not Specified\u0027 and \u0027Enrollment_Status\u0027 is \u0027Inactive\u0027. It uses the DELETE statement, followed by the FROM keyword and the table name. A WHERE clause filters the records to be deleted based on the given conditions." 
+}, { + "id": "3507", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of research grants awarded to non-binary faculty members in the College of Education?", + "sql_context": "CREATE TABLE faculty (id INT, name VARCHAR(100), department VARCHAR(50), gender VARCHAR(50), grant_amount DECIMAL(10,2)); INSERT INTO faculty (id, name, department, gender, grant_amount) VALUES (1, \u0027Kavi\u0027, \u0027Education\u0027, \u0027Non-binary\u0027, 120000.00);", + "sql": "SELECT AVG(grant_amount) FROM faculty WHERE department \u003d \u0027Education\u0027 AND gender \u003d \u0027Non-binary\u0027;", + "sql_explanation": "This query calculates the average research grant amount awarded to non-binary faculty members in the College of Education. It does this by using the AVG function on the grant_amount column, while filtering for the Education department and gender of Non-binary." 
+}, { + "id": "3555", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update graduate student records with new addresses.", + "sql_context": "CREATE TABLE graduate_students (id INT PRIMARY KEY, name VARCHAR(50), department VARCHAR(50), address VARCHAR(100));", + "sql": "UPDATE graduate_students SET address \u003d \u0027123 Main St, San Francisco, CA\u0027 WHERE id IN (1, 3, 5);", + "sql_explanation": "This query updates the address of the students with id 1, 3 and 5 to \u0027123 Main St, San Francisco, CA\u0027. It does this by using the UPDATE statement and specifying the graduate_students table, and then using the WHERE clause to filter the results to only include students with id 1, 3 and 5." +}, { + "id": "3637", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new faculty member into the faculty table, with the faculty_id 3, name \u0027Charlie Green\u0027, and department \u0027Chemistry\u0027.", + "sql_context": "CREATE TABLE faculty (faculty_id INT, name TEXT, department TEXT);", + "sql": "INSERT INTO faculty (faculty_id, name, department) VALUES (3, \u0027Charlie Green\u0027, \u0027Chemistry\u0027);", + "sql_explanation": "The SQL query inserts a new record into the faculty table with the specified faculty_id, name, and department." 
+}, { + "id": "4321", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum research grant amount received by a graduate student in the Physics department?", + "sql_context": "CREATE TABLE graduate_students (id INT, name VARCHAR(50), department VARCHAR(50), grant_received DECIMAL(10,2), grant_year INT);", + "sql": "SELECT MAX(grant_received) FROM graduate_students WHERE department \u003d \u0027Physics\u0027;", + "sql_explanation": "This SQL query calculates the maximum research grant amount received by a graduate student in the Physics department. It does this by using the MAX function on the grant_received column and filtering the data where the department is Physics." 
+}, { + "id": "4635", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of publications from the Mathematics department?", + "sql_context": "CREATE TABLE Faculty (FacultyID int, Name varchar(50), Department varchar(50), NumPublications int); INSERT INTO Faculty (FacultyID, Name, Department, NumPublications) VALUES (1, \u0027John Doe\u0027, \u0027Mathematics\u0027, 15); INSERT INTO Faculty (FacultyID, Name, Department, NumPublications) VALUES (2, \u0027Jane Smith\u0027, \u0027Mathematics\u0027, 20); INSERT INTO Faculty (FacultyID, Name, Department, NumPublications) VALUES (3, \u0027Mary Johnson\u0027, \u0027Physics\u0027, 25); INSERT INTO Faculty (FacultyID, Name, Department, NumPublications) VALUES (4, \u0027Bob Brown\u0027, \u0027Physics\u0027, 10);", + "sql": "SELECT SUM(NumPublications) FROM Faculty WHERE Department \u003d \u0027Mathematics\u0027;", + "sql_explanation": "This query calculates the total number of publications from the Mathematics department by summing the NumPublications column and filtering for the Mathematics department." 
+}, { + "id": "4716", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total grant amount awarded to the \u0027Arts and Humanities\u0027 department", + "sql_context": "CREATE TABLE grants (id INT, department VARCHAR(20), amount FLOAT); INSERT INTO grants (id, department, amount) VALUES (1, \u0027Arts and Humanities\u0027, 50000.0), (2, \u0027Sciences\u0027, 75000.0);", + "sql": "SELECT SUM(amount) FROM grants WHERE department \u003d \u0027Arts and Humanities\u0027;", + "sql_explanation": "This query calculates the total grant amount awarded to the \u0027Arts and Humanities\u0027 department by summing the \u0027amount\u0027 column where the \u0027department\u0027 is \u0027Arts and Humanities\u0027." 
+}, { + "id": "4896", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many graduate students in the Engineering department have not published any papers?", + "sql_context": "CREATE TABLE EngineeringStudents(StudentID INT, NumPapers INT); INSERT INTO EngineeringStudents(StudentID, NumPapers) VALUES (1, 3), (2, 0), (3, NULL);", + "sql": "SELECT COUNT(StudentID) FROM EngineeringStudents WHERE NumPapers \u003d 0;", + "sql_explanation": "This query counts the number of Engineering graduate students who have not published any papers by filtering based on the number of papers." +}, { + "id": "5219", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the graduate students who have not published any research papers?", + "sql_context": "CREATE TABLE graduate_students (id INT, name VARCHAR(50), department VARCHAR(50), num_publications INT); INSERT INTO graduate_students (id, name, department, num_publications) VALUES (1, \u0027Hannah\u0027, \u0027Biology\u0027, 3), (2, \u0027Ike\u0027, \u0027Biology\u0027, 0), (3, \u0027Jasmine\u0027, \u0027Biology\u0027, 2);", + "sql": "SELECT name FROM graduate_students WHERE num_publications \u003d 0;", + "sql_explanation": "Simply filter the graduate_students table to only include 
rows where the number of publications is 0." +}, { + "id": "5238", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of research grants awarded to graduate students from Canada?", + "sql_context": "CREATE TABLE research_grants (id INT, student_id INT, country VARCHAR(50)); INSERT INTO research_grants (id, student_id, country) VALUES (1, 123, \u0027USA\u0027), (2, 456, \u0027Canada\u0027), (3, 789, \u0027Mexico\u0027), (4, 111, \u0027Canada\u0027), (5, 222, \u0027USA\u0027);", + "sql": "SELECT COUNT(*) FROM research_grants WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query counts the number of research grants awarded to graduate students from Canada by filtering the country column for \u0027Canada\u0027 and then calculating the count of records." 
+}, { + "id": "5557", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique research grant IDs and their corresponding principal investigators.", + "sql_context": "CREATE TABLE ResearchGrants (GrantID INT, PI_Name VARCHAR(50), Department VARCHAR(50), GrantAmount DECIMAL(10,2)); INSERT INTO ResearchGrants (GrantID, PI_Name, Department, GrantAmount) VALUES (1, \u0027James Anderson\u0027, \u0027Chemistry\u0027, 50000); INSERT INTO ResearchGrants (GrantID, PI_Name, Department, GrantAmount) VALUES (2, \u0027Sophia Thompson\u0027, \u0027Biology\u0027, 75000); INSERT INTO ResearchGrants (GrantID, PI_Name, Department, GrantAmount) VALUES (3, \u0027Michael Brown\u0027, \u0027Physics\u0027, 100000);", + "sql": "SELECT DISTINCT GrantID, PI_Name FROM ResearchGrants;", + "sql_explanation": "This SQL query lists all unique research grant IDs and their corresponding principal investigators. It uses the DISTINCT keyword to ensure that each grant ID and PI name is only listed once, and the SELECT clause to specify the columns to include in the results." 
+}, { + "id": "5584", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum grant amount awarded to any faculty member in the College of Fine Arts and Design?", + "sql_context": "CREATE TABLE faculty (id INT, name VARCHAR(100), department VARCHAR(50), grant_amount DECIMAL(10,2)); INSERT INTO faculty (id, name, department, grant_amount) VALUES (1, \u0027Jules\u0027, \u0027Theater\u0027, 30000.00), (2, \u0027Visual Arts\u0027, 40000.00); CREATE VIEW fine_arts_departments AS SELECT * FROM faculty WHERE department LIKE \u0027Fine%\u0027 OR department LIKE \u0027Design%\u0027;", + "sql": "SELECT MAX(grant_amount) FROM fine_arts_departments;", + "sql_explanation": "This query calculates the maximum grant amount awarded to any faculty member in the College of Fine Arts and Design. It does this by using the MAX function on the grant_amount column, while filtering for the Fine Arts and Design departments using the fine_arts_departments view." 
+}, { + "id": "5850", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the students table", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(50), email VARCHAR(50)); INSERT INTO students (id, name, email) VALUES (1, \u0027John Doe\u0027, \u0027johndoe@example.com\u0027), (2, \u0027Jane Doe\u0027, \u0027janedoe@example.com\u0027);", + "sql": "DELETE FROM students;", + "sql_explanation": "This query deletes all records from the students table. Without a WHERE clause, the DELETE statement removes all rows." +}, { + "id": "1143", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many urban refugees have been supported by \u0027World Aid\u0027 in the \u0027Asia\u0027 region with over 40 years of age, in the last 3 years?", + "sql_context": "CREATE TABLE refugee (id INT, name VARCHAR(255), age INT, location VARCHAR(255), supported_by VARCHAR(255), support_date DATE); INSERT INTO refugee (id, name, age, location, supported_by, support_date) VALUES (1, \u0027John Doe\u0027, 45, \u0027Asia\u0027, \u0027World Aid\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM refugee WHERE location \u003d \u0027Asia\u0027 AND supported_by \u003d \u0027World Aid\u0027 AND age \u003e 40 AND support_date 
BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 YEAR) AND CURDATE();", + "sql_explanation": "This query counts the number of urban refugees by filtering the \u0027refugee\u0027 table based on the \u0027Asia\u0027 region, \u0027World Aid\u0027 organization, age over 40, and support date within the last 3 years." +}, { + "id": "1598", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations had the highest number of successful interventions in \u0027Africa\u0027 in 2018?", + "sql_context": "CREATE TABLE organizations (id INT, name VARCHAR(255)); INSERT INTO organizations (id, name) VALUES (1, \u0027UNHCR\u0027), (2, \u0027IOM\u0027), (3, \u0027World Vision\u0027); CREATE TABLE interventions (id INT, organization_id INT, intervention_type VARCHAR(255), success INT, intervention_date DATE, location VARCHAR(255)); INSERT INTO interventions (id, organization_id, intervention_type, success, intervention_date, location) VALUES (1, 1, \u0027Food Assistance\u0027, 500, \u00272018-01-01\u0027, \u0027Africa\u0027), (2, 1, \u0027Health Services\u0027, 300, \u00272018-02-01\u0027, \u0027Asia\u0027), (3, 2, \u0027Food Assistance\u0027, 700, \u00272018-03-01\u0027, \u0027Africa\u0027), (4, 2, \u0027Health Services\u0027, 400, \u00272018-04-01\u0027, \u0027Europe\u0027), (5, 3, \u0027Food Assistance\u0027, 600, \u00272018-05-01\u0027, \u0027Africa\u0027), (6, 3, \u0027Health Services\u0027, 800, \u00272018-06-01\u0027, \u0027Asia\u0027);", + "sql": "SELECT organization_id, MAX(success) as highest_successful_interventions FROM interventions WHERE YEAR(intervention_date) \u003d 2018 AND location \u003d 
\u0027Africa\u0027;", + "sql_explanation": "The SQL query groups the interventions by organization_id, filters for the year 2018 and location \u0027Africa\u0027, and selects the maximum value of the success column for each group." +}, { + "id": "2384", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the water projects funded by the EU in Malawi in 2019.", + "sql_context": "CREATE TABLE water_projects (project_name TEXT, funder TEXT, start_date DATE, end_date DATE, location TEXT); INSERT INTO water_projects (project_name, funder, start_date, end_date, location) VALUES (\u0027Mudi Dam\u0027, \u0027EU\u0027, \u00272019-01-10\u0027, \u00272021-06-30\u0027, \u0027Mangochi\u0027), (\u0027Likhubula Water Supply\u0027, \u0027EU\u0027, \u00272019-03-01\u0027, \u00272020-12-20\u0027, \u0027Phalombe\u0027), (\u0027Nkhata Bay Water Network\u0027, \u0027USAID\u0027, \u00272018-06-15\u0027, \u00272022-09-30\u0027, \u0027Nkhata Bay\u0027);", + "sql": "SELECT * FROM water_projects WHERE funder \u003d \u0027EU\u0027 AND start_date BETWEEN \u00272019-01-01\u0027 AND \u00272019-12-31\u0027 AND location \u003d \u0027Malawi\u0027;", + "sql_explanation": "The SQL query lists all the water projects funded by the EU in Malawi in 2019. It filters the \u0027water_projects\u0027 table to only include projects funded by the EU, located in Malawi, and started between January 1st and December 31st, 2019." 
+}, { + "id": "2495", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of funding received by the Red Cross for disaster relief in Nepal since 2015?", + "sql_context": "CREATE TABLE disaster_relief_funding (organization TEXT, funding_amount INTEGER, funding_date DATE); INSERT INTO disaster_relief_funding (organization, funding_amount, funding_date) VALUES (\u0027Red Cross\u0027, 500000, \u00272015-04-25\u0027), (\u0027World Vision\u0027, 300000, \u00272015-04-25\u0027), (\u0027CARE\u0027, 400000, \u00272017-08-24\u0027);", + "sql": "SELECT SUM(funding_amount) FROM disaster_relief_funding WHERE organization \u003d \u0027Red Cross\u0027 AND funding_date \u003e\u003d \u00272015-01-01\u0027;", + "sql_explanation": "The SQL query calculates the total amount of funding received by the Red Cross for disaster relief in Nepal since 2015. It filters the \u0027disaster_relief_funding\u0027 table to only include rows where the organization is \u0027Red Cross\u0027 and the funding_date is on or after 2015-01-01, and then uses the SUM function to calculate the total funding amount." 
+}, { + "id": "2656", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and nationality of the person who provided the most water assistance in Syria in 2021?", + "sql_context": "CREATE TABLE water_assistance (id INT, name TEXT, nationality TEXT, quantity INT, country TEXT, year INT); INSERT INTO water_assistance (id, name, nationality, quantity, country, year) VALUES (1, \u0027Ahmed Al-Masri\u0027, \u0027Syrian\u0027, 1000, \u0027Syria\u0027, 2021), (2, \u0027Fatima Al-Khalil\u0027, \u0027Syrian\u0027, 1200, \u0027Syria\u0027, 2021), (3, \u0027Ali Al-Said\u0027, \u0027Iraqi\u0027, 1500, \u0027Syria\u0027, 2021);", + "sql": "SELECT name, nationality FROM water_assistance WHERE country \u003d \u0027Syria\u0027 AND year \u003d 2021 ORDER BY quantity DESC LIMIT 1;", + "sql_explanation": "The SQL query selects the name and nationality columns from the water_assistance table, filters for records with country \u0027Syria\u0027 and year 2021, orders the results by quantity in descending order, and limits the result set to 1 row. This returns the name and nationality of the person who provided the most water assistance in Syria in 2021." 
+}, { + "id": "2860", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which education resources were distributed in South Sudan in Q4 2021?", + "sql_context": "CREATE TABLE education_resources (id INT, resource TEXT, quantity INT, country TEXT, quarter INT, year INT); INSERT INTO education_resources (id, resource, quantity, country, quarter, year) VALUES (1, \u0027Textbooks\u0027, 500, \u0027South Sudan\u0027, 4, 2021), (2, \u0027School Supplies\u0027, 300, \u0027South Sudan\u0027, 4, 2021), (3, \u0027Laptops\u0027, 200, \u0027South Sudan\u0027, 4, 2021);", + "sql": "SELECT DISTINCT resource FROM education_resources WHERE country \u003d \u0027South Sudan\u0027 AND quarter \u003d 4 AND year \u003d 2021;", + "sql_explanation": "The SQL query selects the DISTINCT resource column from the education_resources table, filters for records with country \u0027South Sudan\u0027, quarter 4, and year 2021. This returns the distinct education resources distributed in South Sudan in Q4 2021." 
+}, { + "id": "3087", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and location of ongoing disaster response projects in Haiti as of 2022?", + "sql_context": "CREATE TABLE disaster_response (id INT, location TEXT, year INT, ongoing BOOLEAN); INSERT INTO disaster_response (id, location, year, ongoing) VALUES (1, \u0027Haiti\u0027, 2022, TRUE), (2, \u0027Philippines\u0027, 2021, FALSE);", + "sql": "SELECT name, location FROM disaster_response WHERE location \u003d \u0027Haiti\u0027 AND year \u003d 2022 AND ongoing \u003d TRUE;", + "sql_explanation": "The SQL query selects the name and location columns from the disaster_response table where the location is equal to \u0027Haiti\u0027, the year is equal to 2022, and the ongoing column is equal to TRUE. This returns the names and locations of ongoing disaster response projects in Haiti as of 2022. Note that the table does not contain any name information, so this query will return an empty result set." 
+}, { + "id": "3215", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of water wells dug in \"Latin America\" since 2018?", + "sql_context": "CREATE TABLE water_wells (id INT, project_id INT, location VARCHAR(255), construction_date DATE); INSERT INTO water_wells (id, project_id, location, construction_date) VALUES (1, 4001, \u0027Colombia\u0027, \u00272019-05-01\u0027); INSERT INTO water_wells (id, project_id, location, construction_date) VALUES (2, 4002, \u0027Peru\u0027, \u00272018-02-01\u0027);", + "sql": "SELECT COUNT(*) FROM water_wells WHERE location \u003d \u0027Latin America\u0027 AND YEAR(construction_date) \u003e\u003d 2018;", + "sql_explanation": "Count the number of water wells dug in \"Latin America\" since 2018." 
+}, { + "id": "3521", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all disaster response teams that worked in \u0027Afghanistan\u0027 and \u0027Pakistan\u0027 from \u0027disaster_response\u0027 table.", + "sql_context": "CREATE TABLE disaster_response(id INT, team_name VARCHAR(255), location VARCHAR(255)); INSERT INTO disaster_response(id, team_name, location) VALUES (\u0027DR001\u0027, \u0027Team A\u0027, \u0027Afghanistan\u0027), (\u0027DR002\u0027, \u0027Team B\u0027, \u0027Pakistan\u0027), (\u0027DR003\u0027, \u0027Team C\u0027, \u0027Nepal\u0027);", + "sql": "SELECT DISTINCT team_name FROM disaster_response WHERE location IN (\u0027Afghanistan\u0027, \u0027Pakistan\u0027);", + "sql_explanation": "This query lists all unique team names that worked in Afghanistan and Pakistan by selecting distinct \u0027team_name\u0027 where \u0027location\u0027 is either \u0027Afghanistan\u0027 or \u0027Pakistan\u0027." 
+}, { + "id": "3779", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of people affected per disaster?", + "sql_context": "CREATE TABLE Disaster (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50), affected_region_id INT, FOREIGN KEY (affected_region_id) REFERENCES Region(id)); INSERT INTO Disaster (id, name, type, affected_region_id) VALUES (1, \u0027Flood\u0027, \u0027Water\u0027, 1); INSERT INTO Disaster (id, name, type, affected_region_id) VALUES (2, \u0027Earthquake\u0027, \u0027Geological\u0027, 2); CREATE TABLE Affected_Population (id INT PRIMARY KEY, disaster_id INT, population_size INT, FOREIGN KEY (disaster_id) REFERENCES Disaster(id)); INSERT INTO Affected_Population (id, disaster_id, population_size) VALUES (1, 1, 5000); INSERT INTO Affected_Population (id, disaster_id, population_size) VALUES (2, 2, 3000);", + "sql": "SELECT AVG(ap.population_size) as avg_population_per_disaster FROM Affected_Population ap;", + "sql_explanation": "The query calculates the average number of people affected per disaster by averaging the population_size column." 
+}, { + "id": "3923", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of funds spent on refugee support in Africa?", + "sql_context": "CREATE TABLE funds (id INT, category TEXT, region TEXT, amount DECIMAL(10,2)); INSERT INTO funds (id, category, region, amount) VALUES (1, \u0027Refugee Support\u0027, \u0027Middle East\u0027, 250000.00), (2, \u0027Disaster Response\u0027, \u0027Asia\u0027, 300000.00), (3, \u0027Community Development\u0027, \u0027Africa\u0027, 150000.00), (4, \u0027Refugee Support\u0027, \u0027Africa\u0027, 50000.00), (5, \u0027Refugee Support\u0027, \u0027Africa\u0027, 75000.00);", + "sql": "SELECT SUM(amount) FROM funds WHERE category \u003d \u0027Refugee Support\u0027 AND region \u003d \u0027Africa\u0027;", + "sql_explanation": "This query calculates the total amount of funds spent on refugee support in Africa by filtering the rows with \u0027Refugee Support\u0027 as the category and \u0027Africa\u0027 as the region, and then using the SUM function to find the total amount of funds." 
+}, { + "id": "4444", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique disaster types that occurred in \u0027asia\u0027 region from the \u0027disaster_response\u0027 table?", + "sql_context": "CREATE TABLE disaster_response (id INT, disaster_type TEXT, location TEXT, response INT, year INT);", + "sql": "SELECT DISTINCT disaster_type FROM disaster_response WHERE location \u003d \u0027asia\u0027;", + "sql_explanation": "The SQL query lists all the unique disaster types that occurred in the \u0027asia\u0027 region from the \u0027disaster_response\u0027 table by selecting DISTINCT disaster_type where the location is \u0027asia\u0027." 
+}, { + "id": "4692", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of people served by a community center in \u0027Asia\u0027?", + "sql_context": "CREATE TABLE community_centers (center_id INT, name TEXT, location TEXT, people_served INT); INSERT INTO community_centers (center_id, name, location, people_served) VALUES (1, \u0027Center A\u0027, \u0027Asia\u0027, 500), (2, \u0027Center B\u0027, \u0027Europe\u0027, 800), (3, \u0027Center C\u0027, \u0027Asia\u0027, 1000);", + "sql": "SELECT MAX(people_served) FROM community_centers WHERE location \u003d \u0027Asia\u0027;", + "sql_explanation": "This query finds the maximum number of people served by a community center in \u0027Asia\u0027 using the MAX function on the people_served column and WHERE clause to filter the rows with \u0027Asia\u0027 as location." 
+}, { + "id": "4972", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding amount for projects in the \u0027Education\u0027 sector?", + "sql_context": "CREATE TABLE projects (id INT, sector TEXT, total_funding DECIMAL); INSERT INTO projects (id, sector, total_funding) VALUES (1, \u0027Health\u0027, 10000.00), (2, \u0027Education\u0027, 15000.00), (3, \u0027Agriculture\u0027, 20000.00);", + "sql": "SELECT SUM(total_funding) FROM projects WHERE sector \u003d \u0027Education\u0027;", + "sql_explanation": "This query calculates the total funding amount for projects in the \u0027Education\u0027 sector. It does so by summing the values in the \u0027total_funding\u0027 column, but only for rows where the \u0027sector\u0027 column is equal to \u0027Education\u0027." 
+}, { + "id": "5021", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many disaster response teams are there in Asia?", + "sql_context": "CREATE TABLE disaster_response_teams (id INT, name VARCHAR(100), region VARCHAR(50)); INSERT INTO disaster_response_teams (id, name, region) VALUES (1, \u0027Team A\u0027, \u0027Asia\u0027), (2, \u0027Team B\u0027, \u0027Africa\u0027), (3, \u0027Team C\u0027, \u0027Asia\u0027);", + "sql": "SELECT COUNT(*) FROM disaster_response_teams WHERE region \u003d \u0027Asia\u0027;", + "sql_explanation": "This query counts the number of disaster response teams in Asia. It does this by using the COUNT() function on all rows (*), and filtering the data to only include rows where the region is \u0027Asia\u0027." 
+}, { + "id": "5111", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and location of the refugee camp with the highest population?", + "sql_context": "CREATE TABLE camp (camp_id INT, name VARCHAR(50), location VARCHAR(50), population INT); INSERT INTO camp (camp_id, name, location, population) VALUES (1, \u0027Camp A\u0027, \u0027City A\u0027, 500), (2, \u0027Camp B\u0027, \u0027City B\u0027, 700), (3, \u0027Camp C\u0027, \u0027City C\u0027, 300);", + "sql": "SELECT name, location FROM camp ORDER BY population DESC LIMIT 1;", + "sql_explanation": "The query selects the name and location of the refugee camp with the highest population by ordering the camps in descending order based on the population and selecting the first record." 
+}, { + "id": "5173", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of medical supply distributions in Haiti?", + "sql_context": "CREATE TABLE medical_supplies (id INT, location VARCHAR(255), distribution_date DATE); INSERT INTO medical_supplies (id, location, distribution_date) VALUES (1, \u0027Haiti\u0027, \u00272022-01-01\u0027), (2, \u0027Syria\u0027, \u00272022-01-02\u0027), (3, \u0027Haiti\u0027, \u00272022-01-03\u0027);", + "sql": "SELECT COUNT(*) FROM medical_supplies WHERE location \u003d \u0027Haiti\u0027;", + "sql_explanation": "Counts the number of rows in the \u0027medical_supplies\u0027 table where the \u0027location\u0027 is \u0027Haiti\u0027." 
+}, { + "id": "2250", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many landfills are there in North America with a capacity greater than 10000 tons as of 2022?\u0027", + "sql_context": "CREATE TABLE landfills (country VARCHAR(50), capacity INT, year INT); INSERT INTO landfills (country, capacity, year) VALUES (\u0027Mexico\u0027, 15000, 2022), (\u0027Canada\u0027, 13000, 2022), (\u0027USA\u0027, 11000, 2022);", + "sql": "SELECT COUNT(*) as num_landfills FROM landfills WHERE capacity \u003e 10000 AND year \u003d 2022 AND country IN (\u0027Mexico\u0027, \u0027Canada\u0027, \u0027USA\u0027);", + "sql_explanation": "This SQL query calculates the number of landfills in North America with a capacity greater than 10000 tons as of 2022. It does this by filtering the data for the year 2022 and countries in North America, and then counting the number of landfills with a capacity greater than 10000 tons." 
+}, { + "id": "2427", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generated by residential sectors in the city of San Francisco in 2020?", + "sql_context": "CREATE TABLE waste_generation (id INT, sector VARCHAR(20), location VARCHAR(20), amount DECIMAL(10,2), date DATE); INSERT INTO waste_generation (id, sector, location, amount, date) VALUES (1, \u0027residential\u0027, \u0027San Francisco\u0027, 500, \u00272020-01-01\u0027);", + "sql": "SELECT SUM(amount) FROM waste_generation WHERE sector \u003d \u0027residential\u0027 AND location \u003d \u0027San Francisco\u0027 AND date \u003d \u00272020-01-01\u0027;", + "sql_explanation": "This query calculates the total waste generated by residential sectors in San Francisco in 2020 by summing the \u0027amount\u0027 column in the \u0027waste_generation\u0027 table where the \u0027sector\u0027 is \u0027residential\u0027, \u0027location\u0027 is \u0027San Francisco\u0027 and \u0027date\u0027 is \u00272020-01-01\u0027." 
+}, { + "id": "2810", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of waste generation from the residential sector in New York City in 2021.", + "sql_context": "CREATE TABLE waste_generation (id INT, sector VARCHAR(20), location VARCHAR(20), amount DECIMAL(10,2), date DATE); INSERT INTO waste_generation (id, sector, location, amount, date) VALUES (1, \u0027residential\u0027, \u0027New York City\u0027, 500, \u00272021-01-01\u0027);", + "sql": "DELETE FROM waste_generation WHERE sector \u003d \u0027residential\u0027 AND location \u003d \u0027New York City\u0027 AND date \u003d \u00272021-01-01\u0027;", + "sql_explanation": "This query deletes records of waste generation from the residential sector in New York City in 2021 by filtering for the \u0027sector\u0027 of \u0027residential\u0027, \u0027location\u0027 of \u0027New York City\u0027, and \u0027date\u0027 of \u00272021-01-01\u0027 in the \u0027waste_generation\u0027 table." 
+}, { + "id": "3013", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total landfill capacity for European countries?", + "sql_context": "CREATE TABLE LandfillCapacities (country VARCHAR(50), capacity INT); INSERT INTO LandfillCapacities (country, capacity) VALUES (\u0027Germany\u0027, 120000), (\u0027France\u0027, 90000), (\u0027UK\u0027, 80000);", + "sql": "SELECT SUM(capacity) FROM LandfillCapacities WHERE country IN (\u0027Germany\u0027, \u0027France\u0027, \u0027UK\u0027, \u0027Italy\u0027, \u0027Spain\u0027);", + "sql_explanation": "Calculate the total landfill capacity for European countries by selecting the sum of capacity from the LandfillCapacities table where the country is in the given list." 
+}, { + "id": "3019", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the recycling rates for each material type in 2020, and calculate the overall recycling rate for that year.", + "sql_context": "CREATE TABLE recycling_rates(material_type TEXT, recycling_rate DECIMAL(3,2), recycling_year INT); INSERT INTO recycling_rates(material_type, recycling_rate, recycling_year) VALUES(\u0027Plastic\u0027, 0.35, 2020), (\u0027Glass\u0027, 0.60, 2020), (\u0027Paper\u0027, 0.75, 2020);", + "sql": "SELECT SUM(recycling_rate) / COUNT(DISTINCT material_type) FROM recycling_rates WHERE recycling_year \u003d 2020;", + "sql_explanation": "The SQL query calculates the overall recycling rate for 2020 by summing the recycling_rate for all material types and dividing by the count of distinct material types, filtered for the year 2020." 
+}, { + "id": "3180", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generated in the industrial sector in the state of California?", + "sql_context": "CREATE TABLE waste_generation_state (sector VARCHAR(20), state VARCHAR(20), waste_quantity INT); INSERT INTO waste_generation_state (sector, state, waste_quantity) VALUES (\u0027residential\u0027, \u0027California\u0027, 2000), (\u0027commercial\u0027, \u0027California\u0027, 2500), (\u0027industrial\u0027, \u0027California\u0027, 4000);", + "sql": "SELECT waste_quantity FROM waste_generation_state WHERE sector \u003d \u0027industrial\u0027 AND state \u003d \u0027California\u0027;", + "sql_explanation": "* This SQL query selects the waste_quantity from the waste_generation_state table where the sector is \u0027industrial\u0027 and the state is \u0027California\u0027 to find the total waste generated in the industrial sector in the state of California." 
+}, { + "id": "3205", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table \u0027waste_generation\u0027 with columns id, region, year, and metric", + "sql_context": "CREATE TABLE waste_generation ( id INT PRIMARY KEY, region VARCHAR(50), year INT, metric DECIMAL(5,2));", + "sql": "CREATE TABLE waste_generation ( id INT PRIMARY KEY, region VARCHAR(50), year INT, metric DECIMAL(5,2));", + "sql_explanation": "This SQL statement creates a table named \u0027waste_generation\u0027 with columns \u0027id\u0027, \u0027region\u0027, \u0027year\u0027, and \u0027metric\u0027. It uses the CREATE TABLE statement and specifies the column names and data types." +}, { + "id": "3225", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which circular economy initiatives have the highest recycling rates?", + "sql_context": "CREATE TABLE circular_initiatives (id INT, initiative_name VARCHAR(50), recycling_rate FLOAT);", + "sql": "SELECT initiative_name, recycling_rate FROM circular_initiatives ORDER BY recycling_rate DESC LIMIT 1;", + "sql_explanation": "The SQL query selects the initiative_name and recycling_rate columns from the circular_initiatives table and orders the rows by recycling_rate in descending order, returning only the top row." 
+}, { + "id": "3312", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new recycling plant with name \u0027Recycling Plant 6\u0027 in Canada that processes 5 types of waste.", + "sql_context": "CREATE TABLE recycling_plants (name TEXT, country TEXT, waste_types INTEGER); INSERT INTO recycling_plants (name, country, waste_types) VALUES (\u0027Recycling Plant 1\u0027, \u0027Canada\u0027, 6), (\u0027Recycling Plant 2\u0027, \u0027Canada\u0027, 4);", + "sql": "INSERT INTO recycling_plants (name, country, waste_types) VALUES (\u0027Recycling Plant 6\u0027, \u0027Canada\u0027, 5);", + "sql_explanation": "The SQL query inserts a new row into the recycling_plants table with the values name \u003d \u0027Recycling Plant 6\u0027, country \u003d \u0027Canada\u0027, waste_types \u003d 5." 
+}, { + "id": "3471", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly waste generation in the residential sector in the city of Seattle?", + "sql_context": "CREATE TABLE waste_generation (city varchar(255), sector varchar(255), waste_amount float, generation_date date); INSERT INTO waste_generation (city, sector, waste_amount, generation_date) VALUES (\u0027Seattle\u0027, \u0027Residential\u0027, 1500, \u00272022-01-01\u0027); INSERT INTO waste_generation (city, sector, waste_amount, generation_date) VALUES (\u0027Seattle\u0027, \u0027Residential\u0027, 1600, \u00272022-02-01\u0027);", + "sql": "SELECT AVG(waste_amount) FROM waste_generation WHERE city \u003d \u0027Seattle\u0027 AND sector \u003d \u0027Residential\u0027", + "sql_explanation": "This query calculates the average monthly waste generation in the residential sector in the city of Seattle by taking the average of waste_amount from the waste_generation table where city is \u0027Seattle\u0027 and sector is \u0027Residential\u0027." 
+}, { + "id": "3566", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the landfill capacity for the country of South Africa for the year 2030?", + "sql_context": "CREATE TABLE country_landfill_capacity (country VARCHAR(20), year INT, capacity INT); INSERT INTO country_landfill_capacity (country, year, capacity) VALUES (\u0027South Africa\u0027, 2030, 8000000);", + "sql": "SELECT capacity FROM country_landfill_capacity WHERE country \u003d \u0027South Africa\u0027 AND year \u003d 2030;", + "sql_explanation": "This query calculates the landfill capacity for the country of South Africa for the year 2030. It does so by selecting the capacity for South Africa in the country_landfill_capacity table for the year 2030." 
+}, { + "id": "3649", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generated in India in the year 2020?", + "sql_context": "CREATE TABLE WasteGeneration (country VARCHAR(50), year INT, waste_generated_kg FLOAT);", + "sql": "SELECT SUM(waste_generated_kg) FROM WasteGeneration WHERE country \u003d \u0027India\u0027 AND year \u003d 2020;", + "sql_explanation": "The SQL query calculates the total waste generated in India in the year 2020 by selecting the sum of waste_generated_kg values for records with \u0027India\u0027 as the country and \u00272020\u0027 as the year." +}, { + "id": "3656", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average landfill capacity used by the \u0027City\u0027 and \u0027County\u0027 sectors, excluding records with a capacity of 0.", + "sql_context": "CREATE TABLE landfill_data (sector VARCHAR(20), capacity FLOAT); INSERT INTO landfill_data (sector, capacity) VALUES (\u0027City\u0027, 500), (\u0027City\u0027, 600), (\u0027County\u0027, 700), (\u0027County\u0027, 800), (\u0027Private\u0027, 900), (\u0027State\u0027, 1000);", + "sql": "SELECT AVG(capacity) FROM landfill_data WHERE sector IN (\u0027City\u0027, \u0027County\u0027) AND capacity \u003e 0;", + 
"sql_explanation": "This query calculates the average landfill capacity used by the \u0027City\u0027 and \u0027County\u0027 sectors, excluding records with a capacity of 0. It does this by using the AVG function to calculate the average \u0027capacity\u0027 value for the \u0027City\u0027 and \u0027County\u0027 sectors, as determined by the WHERE clause." +}, { + "id": "3907", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the recycling rate for the state of Texas for the year 2020?\u0027", + "sql_context": "CREATE TABLE state_recycling_rates (state VARCHAR(20), year INT, recycling_rate FLOAT); INSERT INTO state_recycling_rates (state, year, recycling_rate) VALUES (\u0027Texas\u0027, 2020, 0.25);", + "sql": "SELECT recycling_rate FROM state_recycling_rates WHERE state \u003d \u0027Texas\u0027 AND year \u003d 2020;", + "sql_explanation": "This query calculates the recycling rate for the state of Texas for the year 2020. It does so by selecting the recycling_rate for the state of Texas in the state_recycling_rates table for the year 2020." 
+}, { + "id": "3945", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average waste generation per capita in the Asian region?", + "sql_context": "CREATE TABLE WasteGeneration (country VARCHAR(255), waste_generation_kg_per_capita DECIMAL(5,2), region VARCHAR(255)); INSERT INTO WasteGeneration (country, waste_generation_kg_per_capita, region) VALUES (\u0027Japan\u0027, 3.2, \u0027Asia\u0027), (\u0027China\u0027, 5.1, \u0027Asia\u0027), (\u0027India\u0027, 1.7, \u0027Asia\u0027);", + "sql": "SELECT AVG(waste_generation_kg_per_capita) FROM WasteGeneration WHERE region \u003d \u0027Asia\u0027;", + "sql_explanation": "Calculate the average waste generation per capita for the records with the \u0027Asia\u0027 region." 
+}, { + "id": "4148", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new landfill capacity records for the \u0027East\u0027 region in 2023 with a capacity of 180000 cubic meters.", + "sql_context": "CREATE TABLE landfill_capacity(region VARCHAR(20), year INT, capacity INT);", + "sql": "INSERT INTO landfill_capacity(region, year, capacity) VALUES(\u0027East\u0027, 2023, 180000);", + "sql_explanation": "Inserts a new record into the landfill_capacity table for the \u0027East\u0027 region, the year 2023, and a capacity of 180000 cubic meters." +}, { + "id": "4303", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total landfill capacity (in cubic meters) for the \u0027eastern\u0027 region in 2022?", + "sql_context": "CREATE TABLE landfill_capacity(region VARCHAR(10), year INT, capacity INT); INSERT INTO landfill_capacity VALUES(\u0027eastern\u0027, 2021, 500000), (\u0027eastern\u0027, 2022, 550000), (\u0027western\u0027, 2021, 600000), (\u0027western\u0027, 2022, 650000);", + "sql": "SELECT capacity FROM landfill_capacity WHERE region \u003d \u0027eastern\u0027 AND year \u003d 2022;", + "sql_explanation": "The SQL query returns the landfill capacity for the eastern region in the year 2022." 
+}, { + "id": "4595", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation in the city of Toronto in 2021?", + "sql_context": "CREATE TABLE waste_generation(city VARCHAR(20), year INT, amount INT); INSERT INTO waste_generation VALUES(\u0027Toronto\u0027, 2021, 250000);", + "sql": "SELECT amount FROM waste_generation WHERE city \u003d \u0027Toronto\u0027 AND year \u003d 2021;", + "sql_explanation": "This query selects the amount of waste generated in the city of Toronto in the year 2021. It filters the waste_generation table based on the city and year columns and returns the corresponding amount." +}, { + "id": "4840", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the recycling rate for each country in 2019 from the \u0027recycling_rates\u0027 table", + "sql_context": "CREATE TABLE recycling_rates (country VARCHAR(50), year INT, recycling_rate FLOAT);", + "sql": "SELECT country, recycling_rate FROM recycling_rates WHERE year \u003d 2019;", + "sql_explanation": "This query retrieves the recycling rate for each country in 2019 by selecting the \u0027country\u0027 and \u0027recycling_rate\u0027 columns where the \u0027year\u0027 column value is 2019." 
+}, { + "id": "4930", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete waste generation records for the \u0027South\u0027 region in the year 2021.", + "sql_context": "CREATE TABLE waste_generation(region VARCHAR(20), year INT, waste_gram INT); INSERT INTO waste_generation(region, year, waste_gram) VALUES(\u0027North\u0027, 2021, 50000),(\u0027North\u0027, 2022, 60000),(\u0027South\u0027, 2021, 40000),(\u0027South\u0027, 2022, 70000);", + "sql": "DELETE FROM waste_generation WHERE region \u003d \u0027South\u0027 AND year \u003d 2021;", + "sql_explanation": "Filters the waste_generation table for the \u0027South\u0027 region and the year 2021, then deletes those records." +}, { + "id": "5220", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the circular economy initiative in the city of London?", + "sql_context": "CREATE TABLE circular_economy (city VARCHAR(255), initiative VARCHAR(255)); INSERT INTO circular_economy (city, initiative) VALUES (\u0027London\u0027, \u0027Waste to Wealth\u0027);", + "sql": "SELECT initiative FROM circular_economy WHERE city \u003d \u0027London\u0027;", + "sql_explanation": "This query selects the circular economy initiative in the city of London from the circular_economy table." 
+}, { + "id": "5356", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the landfill capacity for each region?", + "sql_context": "CREATE TABLE region_landfill (region_id INT, region_name VARCHAR(50), landfill_capacity INT); INSERT INTO region_landfill (region_id, region_name, landfill_capacity) VALUES (1, \u0027RegionA\u0027, 500000), (2, \u0027RegionB\u0027, 350000);", + "sql": "SELECT region_name, landfill_capacity FROM region_landfill;", + "sql_explanation": "This query simply selects the region_name and landfill_capacity from the region_landfill table." +}, { + "id": "5409", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027waste_generation\u0027 table where the \u0027waste_type\u0027 is \u0027plastic\u0027", + "sql_context": "CREATE TABLE waste_generation (id INT, city VARCHAR(255), state VARCHAR(255), country VARCHAR(255), waste_type VARCHAR(255), amount INT);", + "sql": "DELETE FROM waste_generation WHERE waste_type \u003d \u0027plastic\u0027;", + "sql_explanation": "This query deletes all records from the \u0027waste_generation\u0027 table where the \u0027waste_type\u0027 is \u0027plastic\u0027." 
+}, { + "id": "5512", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027landfill_capacity\u0027 table where the \u0027country\u0027 is \u0027Mexico\u0027", + "sql_context": "CREATE TABLE landfill_capacity (id INT, city VARCHAR(255), state VARCHAR(255), country VARCHAR(255), capacity INT);", + "sql": "DELETE FROM landfill_capacity WHERE country \u003d \u0027Mexico\u0027;", + "sql_explanation": "This query deletes all records from the \u0027landfill_capacity\u0027 table where the \u0027country\u0027 is \u0027Mexico\u0027." +}, { + "id": "5514", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission from waste management in Canada?", + "sql_context": "CREATE TABLE CanadianWasteData (waste_type VARCHAR(50), CO2_emission_tonnes FLOAT); INSERT INTO CanadianWasteData (waste_type, CO2_emission_tonnes) VALUES (\u0027Municipal Solid Waste\u0027, 10.5), (\u0027Industrial Waste\u0027, 41.7), (\u0027Hazardous Waste\u0027, 2.9);", + "sql": "SELECT SUM(CO2_emission_tonnes) FROM CanadianWasteData;", + "sql_explanation": "Calculate the total CO2 emission from waste management in Canada by summing the CO2_emission_tonnes column." 
+}, { + "id": "5581", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum recycling rate for any material?", + "sql_context": "CREATE TABLE Recycling_Rates_All (material VARCHAR(20), region VARCHAR(20), recycling_rate DECIMAL(4,2)); INSERT INTO Recycling_Rates_All (material, region, recycling_rate) VALUES (\u0027Glass\u0027, \u0027East\u0027, 0.60), (\u0027Paper\u0027, \u0027East\u0027, 0.75), (\u0027Plastic\u0027, \u0027West\u0027, 0.55), (\u0027Metal\u0027, \u0027North\u0027, 0.80), (\u0027Glass\u0027, \u0027West\u0027, 0.70), (\u0027Paper\u0027, \u0027West\u0027, 0.65), (\u0027Metal\u0027, \u0027West\u0027, 0.85);", + "sql": "SELECT MAX(recycling_rate) FROM Recycling_Rates_All;", + "sql_explanation": "The SQL query calculates the maximum recycling rate for any material by using the MAX function." 
+}, { + "id": "2662", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average price per gram of cannabis in Alaska in the third quarter of 2020?", + "sql_context": "CREATE TABLE sales (id INT, state VARCHAR(20), price DECIMAL(10,2), weight DECIMAL(10,2), month INT, year INT);", + "sql": "SELECT AVG(price/weight) FROM sales WHERE state \u003d \u0027Alaska\u0027 AND (month \u003d 7 OR month \u003d 8 OR month \u003d 9) AND year \u003d 2020;", + "sql_explanation": "This query calculates the average price per gram of cannabis in Alaska in the third quarter of 2020 by dividing the price column by the weight column in the sales table where the state is Alaska and the month is between 7 and 9 and the year is 2020, and then averaging the result." 
+}, { + "id": "3237", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new sale for the state of Washington in Q3 2022 with a revenue of 20000 and a strain of \"Purple Haze\"", + "sql_context": "CREATE TABLE sales (id INT, state VARCHAR(50), quarter VARCHAR(10), strain VARCHAR(50), revenue INT);", + "sql": "INSERT INTO sales (state, quarter, strain, revenue) VALUES (\u0027Washington\u0027, \u0027Q3\u0027, \u0027Purple Haze\u0027, 20000);", + "sql_explanation": "This query inserts a new sale for the state of Washington in Q3 2022 with a revenue of 20000 and a strain of \"Purple Haze\" by inserting a new row into the sales table with the specified values." 
+}, { + "id": "3466", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average price per gram for Indica strains in Oregon in 2021?", + "sql_context": "CREATE TABLE prices (id INT, state VARCHAR(50), year INT, strain_type VARCHAR(50), price FLOAT); INSERT INTO prices (id, state, year, strain_type, price) VALUES (1, \u0027Oregon\u0027, 2021, \u0027Indica\u0027, 12.5), (2, \u0027Oregon\u0027, 2021, \u0027Sativa\u0027, 15.0), (3, \u0027California\u0027, 2021, \u0027Hybrid\u0027, 13.7);", + "sql": "SELECT AVG(price) FROM prices WHERE state \u003d \u0027Oregon\u0027 AND year \u003d 2021 AND strain_type \u003d \u0027Indica\u0027;", + "sql_explanation": "This query calculates the average price per gram for Indica strains in Oregon in 2021 by averaging the price column where state is Oregon, year is 2021, and strain type is Indica." 
+}, { + "id": "3724", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total number of dispensaries in the state of Washington as of January 1, 2023.", + "sql_context": "CREATE TABLE if not exists dispensaries(id INT, state VARCHAR(50), open_date DATE);INSERT INTO dispensaries(id, state, open_date) VALUES (1, \u0027Washington\u0027, \u00272020-01-01\u0027), (2, \u0027Washington\u0027, \u00272021-01-01\u0027), (3, \u0027Washington\u0027, \u00272022-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM dispensaries WHERE state \u003d \u0027Washington\u0027 AND open_date \u003c\u003d \u00272023-01-01\u0027;", + "sql_explanation": "This query counts the number of dispensaries in the state of Washington as of January 1, 2023." 
+}, { + "id": "3870", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by cannabis sales in New York in Q1 2022?", + "sql_context": "CREATE TABLE ny_sales (revenue DECIMAL(10,2), quarter VARCHAR(10), year INT); INSERT INTO ny_sales (revenue, quarter, year) VALUES (50000, \u0027Q1\u0027, 2022), (60000, \u0027Q1\u0027, 2022), (40000, \u0027Q1\u0027, 2022);", + "sql": "SELECT SUM(revenue) as total_revenue FROM ny_sales WHERE quarter \u003d \u0027Q1\u0027 AND year \u003d 2022;", + "sql_explanation": "This query calculates the total revenue generated by cannabis sales in New York in Q1 2022 by summing up the revenue of sales." 
+}, { + "id": "4284", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many dispensaries in Michigan have a loyalty program?", + "sql_context": "CREATE TABLE dispensaries (id INT, name VARCHAR(50), state VARCHAR(50), loyalty_program BOOLEAN);", + "sql": "SELECT COUNT(*) FROM dispensaries WHERE state \u003d \u0027MI\u0027 AND loyalty_program \u003d TRUE;", + "sql_explanation": "The query counts the number of dispensaries in Michigan with a loyalty program by filtering the \u0027dispensaries\u0027 table for rows with the state \u0027MI\u0027 and a loyalty_program value of true." +}, { + "id": "4836", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027purchase_date\u0027 of the customer with id 1 in the \u0027dispensary_sales\u0027 table to \u00272022-02-01\u0027", + "sql_context": "CREATE TABLE dispensary_sales (id INT, customer_name VARCHAR(50), state VARCHAR(20), purchase_date DATE); INSERT INTO dispensary_sales (id, customer_name, state, purchase_date) VALUES (1, \u0027John Doe\u0027, \u0027Colorado\u0027, \u00272022-01-01\u0027); INSERT INTO dispensary_sales (id, customer_name, state, purchase_date) VALUES (2, \u0027Jane Smith\u0027, \u0027California\u0027, \u00272022-01-02\u0027);", + "sql": "UPDATE dispensary_sales 
SET purchase_date \u003d \u00272022-02-01\u0027 WHERE id \u003d 1;", + "sql_explanation": "The SQL query updates the \u0027purchase_date\u0027 of the customer with id 1 in the \u0027dispensary_sales\u0027 table to \u00272022-02-01\u0027." +}, { + "id": "4931", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many grower licenses have been issued in Oregon since 2020?", + "sql_context": "CREATE TABLE GrowerLicenses (IssueDate DATE, LicenseNumber INTEGER); INSERT INTO GrowerLicenses (IssueDate, LicenseNumber) VALUES (\u00272019-01-01\u0027, 1001), (\u00272020-01-01\u0027, 2001), (\u00272021-01-01\u0027, 3001);", + "sql": "SELECT COUNT(*) FROM GrowerLicenses WHERE IssueDate \u003e\u003d \u00272020-01-01\u0027;", + "sql_explanation": "The SQL query counts the number of rows in the GrowerLicenses table where the IssueDate is on or after January 1, 2020." 
+}, { + "id": "5033", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records for \u0027Green Crack\u0027 from the strain testing results table.", + "sql_context": "CREATE TABLE StrainTestingResults (StrainName VARCHAR(255), Lab VARCHAR(255), THCResult DECIMAL(5,2), CBNResult DECIMAL(5,2), DateTested DATE);", + "sql": "DELETE FROM StrainTestingResults WHERE StrainName \u003d \u0027Green Crack\u0027;", + "sql_explanation": "This query deletes all records for \u0027Green Crack\u0027 from the StrainTestingResults table." +}, { + "id": "5541", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records from the \u0027dispensaries\u0027 table where the address is \u0027456 Elm St\u0027", + "sql_context": "CREATE TABLE dispensaries (dispensary_id INT, name VARCHAR(255), address VARCHAR(255));", + "sql": "DELETE FROM dispensaries WHERE address \u003d \u0027456 Elm St\u0027;", + "sql_explanation": "This query deletes records from the \u0027dispensaries\u0027 table where the address is \u0027456 Elm St\u0027." 
+}, { + "id": "5717", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all dispensaries from the state of \u0027OR\u0027.", + "sql_context": "CREATE TABLE dispensaries (id INT, name TEXT, state TEXT, revenue FLOAT); INSERT INTO dispensaries (id, name, state, revenue) VALUES (1, \u0027Dispensary A\u0027, \u0027CA\u0027, 200000.00), (2, \u0027Dispensary B\u0027, \u0027CA\u0027, 300000.00), (3, \u0027Dispensary C\u0027, \u0027OR\u0027, 400000.00), (4, \u0027Dispensary D\u0027, \u0027OR\u0027, 500000.00), (5, \u0027Dispensary E\u0027, \u0027WA\u0027, 600000.00), (6, \u0027Dispensary F\u0027, \u0027WA\u0027, 700000.00);", + "sql": "DELETE FROM dispensaries WHERE state \u003d \u0027OR\u0027;", + "sql_explanation": "This query deletes all rows from the dispensaries table where the state is OR." 
+}, { + "id": "2093", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names, locations, and construction dates of all wind farms in Texas and Oklahoma, along with their respective power generation capacities and the number of wind turbines.", + "sql_context": "CREATE TABLE WindFarms (WindFarmID INT, Name VARCHAR(255), State VARCHAR(255), ConstructionDate DATE, PowerGenerationCapacity INT, WindTurbineCount INT); INSERT INTO WindFarms VALUES (1, \u0027Wind Farm A\u0027, \u0027Texas\u0027, \u00272010-05-15\u0027, 50, 10); INSERT INTO WindFarms VALUES (2, \u0027Wind Farm B\u0027, \u0027Oklahoma\u0027, \u00272012-02-23\u0027, 60, 12); INSERT INTO WindFarms VALUES (3, \u0027Wind Farm C\u0027, \u0027Texas\u0027, \u00272015-08-07\u0027, 40, 8);", + "sql": "SELECT Name, Location, ConstructionDate, PowerGenerationCapacity, WindTurbineCount FROM WindFarms WHERE State IN (\u0027Texas\u0027, \u0027Oklahoma\u0027);", + "sql_explanation": "This SQL query filters the WindFarms table by State, selecting the Name, Location, ConstructionDate, PowerGenerationCapacity, and WindTurbineCount columns for wind farms in Texas and Oklahoma." 
+}, { + "id": "2112", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many bridges were built in the Northeast region before 2010?", + "sql_context": "CREATE TABLE InfrastructureProjects (id INT, name VARCHAR(100), region VARCHAR(50), project_type VARCHAR(50), completion_date DATE); INSERT INTO InfrastructureProjects (id, name, region, project_type, completion_date) VALUES (1, \u0027Boston Bridge\u0027, \u0027Northeast\u0027, \u0027bridge\u0027, \u00272005-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM InfrastructureProjects WHERE region \u003d \u0027Northeast\u0027 AND project_type \u003d \u0027bridge\u0027 AND completion_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "The SQL query counts the number of bridges built in the Northeast region before 2010 by using the COUNT function on the wildcard (*), filtering the data with the WHERE clause to only include bridges from the Northeast region and those completed before 2010." 
+}, { + "id": "2156", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many highways were built in the Eastern region before 2010?", + "sql_context": "CREATE TABLE InfrastructureProjects (id INT, name VARCHAR(100), region VARCHAR(50), project_type VARCHAR(50), completion_date DATE); INSERT INTO InfrastructureProjects (id, name, region, project_type, completion_date) VALUES (1, \u0027Philadelphia Highway\u0027, \u0027Eastern\u0027, \u0027highway\u0027, \u00272005-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM InfrastructureProjects WHERE region \u003d \u0027Eastern\u0027 AND project_type \u003d \u0027highway\u0027 AND completion_date \u003c \u00272010-01-01\u0027;", + "sql_explanation": "The SQL query counts the number of highways built in the Eastern region before 2010 by using the COUNT function on the wildcard (*), filtering the data with the WHERE clause to only include highways from the Eastern region and those completed before 2010." 
+}, { + "id": "2844", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum cost of electrical substation projects in the Europe region?", + "sql_context": "CREATE TABLE InfrastructureProjects (id INT, name VARCHAR(100), region VARCHAR(50), project_type VARCHAR(50), cost FLOAT); INSERT INTO InfrastructureProjects (id, name, region, project_type, cost) VALUES (1, \u0027Paris Electrical Substation\u0027, \u0027Europe\u0027, \u0027electrical substation\u0027, 15000000);", + "sql": "SELECT MIN(cost) FROM InfrastructureProjects WHERE region \u003d \u0027Europe\u0027 AND project_type \u003d \u0027electrical substation\u0027;", + "sql_explanation": "This SQL query calculates the minimum cost of electrical substation projects in the Europe region by using the MIN function on the cost column, filtering the data with the WHERE clause to only include electrical substation projects from the Europe region." 
+}, { + "id": "3296", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many seismic retrofits were completed in \u0027Seattle\u0027 since 2017?", + "sql_context": "CREATE TABLE seismic_retrofits (id INT, retrofit_number TEXT, location TEXT, cost INT, completion_date DATE); INSERT INTO seismic_retrofits (id, retrofit_number, location, cost, completion_date) VALUES (1, \u0027SEA-1234\u0027, \u0027Seattle\u0027, 400000, \u00272017-09-15\u0027); INSERT INTO seismic_retrofits (id, retrofit_number, location, cost, completion_date) VALUES (2, \u0027SEA-5678\u0027, \u0027Seattle\u0027, 350000, \u00272018-04-07\u0027);", + "sql": "SELECT COUNT(*) FROM seismic_retrofits WHERE location \u003d \u0027Seattle\u0027 AND YEAR(completion_date) \u003e\u003d 2017;", + "sql_explanation": "The SQL query counts the number of seismic retrofits completed in Seattle since 2017 by using the COUNT function with no specified column (which counts rows) and filters the records using the WHERE clause to only include retrofits in Seattle with a completion_date since 2017." 
+}, { + "id": "3311", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of bridge construction projects in \u0027Texas\u0027 in 2018?", + "sql_context": "CREATE TABLE bridge_construction (id INT, project_name TEXT, location TEXT, cost INT, completion_date DATE); INSERT INTO bridge_construction (id, project_name, location, cost, completion_date) VALUES (1, \u0027Houston Tunnel\u0027, \u0027Texas\u0027, 12000000, \u00272018-08-25\u0027); INSERT INTO bridge_construction (id, project_name, location, cost, completion_date) VALUES (2, \u0027Dallas Overpass\u0027, \u0027Texas\u0027, 9000000, \u00272018-12-11\u0027);", + "sql": "SELECT SUM(cost) FROM bridge_construction WHERE location \u003d \u0027Texas\u0027 AND YEAR(completion_date) \u003d 2018;", + "sql_explanation": "The SQL query calculates the total cost of bridge construction projects in Texas in 2018 by using the SUM function on the \u0027cost\u0027 column and filters the records using the WHERE clause to only include projects in Texas with a completion_date in 2018." 
+}, { + "id": "3334", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names, locations, and lengths of canals constructed between 1960 and 1980?", + "sql_context": "CREATE TABLE canals (id INT, name TEXT, location TEXT, length INT, type TEXT, year INT); INSERT INTO canals (id, name, location, length, type, year) VALUES (1, \u0027Panama\u0027, \u0027Panama\u0027, 81700, \u0027Transport\u0027, 1914); INSERT INTO canals (id, name, location, length, type, year) VALUES (2, \u0027Suez\u0027, \u0027Egypt\u0027, 120, \u0027Transport\u0027, 1869);", + "sql": "SELECT name, location, length FROM canals WHERE type \u003d \u0027Transport\u0027 AND year \u003e 1960 AND year \u003c 1980;", + "sql_explanation": "This query selects the name, location, and length of transport canals constructed between 1960 and 1980." 
+}, { + "id": "3409", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of construction costs for pipelines in the West under 3 million?", + "sql_context": "CREATE TABLE Pipeline (pipeline_id INT, region VARCHAR(20), construction_cost DECIMAL(10,2)); INSERT INTO Pipeline (pipeline_id, region, construction_cost) VALUES (1, \u0027West\u0027, 2500000.00), (2, \u0027Northeast\u0027, 1500000.00);", + "sql": "SELECT SUM(construction_cost) FROM Pipeline WHERE region \u003d \u0027West\u0027 AND construction_cost \u003c 3000000;", + "sql_explanation": "The SQL query sums the construction_cost column for rows where the region column is \u0027West\u0027 and the construction_cost column is less than 3 million." 
+}, { + "id": "3414", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all airports with a runway length greater than 3000 meters", + "sql_context": "CREATE TABLE Airports (airport_id int, airport_name varchar(255), runway_length decimal(10,2), location varchar(255));", + "sql": "SELECT airport_id, airport_name, runway_length, location FROM Airports WHERE runway_length \u003e 3000;", + "sql_explanation": "This query retrieves all airports with a runway length greater than 3000 meters by filtering the runway_length column in the Airports table." +}, { + "id": "3459", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which infrastructure projects in \u0027London\u0027 have a budget greater than $2,000,000?", + "sql_context": "CREATE TABLE InfrastructureD(id INT, city VARCHAR(20), project VARCHAR(30), budget DECIMAL(10,2)); INSERT INTO InfrastructureD(id, city, project, budget) VALUES (1, \u0027London\u0027, \u0027Tunnel Construction\u0027, 2500000.00), (2, \u0027Rome\u0027, \u0027Railway Upgrade\u0027, 1000000.00);", + "sql": "SELECT city, project, budget FROM InfrastructureD WHERE budget \u003e 2000000.00 AND city \u003d \u0027London\u0027;", + "sql_explanation": "This query retrieves 
infrastructure projects in \u0027London\u0027 with a budget greater than $2,000,000 by selecting the \u0027city\u0027, \u0027project\u0027, and \u0027budget\u0027 columns where the \u0027budget\u0027 column is greater than 2,000,000.00 and the \u0027city\u0027 column is equal to \u0027London\u0027." +}, { + "id": "3518", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which engineering design standards are associated with water treatment facilities in the United States?", + "sql_context": "CREATE TABLE engineering_design_standards (id INT, standard_name VARCHAR(255), standard_type VARCHAR(255)); INSERT INTO engineering_design_standards (id, standard_name, standard_type) VALUES (1, \u0027ANSI/AWWA B100\u0027, \u0027Water Treatment\u0027), (2, \u0027ANSI/AWWA B120\u0027, \u0027Water Distribution\u0027), (3, \u0027ANSI/AWWA B124\u0027, \u0027Water Storage\u0027), (4, \u0027ANSI/AWWA B138\u0027, \u0027Water Transmission\u0027), (5, \u0027ANSI/AWWA B173\u0027, \u0027Water Treatment Residuals Management\u0027);", + "sql": "SELECT standard_name FROM engineering_design_standards WHERE standard_type \u003d \u0027Water Treatment\u0027;", + "sql_explanation": "This SQL query identifies the engineering design standards associated with water treatment facilities in the United States. It selects the standard_name from the engineering_design_standards table where the standard_type is Water Treatment." 
+}, { + "id": "3560", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the design standard used in the \u0027Dam567\u0027 project", + "sql_context": "CREATE TABLE design_standards (id INT, name VARCHAR(255), standard VARCHAR(255)); INSERT INTO design_standards (id, name, standard) VALUES (567, \u0027Dam567\u0027, \u0027ABC Standard\u0027), (890, \u0027RiverProject\u0027, \u0027XYZ Standard\u0027);", + "sql": "SELECT design_standards.standard FROM design_standards WHERE design_standards.name \u003d \u0027Dam567\u0027;", + "sql_explanation": "This SQL query selects the \u0027standard\u0027 field from the \u0027design_standards\u0027 table where the name is \u0027Dam567\u0027." 
+}, { + "id": "3584", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of hydroelectric dams in Washington", + "sql_context": "CREATE TABLE Infrastructure (id INT, name VARCHAR(100), type VARCHAR(50), location VARCHAR(100), state VARCHAR(50)); INSERT INTO Infrastructure (id, name, type, location, state) VALUES (12, \u0027Grand Coulee Dam\u0027, \u0027Hydroelectric Dam\u0027, \u0027Grand Coulee\u0027, \u0027Washington\u0027);", + "sql": "SELECT COUNT(*) FROM Infrastructure WHERE type \u003d \u0027Hydroelectric Dam\u0027 AND state \u003d \u0027Washington\u0027;", + "sql_explanation": "This query calculates the number of hydroelectric dams in Washington by filtering the Infrastructure table based on the type and state columns and then determining the count of the resulting records." 
+}, { + "id": "3647", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of maintenance for bridges in the state of California?", + "sql_context": "CREATE TABLE Bridges (BridgeID int, Name varchar(100), Location varchar(100), Cost decimal(10,2), MaintenanceDate date); INSERT INTO Bridges VALUES (1, \u0027Bridge A\u0027, \u0027California\u0027, 50000, \u00272020-01-01\u0027); INSERT INTO Bridges VALUES (2, \u0027Bridge B\u0027, \u0027California\u0027, 75000, \u00272020-03-05\u0027);", + "sql": "SELECT AVG(Cost) FROM Bridges WHERE Location \u003d \u0027California\u0027 AND MaintenanceDate IS NOT NULL;", + "sql_explanation": "This query calculates the average cost of maintenance for bridges in California by selecting the AVG function on the Cost column, filtering the data by Location and ensuring MaintenanceDate is not null." 
+}, { + "id": "3825", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum number of stories for all buildings in the construction division?", + "sql_context": "CREATE TABLE buildings (id INT, name VARCHAR(50), division VARCHAR(50), num_stories INT); INSERT INTO buildings (id, name, division, num_stories) VALUES (1, \u0027Building A\u0027, \u0027Construction\u0027, 5), (2, \u0027Building B\u0027, \u0027Construction\u0027, 10), (3, \u0027Building C\u0027, \u0027Construction\u0027, 7);", + "sql": "SELECT MAX(num_stories), MIN(num_stories) FROM buildings WHERE division \u003d \u0027Construction\u0027;", + "sql_explanation": "This SQL query calculates the maximum and minimum number of stories for all buildings in the construction division by using the aggregate functions MAX and MIN on the number of stories column." 
+}, { + "id": "3869", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the bridges built before 2010, sorted by construction date.", + "sql_context": "CREATE TABLE bridges (bridge_id INT, bridge_name VARCHAR(50), location VARCHAR(50), length DECIMAL(10,2), construction_date DATE);", + "sql": "SELECT * FROM bridges WHERE construction_date \u003c \u00272010-01-01\u0027 ORDER BY construction_date;", + "sql_explanation": "This query retrieves a list of all bridges that were built before the year 2010, as determined by the \u0027construction_date\u0027 column. The results are sorted in ascending order by \u0027construction_date\u0027 so that the oldest bridges are listed first." 
+}, { + "id": "4001", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum clearance height for bridges in Spain?", + "sql_context": "CREATE TABLE Bridge (id INT, name VARCHAR(50), clearance_height FLOAT, country VARCHAR(50)); INSERT INTO Bridge (id, name, clearance_height, country) VALUES (1, \u0027Puente de la ConstituciÃŗn\u0027, 5, \u0027Spain\u0027);", + "sql": "SELECT MIN(clearance_height) FROM Bridge WHERE country \u003d \u0027Spain\u0027 AND type \u003d \u0027Bridge\u0027;", + "sql_explanation": "This query calculates the minimum clearance height for bridges in Spain by filtering the Bridge table with the WHERE clause and then computing the minimum using the MIN() function." 
+}, { + "id": "4028", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many bridges are made of concrete and steel?", + "sql_context": "CREATE TABLE bridges (id INT PRIMARY KEY, name VARCHAR(255), length FLOAT, material VARCHAR(255), built_year INT); INSERT INTO bridges (id, name, length, material, built_year) VALUES (1, \u0027BridgeX\u0027, 300.5, \u0027steel\u0027, 2000), (2, \u0027BridgeY\u0027, 450.7, \u0027concrete\u0027, 1965), (3, \u0027BridgeZ\u0027, 120.3, \u0027steel\u0027, 1990);", + "sql": "SELECT COUNT(*) as bridge_count FROM bridges WHERE material IN (\u0027concrete\u0027, \u0027steel\u0027);", + "sql_explanation": "The SQL query counts the number of bridges grouped by material, and filters the results to only show groups with \u0027concrete\u0027 and \u0027steel\u0027 materials. It uses the IN clause to apply the filter on the material column." 
+}, { + "id": "4167", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average construction cost per project in the USA", + "sql_context": "CREATE TABLE infrastructure_projects (id INT, name TEXT, location TEXT, construction_cost FLOAT); INSERT INTO infrastructure_projects (id, name, location, construction_cost) VALUES (1, \u0027Brooklyn Bridge\u0027, \u0027USA\u0027, 15000000); INSERT INTO infrastructure_projects (id, name, location, construction_cost) VALUES (2, \u0027Chunnel\u0027, \u0027UK\u0027, 21000000); INSERT INTO infrastructure_projects (id, name, location, construction_cost) VALUES (3, \u0027Tokyo Tower\u0027, \u0027Japan\u0027, 33000000);", + "sql": "SELECT AVG(construction_cost) FROM infrastructure_projects WHERE location \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the average construction cost for projects located in the USA." 
+}, { + "id": "4203", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of Green_Infrastructure projects in \u0027City O\u0027 and \u0027City P\u0027?", + "sql_context": "CREATE TABLE Green_Infrastructure (id INT, project_name VARCHAR(50), location VARCHAR(50), cost FLOAT); INSERT INTO Green_Infrastructure (id, project_name, location, cost) VALUES (1, \u0027Urban Farming\u0027, \u0027City O\u0027, 2000000); INSERT INTO Green_Infrastructure (id, project_name, location, cost) VALUES (2, \u0027Community Gardens\u0027, \u0027City P\u0027, 3000000);", + "sql": "SELECT COUNT(*) FROM Green_Infrastructure WHERE location IN (\u0027City O\u0027, \u0027City P\u0027);", + "sql_explanation": "This SQL query counts the total number of Green_Infrastructure projects in \u0027City O\u0027 and \u0027City P\u0027 by selecting all rows with the \u0027*\u0027 symbol and counting the records where the \u0027location\u0027 is either \u0027City O\u0027 or \u0027City P\u0027." 
+}, { + "id": "4279", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which projects in \u0027Texas\u0027 have a cost greater than $10 million?", + "sql_context": "CREATE TABLE Infrastructure_Projects (id INT, name VARCHAR(100), state VARCHAR(50), cost FLOAT); INSERT INTO Infrastructure_Projects (id, name, state, cost) VALUES (1, \u0027Floodgate Construction\u0027, \u0027Texas\u0027, 12000000);", + "sql": "SELECT * FROM Infrastructure_Projects WHERE state \u003d \u0027Texas\u0027 AND cost \u003e 10000000;", + "sql_explanation": "This query selects all projects in \u0027Texas\u0027 with a cost greater than $10 million by filtering the \u0027Infrastructure_Projects\u0027 table where the \u0027state\u0027 is \u0027Texas\u0027 and the \u0027cost\u0027 is greater than 10000000." 
+}, { + "id": "4282", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of bridges in Spain", + "sql_context": "CREATE TABLE Infrastructure (id INT, name VARCHAR(100), type VARCHAR(50), country VARCHAR(50)); INSERT INTO Infrastructure (id, name, type, country) VALUES (19, \u0027Puente de Rande\u0027, \u0027Bridge\u0027, \u0027Spain\u0027), (20, \u0027Viaducto de Montabliz\u0027, \u0027Bridge\u0027, \u0027Spain\u0027);", + "sql": "SELECT COUNT(*) FROM Infrastructure WHERE type \u003d \u0027Bridge\u0027 AND country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query counts all the records in the Infrastructure table where the type is Bridge and the country is Spain." 
+}, { + "id": "4318", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the airports in Florida with over 10,000 passengers per day.", + "sql_context": "CREATE TABLE Airports (Name VARCHAR(255), Daily_passengers INT, State VARCHAR(255)); INSERT INTO Airports (Name, Daily_passengers, State) VALUES (\u0027Orlando International Airport\u0027, 12000, \u0027Florida\u0027);", + "sql": "SELECT Name FROM Airports WHERE Daily_passengers \u003e 10000 AND State \u003d \u0027Florida\u0027;", + "sql_explanation": "The SQL query selects the names of airports from the Airports table where the daily passengers is over 10,000 and the state is Florida." 
+}, { + "id": "4348", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for resilience projects in the \u0027Water\u0027 sector?", + "sql_context": "CREATE TABLE ResilienceProjects (ProjectID int, Sector varchar(10), Budget int); INSERT INTO ResilienceProjects (ProjectID, Sector, Budget) VALUES (1, \u0027Water\u0027, 500000), (2, \u0027Transport\u0027, 800000), (3, \u0027Energy\u0027, 600000);", + "sql": "SELECT AVG(Budget) AS AvgBudget FROM ResilienceProjects WHERE Sector \u003d \u0027Water\u0027;", + "sql_explanation": "This query filters the rows in the \u0027ResilienceProjects\u0027 table by the \u0027Sector\u0027 column, then calculates the average value of the \u0027Budget\u0027 column for the filtered rows." 
+}, { + "id": "4402", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of bridges and total bridge length for each state, in descending order by the total bridge length.", + "sql_context": "CREATE TABLE states (id INT, name VARCHAR(20), num_bridges INT, total_length FLOAT); INSERT INTO states (id, name, num_bridges, total_length) VALUES (1, \u0027California\u0027, 25000, 500000), (2, \u0027Texas\u0027, 50000, 1000000), (3, \u0027New York\u0027, 30000, 750000);", + "sql": "SELECT name, num_bridges, total_length FROM states ORDER BY total_length DESC;", + "sql_explanation": "Select the state name, number of bridges, and total bridge length, ordered by the total bridge length in descending order." 
+}, { + "id": "4403", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the tunnels in the United Kingdom and their total length.", + "sql_context": "CREATE TABLE tunnels_uk (country VARCHAR(50), name VARCHAR(50), length INT); INSERT INTO tunnels_uk (country, name, length) VALUES (\u0027United Kingdom\u0027, \u0027Severn Tunnel\u0027, 7011), (\u0027United Kingdom\u0027, \u0027Channel Tunnel\u0027, 50498);", + "sql": "SELECT COUNT(*), SUM(length) FROM tunnels_uk WHERE country \u003d \u0027United Kingdom\u0027;", + "sql_explanation": "The query counts the records and sums the \u0027length\u0027 column in the \u0027tunnels_uk\u0027 table where \u0027country\u0027 is \u0027United Kingdom\u0027." 
+}, { + "id": "4636", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age requirement for all engineers in the city of Sydney who are authorized to sign off on civil engineering designs?", + "sql_context": "CREATE TABLE engineer (id INT, name TEXT, city TEXT, age INT, authorization BOOLEAN); INSERT INTO engineer (id, name, city, age, authorization) VALUES (1, \u0027Engineer A\u0027, \u0027Sydney\u0027, 30, 1); INSERT INTO engineer (id, name, city, age, authorization) VALUES (2, \u0027Engineer B\u0027, \u0027Sydney\u0027, 28, 0);", + "sql": "SELECT MIN(age) FROM engineer WHERE city \u003d \u0027Sydney\u0027 AND authorization \u003d 1;", + "sql_explanation": "This query calculates the minimum age requirement for all engineers in the city of Sydney who are authorized to sign off on civil engineering designs by selecting the MIN function on the \u0027age\u0027 column, filtering the data by the \u0027city\u0027 and \u0027authorization\u0027 columns." 
+}, { + "id": "4725", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which projects in \u0027bridge_data\u0027 have a \u0027construction_year\u0027 between 2010 and 2020?", + "sql_context": "CREATE TABLE bridge_data (id INT, bridge_name VARCHAR(50), construction_year INT); INSERT INTO bridge_data (id, bridge_name, construction_year) VALUES (1, \u0027Golden Gate Bridge\u0027, 1937), (2, \u0027Sydney Harbour Bridge\u0027, 1932); INSERT INTO bridge_data (id, bridge_name, construction_year) VALUES (3, \u0027New Bridge\u0027, 2015);", + "sql": "SELECT * FROM bridge_data WHERE construction_year BETWEEN 2010 AND 2020;", + "sql_explanation": "This query selects all records from \u0027bridge_data\u0027 with \u0027construction_year\u0027 values between 2010 and 2020." 
+}, { + "id": "4728", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for all resilience projects in \u0027city_projects\u0027 table?", + "sql_context": "CREATE TABLE city_projects (project_id INT, project_name VARCHAR(50), location VARCHAR(50), budget DECIMAL(10,2), project_type VARCHAR(50));", + "sql": "SELECT SUM(budget) FROM city_projects WHERE project_type \u003d \u0027Resilience\u0027;", + "sql_explanation": "This query calculates the total budget for all projects with a project_type of \u0027Resilience\u0027 in the \u0027city_projects\u0027 table. It does this by using the SUM() function to add up all the values in the \u0027budget\u0027 column where \u0027project_type\u0027 is \u0027Resilience\u0027." 
+}, { + "id": "4783", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the variance of construction costs for highways in the South?", + "sql_context": "CREATE TABLE Highway (highway_id INT, region VARCHAR(20), construction_cost DECIMAL(10,2)); INSERT INTO Highway (highway_id, region, construction_cost) VALUES (1, \u0027South\u0027, 12000000.00), (2, \u0027Northeast\u0027, 10000000.00);", + "sql": "SELECT VARIANCE(construction_cost) FROM Highway WHERE region \u003d \u0027South\u0027;", + "sql_explanation": "The SQL query calculates the variance of the construction_cost column for rows where the region column is \u0027South\u0027." 
+}, { + "id": "4805", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total number of bridges in \u0027Northeast\u0027 and \u0027Midwest\u0027 regions.", + "sql_context": "CREATE TABLE Bridges (id INT, name TEXT, region TEXT, length FLOAT); INSERT INTO Bridges (id, name, region, length) VALUES (1, \u0027BridgeX\u0027, \u0027Northeast\u0027, 2500.00), (2, \u0027BridgeY\u0027, \u0027Midwest\u0027, 3100.50), (3, \u0027BridgeZ\u0027, \u0027Midwest\u0027, 1800.25);", + "sql": "SELECT COUNT(*) FROM Bridges WHERE region IN (\u0027Northeast\u0027, \u0027Midwest\u0027);", + "sql_explanation": "This query identifies the total number of bridges in the \u0027Northeast\u0027 and \u0027Midwest\u0027 regions. It does this by using the COUNT(*) function to count the number of rows where the \u0027region\u0027 column is either equal to \u0027Northeast\u0027 or \u0027Midwest\u0027." 
+}, { + "id": "4809", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum cost of utility projects in the infrastructure database?", + "sql_context": "CREATE TABLE Utility_Projects (Project_ID INT, Project_Name VARCHAR(50), Project_Type VARCHAR(50), Cost FLOAT); INSERT INTO Utility_Projects (Project_ID, Project_Name, Project_Type, Cost) VALUES (1, \u0027Water_Main_Replacement\u0027, \u0027Utility\u0027, 4000000.00), (2, \u0027Sewer_System_Upgrade\u0027, \u0027Utility\u0027, 6000000.00), (3, \u0027Gas_Line_Extension\u0027, \u0027Utility\u0027, 2000000.00);", + "sql": "SELECT MAX(Cost) FROM Utility_Projects WHERE Project_Type \u003d \u0027Utility\u0027;", + "sql_explanation": "This query finds the maximum cost of utility projects by selecting the maximum \u0027Cost\u0027 where \u0027Project_Type\u0027 is \u0027Utility\u0027." 
+}, { + "id": "4843", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which projects were started before 1990 and completed after 2010?", + "sql_context": "CREATE TABLE Projects (name TEXT, start_year INT, end_year INT, location TEXT);", + "sql": "SELECT name FROM Projects WHERE start_year \u003c 1990 AND end_year \u003e 2010;", + "sql_explanation": "This query selects projects started before 1990 and completed after 2010 using comparison operators with the start_year and end_year columns." +}, { + "id": "4863", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum construction cost for a road in the \u0027Northeast\u0027 region?", + "sql_context": "CREATE TABLE Roads (RoadID int, Name varchar(100), Location varchar(100), ConstructionCost decimal(10,2)); INSERT INTO Roads VALUES (1, \u0027Road A\u0027, \u0027Northeast\u0027, 2000000); INSERT INTO Roads VALUES (2, \u0027Road B\u0027, \u0027Northeast\u0027, 1500000);", + "sql": "SELECT MAX(ConstructionCost) FROM Roads WHERE Location \u003d \u0027Northeast\u0027;", + "sql_explanation": "This query calculates the maximum construction cost for a road in the \u0027Northeast\u0027 region by selecting the MAX function on the 
ConstructionCost column and filtering the data by Location." +}, { + "id": "4926", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all dams located in the province of Quebec that have exceeded their maximum design capacity at any point in time.", + "sql_context": "CREATE TABLE dam (id INT, name TEXT, province TEXT, design_capacity FLOAT, max_exceeded INT); INSERT INTO dam (id, name, province, design_capacity, max_exceeded) VALUES (1, \u0027Dam A\u0027, \u0027Quebec\u0027, 5000000, 1); INSERT INTO dam (id, name, province, design_capacity, max_exceeded) VALUES (2, \u0027Dam B\u0027, \u0027Quebec\u0027, 6000000, 0);", + "sql": "SELECT name FROM dam WHERE province \u003d \u0027Quebec\u0027 AND max_exceeded \u003d 1;", + "sql_explanation": "This query lists all dams located in the province of Quebec that have exceeded their maximum design capacity at any point in time by selecting the \u0027name\u0027 column, filtering the data by the \u0027province\u0027 and \u0027max_exceeded\u0027 columns." 
+}, { + "id": "5006", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of projects in the \u0027Water Supply\u0027 category?", + "sql_context": "CREATE TABLE project_info (id INT, name VARCHAR(50), category VARCHAR(50), cost INT); INSERT INTO project_info (id, name, category, cost) VALUES (1, \u0027Test1\u0027, \u0027Water Supply\u0027, 3000000);", + "sql": "SELECT SUM(cost) FROM project_info WHERE category \u003d \u0027Water Supply\u0027;", + "sql_explanation": "This SQL query calculates the total cost of projects in the \u0027Water Supply\u0027 category by summing up the \u0027cost\u0027 values in the \u0027project_info\u0027 table." 
+}, { + "id": "5130", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of projects in the \u0027transportation\u0027 table in \u0027Chicago\u0027?", + "sql_context": "CREATE TABLE transportation (id INT, project_name VARCHAR(50), location VARCHAR(50), cost FLOAT); INSERT INTO transportation (id, project_name, location, cost) VALUES (1, \u0027Bridge\u0027, \u0027Los Angeles\u0027, 3000000); INSERT INTO transportation (id, project_name, location, cost) VALUES (2, \u0027Highway\u0027, \u0027Chicago\u0027, 12000000);", + "sql": "SELECT AVG(cost) FROM transportation WHERE location \u003d \u0027Chicago\u0027;", + "sql_explanation": "This SQL query calculates the average cost of projects in the \u0027transportation\u0027 table in \u0027Chicago\u0027 by finding the mean of the \u0027cost\u0027 column where the \u0027location\u0027 equals \u0027Chicago\u0027." 
+}, { + "id": "5147", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which public works projects were completed in \u0027City C\u0027?", + "sql_context": "CREATE TABLE Public_Works (id INT, project_name VARCHAR(50), location VARCHAR(50), completion_date DATE); INSERT INTO Public_Works (id, project_name, location, completion_date) VALUES (1, \u0027Road Construction\u0027, \u0027City C\u0027, \u00272021-06-30\u0027); INSERT INTO Public_Works (id, project_name, location, completion_date) VALUES (2, \u0027Bridge Replacement\u0027, \u0027City D\u0027, \u00272022-04-15\u0027);", + "sql": "SELECT project_name FROM Public_Works WHERE location \u003d \u0027City C\u0027;", + "sql_explanation": "This SQL query retrieves the project names from the Public_Works table that were completed in \u0027City C\u0027 by filtering the \u0027location\u0027 column values." 
+}, { + "id": "5211", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum total cost of projects in the water division?", + "sql_context": "CREATE TABLE Projects (id INT, division VARCHAR(20), total_cost FLOAT); INSERT INTO Projects (id, division, total_cost) VALUES (1, \u0027water\u0027, 500000), (2, \u0027transportation\u0027, 300000), (3, \u0027water\u0027, 750000);", + "sql": "SELECT MAX(total_cost) FROM Projects WHERE division \u003d \u0027water\u0027;", + "sql_explanation": "This query calculates the maximum total cost of projects in the water division. It does this by using the MAX function on the total_cost column, where the division is \u0027water\u0027." 
+}, { + "id": "5272", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the incident_date of the record with id 2 in the \u0027damages\u0027 table to \u00272018-05-22\u0027", + "sql_context": "CREATE TABLE damages (id INT, incident_date DATE, reported_cost INT);", + "sql": "UPDATE damages SET incident_date \u003d \u00272018-05-22\u0027 WHERE id \u003d 2;", + "sql_explanation": "*Filter the records in the \u0027damages\u0027 table by id; then, update the incident_date of the matching record.*" +}, { + "id": "5360", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total length of all the rail tracks in \u0027Asia\u0027?", + "sql_context": "CREATE TABLE RailTracks (TrackID int, Location varchar(100), Length decimal(10,2)); INSERT INTO RailTracks VALUES (1, \u0027Asia\u0027, 500); INSERT INTO RailTracks VALUES (2, \u0027Asia\u0027, 700);", + "sql": "SELECT SUM(Length) FROM RailTracks WHERE Location \u003d \u0027Asia\u0027;", + "sql_explanation": "This query calculates the total length of all the rail tracks in \u0027Asia\u0027 by selecting the SUM function on the Length column and filtering the data by Location." 
+}, { + "id": "5448", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of solar_farm projects in \u0027Texas\u0027.", + "sql_context": "CREATE TABLE solar_farm (id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO solar_farm (id, name, location) VALUES (1, \u0027Sample Solar Farm\u0027, \u0027Texas\u0027);", + "sql": "SELECT COUNT(*) FROM solar_farm WHERE location \u003d \u0027Texas\u0027;", + "sql_explanation": "* Counts all rows * Filters rows where the \u0027location\u0027 is \u0027Texas\u0027" +}, { + "id": "5468", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum height of buildings in Sydney, Australia?", + "sql_context": "CREATE TABLE Buildings (BuildingID INT, Name TEXT, Height INT, City TEXT, Country TEXT); INSERT INTO Buildings (BuildingID, Name, Height, City, Country) VALUES (1, \u0027BuildingA\u0027, 200, \u0027Sydney\u0027, \u0027Australia\u0027); INSERT INTO Buildings (BuildingID, Name, Height, City, Country) VALUES (2, \u0027BuildingB\u0027, 250, \u0027Sydney\u0027, \u0027Australia\u0027); INSERT INTO Buildings (BuildingID, Name, Height, City, Country) VALUES (3, \u0027BuildingC\u0027, 300, \u0027Melbourne\u0027, 
\u0027Australia\u0027);", + "sql": "SELECT MIN(Height) FROM Buildings WHERE City \u003d \u0027Sydney\u0027;", + "sql_explanation": "This SQL query calculates the minimum height of buildings in Sydney, Australia by using the MIN function on the \u0027Height\u0027 column and filtering the data with the WHERE clause to only consider buildings in Sydney." +}, { + "id": "5592", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all projects in \u0027City B\u0027?", + "sql_context": "CREATE TABLE project (id INT, name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO project (id, name, location, start_date, end_date) VALUES (2, \u0027Road Expansion\u0027, \u0027City B\u0027, \u00272019-01-01\u0027, \u00272019-12-31\u0027);", + "sql": "SELECT name FROM project WHERE location \u003d \u0027City B\u0027;", + "sql_explanation": "This query retrieves the names of projects located in City B." 
+}, { + "id": "5765", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the projects in the \u0027Energy_Storage\u0027 table?", + "sql_context": "CREATE TABLE Energy_Storage (project_id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO Energy_Storage (project_id, project_name, location) VALUES (1, \u0027Battery Storage Installation\u0027, \u0027Hawaii\u0027); INSERT INTO Energy_Storage (project_id, project_name, location) VALUES (2, \u0027Pumped Hydro Storage\u0027, \u0027Washington\u0027);", + "sql": "SELECT project_name FROM Energy_Storage;", + "sql_explanation": "This SQL query retrieves the names of the projects in the \u0027Energy_Storage\u0027 table by selecting the \u0027project_name\u0027 column." 
+}, { + "id": "5789", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average height of hydro_plants?", + "sql_context": "CREATE TABLE hydro_plants (id INT, name VARCHAR(255), height INT); INSERT INTO hydro_plants (id, name, height) VALUES (1, \u0027Sample Hydro Plant\u0027, 200);", + "sql": "SELECT AVG(height) FROM hydro_plants;", + "sql_explanation": "* Selects the average of the \u0027height\u0027 column" +}, { + "id": "5829", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average length of a tunnel in the \u0027tunnels\u0027 table?", + "sql_context": "CREATE TABLE tunnels (tunnel_id INT, tunnel_name VARCHAR(50), location VARCHAR(50), length DECIMAL(10,2));", + "sql": "SELECT AVG(length) FROM tunnels;", + "sql_explanation": "This query calculates the average length of a tunnel in the \u0027tunnels\u0027 table. It does this by using the AVG() function to find the mean value in the \u0027length\u0027 column." 
+}, { + "id": "5842", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum length of a dam in the \u0027dams\u0027 table?", + "sql_context": "CREATE TABLE dams (dam_id INT, dam_name VARCHAR(50), location VARCHAR(50), length DECIMAL(10,2), reservoir_capacity INT);", + "sql": "SELECT MAX(length) FROM dams;", + "sql_explanation": "This query calculates the maximum length of a dam in the \u0027dams\u0027 table. It does this by using the MAX() function to find the largest value in the \u0027length\u0027 column." +}, { + "id": "5843", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of projects in the \u0027energy\u0027 table?", + "sql_context": "CREATE TABLE energy (id INT, project_name VARCHAR(50), location VARCHAR(50), cost FLOAT); INSERT INTO energy (id, project_name, location, cost) VALUES (1, \u0027Wind Farm\u0027, \u0027Region C\u0027, 15000000.00), (2, \u0027Solar Power Plant\u0027, \u0027City D\u0027, 20000000.00), (3, \u0027Geothermal Plant\u0027, \u0027Area E\u0027, 18000000.00);", + "sql": "SELECT AVG(cost) FROM energy;", + "sql_explanation": "This query calculates the average cost of projects in the \u0027energy\u0027 table by averaging the 
\u0027cost\u0027 column values." +}, { + "id": "5847", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Delete the view recent_standards", + "sql_context": "CREATE VIEW recent_standards AS SELECT * FROM design_standards WHERE design_standards.id \u003e 3;", + "sql": "DROP VIEW recent_standards;", + "sql_explanation": "This query deletes the view \u0027recent_standards\u0027." +}, { + "id": "1032", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of agricultural innovation projects in the province of Saskatchewan in 2020?", + "sql_context": "CREATE TABLE agricultural_projects (id INT, province VARCHAR(50), cost FLOAT, project_type VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO agricultural_projects (id, province, cost, project_type, start_date, end_date) VALUES (1, \u0027Saskatchewan\u0027, 60000.00, \u0027Precision Agriculture\u0027, \u00272020-01-01\u0027, \u00272020-12-31\u0027);", + "sql": "SELECT SUM(cost) FROM agricultural_projects WHERE province \u003d \u0027Saskatchewan\u0027 AND start_date \u003c\u003d \u00272020-12-31\u0027 AND end_date \u003e\u003d \u00272020-01-01\u0027 AND project_type \u003d \u0027Precision Agriculture\u0027;", 
+ "sql_explanation": "This query calculates the total cost of Precision Agriculture projects in the province of Saskatchewan in 2020 by summing the \u0027cost\u0027 column where the \u0027province\u0027 is \u0027Saskatchewan\u0027, the \u0027start_date\u0027 is on or before \u00272020-12-31\u0027 and the \u0027end_date\u0027 is on or after \u00272020-01-01\u0027." +}, { + "id": "1081", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of \u0027rural infrastructure grants\u0027 for \u0027Indigenous communities\u0027 in \u0027Canada\u0027 since 2010?", + "sql_context": "CREATE TABLE grants (id INT, name TEXT, region TEXT, type TEXT, value FLOAT, date DATE); INSERT INTO grants (id, name, region, type, value, date) VALUES (1, \u0027Grant 1\u0027, \u0027Canada\u0027, \u0027rural infrastructure\u0027, 500000, \u00272011-01-01\u0027), (2, \u0027Grant 2\u0027, \u0027Canada\u0027, \u0027urban development\u0027, 750000, \u00272012-01-01\u0027), (3, \u0027Grant 3\u0027, \u0027Canada\u0027, \u0027rural infrastructure\u0027, 250000, \u00272010-01-01\u0027);", + "sql": "SELECT SUM(grants.value) FROM grants WHERE grants.region \u003d \u0027Canada\u0027 AND grants.type \u003d \u0027rural infrastructure\u0027 AND grants.date \u003e\u003d \u00272010-01-01\u0027 AND grants.name LIKE \u0027%Indigenous%\u0027;", + "sql_explanation": "This query calculates the total value of \u0027rural infrastructure grants\u0027 for \u0027Indigenous communities\u0027 in \u0027Canada\u0027 since 2010. 
It does this by selecting all rows from the \u0027grants\u0027 table where the region is \u0027Canada\u0027, the type is \u0027rural infrastructure\u0027, the date is on or after \u00272010-01-01\u0027, and the name contains \u0027Indigenous\u0027, and then calculating the sum of the \u0027value\u0027 column for those rows." +}, { + "id": "1176", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of all agricultural innovation projects in the province of Mpumalanga in 2020?", + "sql_context": "CREATE TABLE agricultural_projects (id INT, province VARCHAR(50), cost FLOAT, project_type VARCHAR(50), start_date DATE); INSERT INTO agricultural_projects (id, province, cost, project_type, start_date) VALUES (1, \u0027Mpumalanga\u0027, 50000.00, \u0027Drip Irrigation\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT SUM(cost) FROM agricultural_projects WHERE province \u003d \u0027Mpumalanga\u0027 AND start_date \u003e\u003d \u00272020-01-01\u0027 AND start_date \u003c \u00272021-01-01\u0027 AND project_type \u003d \u0027Drip Irrigation\u0027;", + "sql_explanation": "This query calculates the total cost of Drip Irrigation projects in the province of Mpumalanga in 2020 by summing the \u0027cost\u0027 column where the \u0027province\u0027 is \u0027Mpumalanga\u0027, the \u0027start_date\u0027 is in 2020, and the \u0027project_type\u0027 is \u0027Drip Irrigation\u0027." 
+}, { + "id": "1211", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new rural infrastructure project with the given details and assign it a unique id.", + "sql_context": "CREATE TABLE rural_infrastructure_projects (id INT, project_name VARCHAR(255), location VARCHAR(255), sector VARCHAR(255), cost FLOAT);", + "sql": "INSERT INTO rural_infrastructure_projects (project_name, location, sector, cost) VALUES (\u0027Waste Management System\u0027, \u0027Village E\u0027, \u0027Infrastructure\u0027, 60000.00) RETURNING id;", + "sql_explanation": "This query inserts a new record into the \u0027rural_infrastructure_projects\u0027 table using the VALUES clause, and returns the assigned id with the RETURNING keyword and id column." 
+}, { + "id": "1389", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total investment in agricultural innovation projects in Tanzania between 2015 and 2017, and how many were implemented?", + "sql_context": "CREATE TABLE agri_innovation_tanzania (project VARCHAR(50), country VARCHAR(50), start_year INT, end_year INT, investment FLOAT); INSERT INTO agri_innovation_tanzania (project, country, start_year, end_year, investment) VALUES (\u0027Conservation Agriculture\u0027, \u0027Tanzania\u0027, 2015, 2017, 1000000), (\u0027Crop Diversification\u0027, \u0027Tanzania\u0027, 2015, 2017, 1500000);", + "sql": "SELECT SUM(investment), COUNT(*) FROM agri_innovation_tanzania WHERE country \u003d \u0027Tanzania\u0027 AND start_year BETWEEN 2015 AND 2017 AND end_year BETWEEN 2015 AND 2017;", + "sql_explanation": "The SQL query calculates the total investment in agricultural innovation projects in Tanzania between 2015 and 2017 and counts the number of projects implemented during the same period using the SUM() and COUNT() functions." 
+}, { + "id": "1573", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for agricultural innovation projects in Southeast Asia implemented after 2017?", + "sql_context": "CREATE TABLE agri_innovation (id INT, name TEXT, location TEXT, implementation_year INT, budget INT); INSERT INTO agri_innovation (id, name, location, implementation_year, budget) VALUES (1, \u0027Precision Agriculture\u0027, \u0027Southeast Asia\u0027, 2018, 500000), (2, \u0027Smart Farming\u0027, \u0027Southeast Asia\u0027, 2019, 700000);", + "sql": "SELECT SUM(agri_innovation.budget) FROM agri_innovation WHERE agri_innovation.location \u003d \u0027Southeast Asia\u0027 AND agri_innovation.implementation_year \u003e 2017;", + "sql_explanation": "This query calculates the total budget for agricultural innovation projects in Southeast Asia implemented after 2017 by summing the \u0027budget\u0027 column and filtering the results based on the implementation_year." 
+}, { + "id": "1722", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total investment in agricultural innovation in Kenya in 2021?", + "sql_context": "CREATE TABLE AgriculturalInvestments (id INT, country VARCHAR(50), investment_name VARCHAR(100), investment_type VARCHAR(50), amount FLOAT, start_date DATE, end_date DATE); INSERT INTO AgriculturalInvestments (id, country, investment_name, investment_type, amount, start_date, end_date) VALUES (1, \u0027Kenya\u0027, \u0027Precision Farming\u0027, \u0027Agricultural Innovation\u0027, 100000.0, \u00272021-01-01\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT SUM(amount) FROM AgriculturalInvestments WHERE country \u003d \u0027Kenya\u0027 AND investment_type \u003d \u0027Agricultural Innovation\u0027 AND YEAR(start_date) \u003d 2021;", + "sql_explanation": "The SQL query calculates the total investment in agricultural innovation in Kenya in 2021 by summing the \u0027amount\u0027 column for records where the \u0027country\u0027 is \u0027Kenya\u0027, the \u0027investment_type\u0027 is \u0027Agricultural Innovation\u0027, and the \u0027start_date\u0027 is in 2021." 
+}, { + "id": "1776", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the focus area for the \u0027Women in Sustainable Agriculture\u0027 initiative in Kenya to \u0027Climate Change Mitigation\u0027.", + "sql_context": "CREATE TABLE initiatives (id INT, initiative_name VARCHAR(100), location VARCHAR(50), budget DECIMAL(10,2), focus_area VARCHAR(50)); INSERT INTO initiatives (id, initiative_name, location, budget, focus_area) VALUES (1, \u0027Women in Sustainable Agriculture\u0027, \u0027Kenya\u0027, 75000.00, \u0027Women Empowerment\u0027);", + "sql": "UPDATE initiatives SET focus_area \u003d \u0027Climate Change Mitigation\u0027 WHERE initiative_name \u003d \u0027Women in Sustainable Agriculture\u0027 AND location \u003d \u0027Kenya\u0027;", + "sql_explanation": "Update the focus area for the \u0027Women in Sustainable Agriculture\u0027 initiative in Kenya to Climate Change Mitigation." 
+}, { + "id": "1798", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027rural_infrastructure\u0027", + "sql_context": "CREATE TABLE rural_infrastructure (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(50), type VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "CREATE TABLE rural_infrastructure (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(50), type VARCHAR(50), start_date DATE, end_date DATE);", + "sql_explanation": "1. The \u0027CREATE TABLE\u0027 command is used to create a new table in the database. 2. \u0027rural_infrastructure\u0027 is the name of the new table. 3. \u0027id\u0027 is an integer type column and is the primary key. 4. \u0027name\u0027, \u0027location\u0027, \u0027type\u0027, \u0027start_date\u0027, and \u0027end_date\u0027 are additional columns with specified data types." 
+}, { + "id": "1990", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the \u0027agricultural_innovations\u0027 table for a new crop variety in India", + "sql_context": "CREATE TABLE agricultural_innovations (id INT, innovation_name VARCHAR(255), country VARCHAR(255), sector VARCHAR(255));", + "sql": "INSERT INTO agricultural_innovations (id, innovation_name, country, sector) VALUES (1, \u0027Drought-resistant Maize\u0027, \u0027India\u0027, \u0027Agriculture\u0027);", + "sql_explanation": "The INSERT statement is used to add a new record into the \u0027agricultural_innovations\u0027 table, specifying the column values for the new crop variety in India." 
+}, { + "id": "1999", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all community development initiatives in the Appalachian region that were funded by the federal government and non-profit organizations.", + "sql_context": "CREATE TABLE community_development (id INT, name TEXT, location TEXT, funder TEXT); INSERT INTO community_development (id, name, location, funder) VALUES (1, \u0027Housing Renovation\u0027, \u0027Appalachian region\u0027, \u0027Federal Government\u0027), (2, \u0027Education Center\u0027, \u0027Appalachian region\u0027, \u0027Non-profit Organization\u0027);", + "sql": "SELECT * FROM community_development WHERE location \u003d \u0027Appalachian region\u0027 AND funder IN (\u0027Federal Government\u0027, \u0027Non-profit Organization\u0027);", + "sql_explanation": "This query selects all columns from the \u0027community_development\u0027 table where the location is the Appalachian region and the funder is either the Federal Government or a Non-profit Organization." 
+}, { + "id": "2018", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average investment in rural infrastructure projects in Africa from 2018-2020?", + "sql_context": "CREATE TABLE infrastructure_projects (project_id INT, project_type VARCHAR(255), investment INT, country VARCHAR(255), year INT); INSERT INTO infrastructure_projects (project_id, project_type, investment, country, year) VALUES (1, \u0027Irrigation System\u0027, 50000, \u0027Kenya\u0027, 2018), (2, \u0027Rural Road\u0027, 75000, \u0027Nigeria\u0027, 2019), (3, \u0027Electricity Grid\u0027, 100000, \u0027South Africa\u0027, 2020);", + "sql": "SELECT AVG(investment) FROM infrastructure_projects WHERE country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027, \u0027South Africa\u0027) AND year BETWEEN 2018 AND 2020;", + "sql_explanation": "Calculate the average investment in rural infrastructure projects in Africa from 2018-2020 by filtering the infrastructure_projects table by country and year, and then applying the AVG function to the investment column." 
+}, { + "id": "2370", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new community development initiative in \u0027Amazonas\u0027 region with ID 3, name \u0027Cultural Center\u0027, and status \u0027planning\u0027 into the \u0027community_development\u0027 table.", + "sql_context": "CREATE TABLE community_development(id INT, region TEXT, initiative_name TEXT, status TEXT);", + "sql": "INSERT INTO community_development (id, region, initiative_name, status) VALUES (3, \u0027Amazonas\u0027, \u0027Cultural Center\u0027, \u0027planning\u0027);", + "sql_explanation": "We insert a new record into the \u0027community_development\u0027 table with an ID of 3, a region of \u0027Amazonas\u0027, an initiative name of \u0027Cultural Center\u0027, and a status of \u0027planning\u0027. The result is the new record in the \u0027community_development\u0027 table." 
+}, { + "id": "2372", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of all infrastructure projects in the rural development sector in South Africa in 2020?", + "sql_context": "CREATE TABLE infrastructure_projects (id INT, country VARCHAR(255), year INT, sector VARCHAR(255), cost FLOAT); INSERT INTO infrastructure_projects (id, country, year, sector, cost) VALUES (1, \u0027South Africa\u0027, 2020, \u0027Rural Development\u0027, 500000.00);", + "sql": "SELECT SUM(cost) FROM infrastructure_projects WHERE country \u003d \u0027South Africa\u0027 AND year \u003d 2020 AND sector \u003d \u0027Rural Development\u0027;", + "sql_explanation": "This SQL query calculates the total cost of all infrastructure projects in the rural development sector in South Africa in 2020 by summing up the \u0027cost\u0027 column where the \u0027country\u0027 is \u0027South Africa\u0027, the \u0027year\u0027 is 2020, and the \u0027sector\u0027 is \u0027Rural Development\u0027." 
+}, { + "id": "2437", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many agricultural innovation projects were completed in rural areas of India in the last 5 years?", + "sql_context": "CREATE TABLE india_projects (project_id INT, project_name VARCHAR(50), location VARCHAR(20), completion_date DATE); INSERT INTO india_projects (project_id, project_name, location, completion_date) VALUES (1, \u0027Solar Pump Irrigation\u0027, \u0027rural\u0027, \u00272020-01-01\u0027), (2, \u0027Organic Farming Training\u0027, \u0027urban\u0027, \u00272019-12-31\u0027), (3, \u0027Agroforestry Development\u0027, \u0027rural\u0027, \u00272018-01-01\u0027);", + "sql": "SELECT COUNT(*) FROM india_projects WHERE location \u003d \u0027rural\u0027 AND completion_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR);", + "sql_explanation": "This query counts the number of agricultural innovation projects completed in rural areas of India in the last 5 years. It filters the records where location is \u0027rural\u0027 and completion_date is within the last 5 years, and then counts the number of records using the COUNT function." 
+}, { + "id": "2490", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for \u0027infrastructure projects\u0027 in \u0027South America\u0027?", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, region TEXT, budget FLOAT); INSERT INTO projects (id, name, region, budget) VALUES (1, \u0027Project 1\u0027, \u0027South America\u0027, 500000), (2, \u0027Project 2\u0027, \u0027North America\u0027, 750000), (3, \u0027Project 3\u0027, \u0027South America\u0027, 1000000);", + "sql": "SELECT SUM(projects.budget) FROM projects WHERE projects.region \u003d \u0027South America\u0027 AND projects.name LIKE \u0027infrastructure%\u0027;", + "sql_explanation": "This query calculates the total budget for \u0027infrastructure projects\u0027 in \u0027South America\u0027. It does this by selecting all rows from the \u0027projects\u0027 table where the region is \u0027South America\u0027 and the name starts with \u0027infrastructure\u0027, and then calculating the sum of the \u0027budget\u0027 column for those rows." 
+}, { + "id": "2649", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the economic diversification efforts in West Africa with an investment of over 5 million USD.", + "sql_context": "CREATE TABLE DiversificationEfforts (id INT, effort_name TEXT, location TEXT, investment FLOAT); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (1, \u0027Renewable Energy West Africa\u0027, \u0027West Africa\u0027, 5500000); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (2, \u0027Tourism Development West Africa\u0027, \u0027West Africa\u0027, 4000000); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (3, \u0027Mining West Africa\u0027, \u0027West Africa\u0027, 3500000);", + "sql": "SELECT effort_name, location FROM DiversificationEfforts WHERE investment \u003e 5000000 AND location LIKE \u0027%West Africa%\u0027;", + "sql_explanation": "This SQL query selects the effort_name and location columns from the DiversificationEfforts table, filters the rows where the investment is greater than 5 million USD and where the location contains \u0027West Africa\u0027 (representing economic diversification efforts in West Africa with an investment of over 5 million USD)." 
+}, { + "id": "2705", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of rural infrastructure projects that are related to \u0027Agriculture\u0027 in \u0027South America\u0027?", + "sql_context": "CREATE TABLE Rural_Infrastructure(project_id INT, project_name VARCHAR(50), country VARCHAR(50), cost FLOAT, sector VARCHAR(50)); INSERT INTO Rural_Infrastructure(project_id, project_name, country, cost, sector) VALUES (1, \u0027Road Construction\u0027, \u0027Argentina\u0027, 1000000, \u0027Agriculture\u0027), (2, \u0027Bridge Building\u0027, \u0027Brazil\u0027, 1200000, \u0027Transportation\u0027);", + "sql": "SELECT SUM(cost) as total_cost FROM Rural_Infrastructure WHERE country \u003d \u0027South America\u0027 AND sector \u003d \u0027Agriculture\u0027;", + "sql_explanation": "This query selects the sum of the \u0027cost\u0027 column from the \u0027Rural_Infrastructure\u0027 table, filters for \u0027country\u0027 values of \u0027South America\u0027 and \u0027sector\u0027 values of \u0027Agriculture\u0027, and calculates the sum for the resulting records." 
+}, { + "id": "2793", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community development initiatives in Country V?", + "sql_context": "CREATE TABLE rural_communities (id INT, community_name VARCHAR(255), location VARCHAR(255), country VARCHAR(255), initiative_type VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO rural_communities (id, community_name, location, country, initiative_type, start_date, end_date) VALUES (1, \u0027Community A\u0027, \u0027Village B, Country V\u0027, \u0027Country V\u0027, \u0027Community Development\u0027, \u00272019-01-01\u0027, \u00272023-12-31\u0027);", + "sql": "SELECT COUNT(*) FROM rural_communities WHERE country \u003d \u0027Country V\u0027 AND initiative_type \u003d \u0027Community Development\u0027;", + "sql_explanation": "This query counts all records in the \u0027rural_communities\u0027 table where the country is \u0027Country V\u0027 and the initiative_type is \u0027Community Development\u0027." 
+}, { + "id": "2803", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average budget for community development initiatives in Thailand in 2018?", + "sql_context": "CREATE TABLE community_development_initiatives (id INT, country VARCHAR(50), initiative_name VARCHAR(100), start_date DATE, end_date DATE, budget DECIMAL(10,2));", + "sql": "SELECT AVG(budget) FROM community_development_initiatives WHERE country \u003d \u0027Thailand\u0027 AND YEAR(start_date) \u003d 2018;", + "sql_explanation": "This query calculates the average budget for community development initiatives in Thailand in 2018 by averaging the budget column where the country is Thailand and start date year is 2018." 
+}, { + "id": "2909", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and adoption rates of agricultural innovations in rural communities in Bangladesh?", + "sql_context": "CREATE TABLE names (id INT, innovation TEXT, community TEXT, adoption_rate FLOAT); INSERT INTO names (id, innovation, community, adoption_rate) VALUES (1, \u0027SRI\u0027, \u0027Rural Community A\u0027, 0.9), (2, \u0027Hybrid Seeds\u0027, \u0027Rural Community B\u0027, 0.7);", + "sql": "SELECT innovation, adoption_rate FROM names WHERE community LIKE \u0027Rural Community%\u0027 AND country \u003d \u0027Bangladesh\u0027;", + "sql_explanation": "This query selects the names and adoption rates of agricultural innovations in rural communities in Bangladesh by filtering for communities starting with \u0027Rural Community\u0027 and the country in the names table." 
+}, { + "id": "2924", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding (in USD) received by agricultural innovation projects in Mexico led by Indigenous people?", + "sql_context": "CREATE TABLE Agricultural_Projects (id INT, project_name TEXT, lead_gender TEXT, country TEXT, funding_amount FLOAT); INSERT INTO Agricultural_Projects (id, project_name, lead_gender, country, funding_amount) VALUES (1, \u0027Green Innovations\u0027, \u0027Female\u0027, \u0027Mexico\u0027, 50000.00), (2, \u0027AgriTech Solutions\u0027, \u0027Male\u0027, \u0027Mexico\u0027, 75000.00), (3, \u0027Sustainable Farming\u0027, \u0027Indigenous\u0027, \u0027Mexico\u0027, 80000.00);", + "sql": "SELECT SUM(funding_amount) FROM Agricultural_Projects WHERE lead_gender \u003d \u0027Indigenous\u0027 AND country \u003d \u0027Mexico\u0027;", + "sql_explanation": "This SQL query calculates the total funding received by agricultural innovation projects in Mexico led by Indigenous people by using the SUM function on the funding_amount column, while filtering for projects led by Indigenous people in Mexico." 
+}, { + "id": "2960", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of agricultural innovation projects in the province of \"Bukidnon\" from the \"rural_projects\" table", + "sql_context": "CREATE TABLE rural_projects (id INT, province VARCHAR(255), project_type VARCHAR(255), start_date DATE);", + "sql": "SELECT COUNT(*) FROM rural_projects WHERE province \u003d \u0027Bukidnon\u0027 AND project_type \u003d \u0027Agricultural Innovation\u0027;", + "sql_explanation": "This query filters the \"rural_projects\" table to only show records where the province is \u0027Bukidnon\u0027 and the project_type is \u0027Agricultural Innovation\u0027. It then counts the number of records returned." 
+}, { + "id": "2970", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which agricultural innovations were implemented in Bangladesh in or after 2018?", + "sql_context": "CREATE TABLE agricultural_innovations (innovation_id INT, country TEXT, innovation TEXT, implementation_year INT); INSERT INTO agricultural_innovations (innovation_id, country, innovation, implementation_year) VALUES (1, \u0027Bangladesh\u0027, \u0027Precision agriculture\u0027, 2017), (2, \u0027Bangladesh\u0027, \u0027Drip irrigation\u0027, 2018), (3, \u0027Bangladesh\u0027, \u0027Vertical farming\u0027, 2019), (4, \u0027Bangladesh\u0027, \u0027Automated harvesting\u0027, 2020);", + "sql": "SELECT innovation FROM agricultural_innovations WHERE country \u003d \u0027Bangladesh\u0027 AND implementation_year \u003e\u003d 2018;", + "sql_explanation": "This query filters the agricultural_innovations table for the records with country as Bangladesh and implementation_year greater than or equal to 2018 and returns the innovation values." 
+}, { + "id": "3055", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique economic diversification initiatives from the \u0027diversification_projects\u0027 table, excluding those that have a budget less than 25000 or are located in \u0027Asia\u0027.", + "sql_context": "CREATE TABLE diversification_projects (id INT, initiative_name VARCHAR(50), budget INT, location VARCHAR(50)); INSERT INTO diversification_projects VALUES (1, \u0027Handicraft Production\u0027, 30000, \u0027Asia\u0027), (2, \u0027Eco-Tourism\u0027, 50000, \u0027Africa\u0027), (3, \u0027Livestock Farming\u0027, 75000, \u0027Europe\u0027);", + "sql": "SELECT DISTINCT initiative_name FROM diversification_projects WHERE budget \u003e 25000 AND location !\u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query selects distinct \u0027initiative_name\u0027 values from the \u0027diversification_projects\u0027 table, filtering rows where \u0027budget\u0027 is greater than 25000 and \u0027location\u0027 is not \u0027Asia\u0027." 
+}, { + "id": "3061", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of all completed rural infrastructure projects in Kenya in 2020?", + "sql_context": "CREATE TABLE projects (id INT, country VARCHAR(50), start_date DATE, end_date DATE, cost FLOAT); INSERT INTO projects (id, country, start_date, end_date, cost) VALUES (1, \u0027Kenya\u0027, \u00272018-01-01\u0027, \u00272019-12-31\u0027, 500000), (2, \u0027Kenya\u0027, \u00272019-01-01\u0027, \u00272020-12-31\u0027, 750000), (3, \u0027Kenya\u0027, \u00272020-01-01\u0027, \u00272021-12-31\u0027, 800000);", + "sql": "SELECT SUM(cost) FROM projects WHERE country \u003d \u0027Kenya\u0027 AND YEAR(end_date) \u003d 2020 AND end_date IS NOT NULL;", + "sql_explanation": "This query calculates the total cost of completed rural infrastructure projects in Kenya in 2020 by summing the \u0027cost\u0027 column for records where the \u0027country\u0027 is \u0027Kenya\u0027, the end date is in 2020, and there is a valid end date." 
+}, { + "id": "3132", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of community development initiatives in Zimbabwe from 2015 to 2018?", + "sql_context": "CREATE TABLE initiatives (id INT, country VARCHAR(50), start_date DATE, end_date DATE, cost FLOAT); INSERT INTO initiatives (id, country, start_date, end_date, cost) VALUES (1, \u0027Zimbabwe\u0027, \u00272015-01-01\u0027, \u00272016-12-31\u0027, 200000), (2, \u0027Zimbabwe\u0027, \u00272016-01-01\u0027, \u00272017-12-31\u0027, 250000), (3, \u0027Zimbabwe\u0027, \u00272017-01-01\u0027, \u00272018-12-31\u0027, 300000), (4, \u0027Zimbabwe\u0027, \u00272018-01-01\u0027, \u00272019-12-31\u0027, 350000);", + "sql": "SELECT SUM(cost) FROM initiatives WHERE country \u003d \u0027Zimbabwe\u0027 AND YEAR(start_date) BETWEEN 2015 AND 2018;", + "sql_explanation": "This query calculates the total cost of community development initiatives in Zimbabwe from 2015 to 2018 by summing the \u0027cost\u0027 column for records where the \u0027country\u0027 is \u0027Zimbabwe\u0027 and the start date is between 2015 and 2018." 
+}, { + "id": "3190", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the status of completed economic diversification projects in Europe to \u0027closed\u0027.", + "sql_context": "CREATE TABLE economic_diversification (id INT, name VARCHAR(255), region VARCHAR(255), status VARCHAR(255)); INSERT INTO economic_diversification (id, name, region, status) VALUES (1, \u0027Renewable Energy\u0027, \u0027Europe\u0027, \u0027completed\u0027);", + "sql": "UPDATE economic_diversification SET status \u003d \u0027closed\u0027 WHERE region \u003d \u0027Europe\u0027 AND status \u003d \u0027completed\u0027;", + "sql_explanation": "This query updates the status of completed economic diversification projects in Europe to \u0027closed\u0027 by filtering the \u0027economic_diversification\u0027 table based on the region and status, then updating the \u0027status\u0027 field for the records that match the filter criteria." 
+}, { + "id": "3271", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of fertilizer used per acre in Sub-Saharan Africa in 2021?", + "sql_context": "CREATE TABLE FertilizerUse (region TEXT, country TEXT, year INTEGER, acres INTEGER, fertilizer_amount INTEGER); INSERT INTO FertilizerUse (region, country, year, acres, fertilizer_amount) VALUES (\u0027Sub-Saharan Africa\u0027, \u0027Kenya\u0027, 2021, 1000, 500), (\u0027Sub-Saharan Africa\u0027, \u0027Nigeria\u0027, 2021, 1500, 700), (\u0027Sub-Saharan Africa\u0027, \u0027Tanzania\u0027, 2021, 1200, 600);", + "sql": "SELECT AVG(fertilizer_amount) FROM FertilizerUse WHERE region \u003d \u0027Sub-Saharan Africa\u0027 AND year \u003d 2021;", + "sql_explanation": "Calculates the average fertilizer amount used per acre in Sub-Saharan Africa in 2021 by summing up the fertilizer amounts and dividing by the total number of acres." 
+}, { + "id": "3280", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the community development initiatives in the \u0027community_development\u0027 table that were implemented after 2010.", + "sql_context": "CREATE TABLE community_development (id INT, initiative_name VARCHAR(50), location VARCHAR(50), implementation_date DATE); INSERT INTO community_development (id, initiative_name, location, implementation_date) VALUES (1, \u0027Green Spaces\u0027, \u0027India\u0027, \u00272012-05-01\u0027);", + "sql": "SELECT initiative_name, location FROM community_development WHERE implementation_date \u003e \u00272010-01-01\u0027;", + "sql_explanation": "The SQL query lists community development initiatives by selecting the \u0027initiative_name\u0027 and \u0027location\u0027 columns from the \u0027community_development\u0027 table where the \u0027implementation_date\u0027 is later than \u00272010-01-01\u0027." 
+}, { + "id": "3317", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in agricultural innovation in \u0027Africa\u0027 up to 2021?", + "sql_context": "CREATE TABLE agricultural_innovation (innovation_id INT, innovation_name TEXT, region TEXT, investment_amount INT, year INT); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (1, \u0027Drought-Resistant Crops\u0027, \u0027Africa\u0027, 2000000, 2020); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (2, \u0027Precision Farming\u0027, \u0027Asia\u0027, 3000000, 2021);", + "sql": "SELECT SUM(investment_amount) FROM agricultural_innovation WHERE year \u003c\u003d 2021 AND region \u003d \u0027Africa\u0027;", + "sql_explanation": "The SQL query calculates the total investment in agricultural innovation in \u0027Africa\u0027 up to 2021 by selecting the sum of the investment_amount column where the year is less than or equal to 2021 and the region is \u0027Africa\u0027." 
+}, { + "id": "3338", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for agricultural projects in \u0027rural_development\u0027 database that were completed after 2020?", + "sql_context": "CREATE TABLE agricultural_projects (id INT, project_name TEXT, budget FLOAT, completion_date DATE); INSERT INTO agricultural_projects (id, project_name, budget, completion_date) VALUES (1, \u0027AgriProjectA\u0027, 100000, \u00272021-01-01\u0027); INSERT INTO agricultural_projects (id, project_name, budget, completion_date) VALUES (2, \u0027AgriProjectB\u0027, 150000, \u00272022-03-15\u0027);", + "sql": "SELECT SUM(budget) as total_budget FROM agricultural_projects WHERE completion_date \u003e \u00272020-12-31\u0027;", + "sql_explanation": "This query calculates the total budget for agricultural projects in the \u0027agricultural_projects\u0027 table that were completed after 2020. It uses the SUM() function to add up the \u0027budget\u0027 column for all records where the completion date is after 2020. The WHERE clause is used to filter out any records where the completion date is before 2021." 
+}, { + "id": "3668", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many rural infrastructure projects were planned in 2018 and 2019?", + "sql_context": "CREATE TABLE rural_infrastructure (id INT, year INT, project VARCHAR(50), status VARCHAR(20)); INSERT INTO rural_infrastructure (id, year, project, status) VALUES (1, 2018, \u0027Road Construction\u0027, \u0027Planned\u0027), (2, 2019, \u0027Water Supply\u0027, \u0027In Progress\u0027), (3, 2020, \u0027Electrification\u0027, \u0027Completed\u0027);", + "sql": "SELECT COUNT(*) FROM rural_infrastructure WHERE year IN (2018, 2019) AND status \u003d \u0027Planned\u0027;", + "sql_explanation": "The SQL query counts the number of planned rural infrastructure projects in 2018 and 2019 by using the COUNT(*) function and filtering the records with a year of 2018 or 2019 and a status of \u0027Planned\u0027 using the IN operator." 
+}, { + "id": "3744", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community development initiatives were completed in 2020 and 2021 in the \u0027rural_development\u0027 database?", + "sql_context": "CREATE TABLE community_initiative (initiative_id INT, initiative_name VARCHAR(50), year INT, completed BOOLEAN); INSERT INTO community_initiative (initiative_id, initiative_name, year, completed) VALUES (1, \u0027Community Health Center\u0027, 2020, true);", + "sql": "SELECT COUNT(*) FROM community_initiative WHERE year IN (2020, 2021) AND completed \u003d true;", + "sql_explanation": "This query counts the number of rows in the \u0027community_initiative\u0027 table where the \u0027year\u0027 column is either 2020 or 2021 and the \u0027completed\u0027 column is true. This provides the number of community development initiatives completed in those two years." 
+}, { + "id": "3842", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget for all agricultural innovation projects in Kenya in 2020?", + "sql_context": "CREATE TABLE AgriculturalInnovations (id INT, country VARCHAR(50), project VARCHAR(50), budget FLOAT, year INT); INSERT INTO AgriculturalInnovations (id, country, project, budget, year) VALUES (1, \u0027Kenya\u0027, \u0027AgriTech App Development\u0027, 250000, 2020), (2, \u0027Kenya\u0027, \u0027Modern Irrigation Systems\u0027, 500000, 2020), (3, \u0027Uganda\u0027, \u0027Solar Powered Pumps\u0027, 300000, 2019);", + "sql": "SELECT SUM(budget) FROM AgriculturalInnovations WHERE country \u003d \u0027Kenya\u0027 AND year \u003d 2020;", + "sql_explanation": "This SQL query calculates the total budget for agricultural innovation projects in Kenya in 2020 by summing the budget column where country is \u0027Kenya\u0027 and year is 2020." 
+}, { + "id": "3971", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all rural infrastructure projects in the \u0027rural_infrastructure_2\u0027 table?", + "sql_context": "CREATE TABLE rural_infrastructure_2 (id INT, project_name VARCHAR(50), sector VARCHAR(50)); INSERT INTO rural_infrastructure_2 (id, project_name, sector) VALUES (3, \u0027Smart Irrigation\u0027, \u0027Rural Infrastructure\u0027), (4, \u0027Rural Connectivity\u0027, \u0027Rural Infrastructure\u0027);", + "sql": "SELECT project_name FROM rural_infrastructure_2 WHERE sector \u003d \u0027Rural Infrastructure\u0027;", + "sql_explanation": "The SQL query selects the \u0027project_name\u0027 column from the \u0027rural_infrastructure_2\u0027 table, filtering rows with a \u0027sector\u0027 value of \u0027Rural Infrastructure\u0027 to get the project names." 
+}, { + "id": "3974", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of all agricultural innovation projects, ordered by the project cost in descending order?", + "sql_context": "CREATE TABLE agri_innovation_projects (id INT, project_name VARCHAR(255), location VARCHAR(255), sector VARCHAR(255), cost FLOAT); INSERT INTO agri_innovation_projects (id, project_name, location, sector, cost) VALUES (1, \u0027Precision Agriculture\u0027, \u0027Village X\u0027, \u0027Agriculture\u0027, 35000.00), (2, \u0027Drip Irrigation\u0027, \u0027Village Y\u0027, \u0027Agriculture\u0027, 28000.00), (3, \u0027Solar Powered Cold Storage\u0027, \u0027Village Z\u0027, \u0027Agriculture\u0027, 52000.00);", + "sql": "SELECT SUM(cost) as total_cost FROM agri_innovation_projects ORDER BY total_cost DESC;", + "sql_explanation": "This query calculates the total cost of all agricultural innovation projects by summing the \u0027cost\u0027 column. The result is then ordered in descending order by the total cost." 
+}, { + "id": "3976", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many farmers in Nepal adopted innovative agricultural practices in 2019?", + "sql_context": "CREATE TABLE Farmers (Farmer_ID INT, Farmer_Name TEXT, Location TEXT, Innovations_Adopted INT, Year INT); INSERT INTO Farmers (Farmer_ID, Farmer_Name, Location, Innovations_Adopted, Year) VALUES (1, \u0027Ram Bahadur\u0027, \u0027Nepal\u0027, 1, 2019);", + "sql": "SELECT SUM(Innovations_Adopted) FROM Farmers WHERE Year \u003d 2019 AND Location \u003d \u0027Nepal\u0027;", + "sql_explanation": "This query calculates the number of farmers in Nepal who adopted innovative agricultural practices in 2019. It filters the Farmers table by the specified conditions (Year \u003d 2019 and Location \u003d \u0027Nepal\u0027) and then sums the Innovations_Adopted column values for the selected records." 
+}, { + "id": "4129", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the locations of rural infrastructure projects with a completion percentage greater than 70?", + "sql_context": "CREATE TABLE RuralInfrastructure ( id INT, project_id INT, project_type VARCHAR(255), completion_percentage INT);", + "sql": "SELECT DISTINCT location FROM RuralInfrastructure WHERE completion_percentage \u003e 70;", + "sql_explanation": "This query returns the distinct location from the RuralInfrastructure table if the completion percentage is greater than 70." +}, { + "id": "4271", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of rural infrastructure projects initiated in Africa?", + "sql_context": "CREATE TABLE IF NOT EXISTS projects (id INT, location VARCHAR(255)); INSERT INTO projects (id, location) VALUES (1, \u0027Kenya\u0027), (2, \u0027Nigeria\u0027), (3, \u0027Senegal\u0027);", + "sql": "SELECT COUNT(*) FROM projects WHERE location IN (\u0027Kenya\u0027, \u0027Nigeria\u0027, \u0027Senegal\u0027);", + "sql_explanation": "This query counts the total number of rows in the \u0027projects\u0027 table where the \u0027location\u0027 is 
either \u0027Kenya\u0027, \u0027Nigeria\u0027, or \u0027Senegal\u0027. It\u0027s equivalent to counting the total rural projects in Africa." +}, { + "id": "4317", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which community development initiatives have budget allocations over 75000 in the \u0027community_development\u0027 table?", + "sql_context": "CREATE TABLE community_development (id INT, initiative_name VARCHAR(50), budget DECIMAL(10, 2)); INSERT INTO community_development (id, initiative_name, budget) VALUES (1, \u0027Clean Water Initiative\u0027, 50000.00), (2, \u0027Renewable Energy\u0027, 100000.00);", + "sql": "SELECT initiative_name, budget FROM community_development WHERE budget \u003e 75000;", + "sql_explanation": "The SQL query selects the \u0027initiative_name\u0027 and \u0027budget\u0027 columns from the \u0027community_development\u0027 table, filtering rows with a \u0027budget\u0027 value greater than 75000 to get the community development initiatives with budgets over 75000." 
+}, { + "id": "4360", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which rural infrastructure projects have an estimated cost greater than $600,000?", + "sql_context": "CREATE TABLE RuralInfrastructure (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), type VARCHAR(20), cost FLOAT, completion_date DATE); INSERT INTO RuralInfrastructure (id, name, location, type, cost, completion_date) VALUES (1, \u0027Water Reservoir\u0027, \u0027Rural India\u0027, \u0027Water Resource\u0027, 500000, \u00272021-06-30\u0027), (2, \u0027Electric Grid\u0027, \u0027Rural Indonesia\u0027, \u0027Power Supply\u0027, 800000, \u00272020-12-31\u0027), (3, \u0027Broadband Internet\u0027, \u0027Rural Colombia\u0027, \u0027Telecommunications\u0027, 700000, \u00272023-05-31\u0027);", + "sql": "SELECT name, location, type, cost FROM RuralInfrastructure WHERE cost \u003e 600000;", + "sql_explanation": "This query retrieves the names, locations, types, and costs of rural infrastructure projects with an estimated cost greater than $600,000." 
+}, { + "id": "4410", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the names and start dates of all rural projects in the \"Mindanao\" region from the \"rural_projects\" table", + "sql_context": "CREATE TABLE rural_projects (id INT, province VARCHAR(255), project_type VARCHAR(255), region VARCHAR(255), start_date DATE);", + "sql": "SELECT project_type, start_date FROM rural_projects WHERE region \u003d \u0027Mindanao\u0027;", + "sql_explanation": "This query filters the \"rural_projects\" table to only show records where the \u0027region\u0027 is \u0027Mindanao\u0027. It then selects the \u0027project_type\u0027 and \u0027start_date\u0027 columns." 
+}, { + "id": "4502", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average budget for rural infrastructure projects in Africa.", + "sql_context": "CREATE TABLE rural_infrastructure (id INT, project_name VARCHAR(50), location VARCHAR(50), budget FLOAT); INSERT INTO rural_infrastructure (id, project_name, location, budget) VALUES (1, \u0027Precision Agriculture\u0027, \u0027Nigeria\u0027, 300000.00);", + "sql": "SELECT AVG(budget) FROM rural_infrastructure WHERE location LIKE \u0027%Africa%\u0027;", + "sql_explanation": "The SQL query calculates the average budget for rural infrastructure projects in Africa by averaging the \u0027budget\u0027 column values in the \u0027rural_infrastructure\u0027 table where the \u0027location\u0027 contains \u0027Africa\u0027." 
+}, { + "id": "4561", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Identify the agricultural innovation projects in \u0027RuralDev\u0027 database that have been decommissioned or discontinued.", + "sql_context": "CREATE TABLE agricultural_innovation_status (id INT, name VARCHAR(255), status VARCHAR(255)); INSERT INTO agricultural_innovation_status (id, name, status) VALUES (1, \u0027Precision Agriculture\u0027, \u0027Active\u0027), (2, \u0027Organic Farming\u0027, \u0027Active\u0027), (3, \u0027Genetic Engineering\u0027, \u0027Discontinued\u0027);", + "sql": "SELECT * FROM agricultural_innovation_status WHERE status \u003d \u0027Discontinued\u0027;", + "sql_explanation": "The SQL query uses the WHERE clause to filter records from the \u0027agricultural_innovation_status\u0027 table based on the \u0027status\u0027 column value \u0027Discontinued\u0027. This returns all agricultural innovation projects that have been decommissioned or discontinued in the \u0027RuralDev\u0027 database." 
+}, { + "id": "4620", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many agricultural innovation projects were implemented in the \u0027ruraldev\u0027 schema before 2018?", + "sql_context": "CREATE TABLE ruraldev.innovation_projects (id INT, project_name VARCHAR(50), start_year INT); INSERT INTO ruraldev.innovation_projects (id, project_name, start_year) VALUES (1, \u0027Precision Farming\u0027, 2015), (2, \u0027Drip Irrigation\u0027, 2017), (3, \u0027Vertical Farming\u0027, 2020);", + "sql": "SELECT COUNT(*) FROM ruraldev.innovation_projects WHERE start_year \u003c 2018;", + "sql_explanation": "This query counts the number of records in the \u0027innovation_projects\u0027 table where the start_year is before 2018, providing the number of agricultural innovation projects implemented in the \u0027ruraldev\u0027 schema before that year." 
+}, { + "id": "4870", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of women-led agricultural businesses in the \u0027business_data\u0027 table?", + "sql_context": "CREATE TABLE business_data (business_id INT, business_name VARCHAR(50), gender VARCHAR(10)); INSERT INTO business_data (business_id, business_name, gender) VALUES (1, \u0027Green Acres\u0027, \u0027female\u0027), (2, \u0027Brown Farms\u0027, \u0027male\u0027), (3, \u0027Eco Harvest\u0027, \u0027non-binary\u0027);", + "sql": "SELECT COUNT(business_id) FROM business_data WHERE gender \u003d \u0027female\u0027;", + "sql_explanation": "The query counts the number of women-led agricultural businesses in the \u0027business_data\u0027 table by filtering the data where the gender column is equal to \u0027female\u0027 and then counting the number of rows." 
+}, { + "id": "5273", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of rural infrastructure projects in the \u0027rural_infrastructure\u0027 table, ordered by the start date.", + "sql_context": "CREATE TABLE rural_infrastructure (id INT, project_name VARCHAR(255), start_date DATE); INSERT INTO rural_infrastructure (id, project_name, start_date) VALUES (1, \u0027Road Construction\u0027, \u00272021-01-01\u0027), (2, \u0027Bridge Building\u0027, \u00272020-06-15\u0027);", + "sql": "SELECT COUNT(*) FROM rural_infrastructure ORDER BY start_date", + "sql_explanation": "Count the number of projects in the rural_infrastructure table and order the results by start date." 
+}, { + "id": "5455", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all agricultural innovation metrics in the \u0027innovation\u0027 schema, along with their respective types.", + "sql_context": "CREATE SCHEMA innovation; CREATE TABLE metrics (metric_name VARCHAR(50), metric_type VARCHAR(50)); INSERT INTO metrics (metric_name, metric_type) VALUES (\u0027Crop Yield\u0027, \u0027Quantitative\u0027), (\u0027Cultural Acceptance\u0027, \u0027Qualitative\u0027), (\u0027Sustainability\u0027, \u0027Qualitative\u0027);", + "sql": "SELECT metric_name, metric_type FROM innovation.metrics;", + "sql_explanation": "This query lists all agricultural innovation metrics in the \u0027innovation\u0027 schema, along with their respective types. It selects both columns from the \u0027metrics\u0027 table." 
+}, { + "id": "5536", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \u0027rural_infrastructure\u0027 table where the budget is less than 50000.", + "sql_context": "CREATE TABLE rural_infrastructure (id INT, project_name VARCHAR(255), budget INT);", + "sql": "DELETE FROM rural_infrastructure WHERE budget \u003c 50000;", + "sql_explanation": "This query deletes all records in the \u0027rural_infrastructure\u0027 table where the budget is less than 50000." +}, { + "id": "5635", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the types of crops grown in the \u0027crop_data\u0027 table and their respective planting seasons.", + "sql_context": "CREATE TABLE crop_data (crop_id INT, crop_name VARCHAR(20), planting_season VARCHAR(20)); INSERT INTO crop_data (crop_id, crop_name, planting_season) VALUES (1, \u0027corn\u0027, \u0027spring\u0027), (2, \u0027rice\u0027, \u0027summer\u0027), (3, \u0027wheat\u0027, \u0027winter\u0027);", + "sql": "SELECT crop_name, planting_season FROM crop_data;", + "sql_explanation": "The query simply selects the crop_name and planting_season columns from the \u0027crop_data\u0027 table, listing the types of crops grown and 
their respective planting seasons." +}, { + "id": "5646", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all economic diversification efforts that were completed before 2018.", + "sql_context": "CREATE TABLE efforts (effort_id INT, effort_name VARCHAR(255), completion_year INT);", + "sql": "DELETE FROM efforts WHERE completion_year \u003c 2018;", + "sql_explanation": "The SQL query deletes all records from the \u0027efforts\u0027 table where the completion_year is before 2018." +}, { + "id": "5653", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum budget for any agricultural innovation project?", + "sql_context": "CREATE TABLE agricultural_innovation (id INT, region VARCHAR(50), budget DECIMAL(10, 2)); INSERT INTO agricultural_innovation (id, region, budget) VALUES (1, \u0027region_1\u0027, 100000.00); INSERT INTO agricultural_innovation (id, region, budget) VALUES (2, \u0027region_2\u0027, 150000.00);", + "sql": "SELECT MIN(budget) FROM agricultural_innovation;", + "sql_explanation": "This query calculates the minimum budget for any agricultural innovation project by selecting the \u0027budget\u0027 column from the \u0027agricultural_innovation\u0027 
table and using the MIN() function." +}, { + "id": "5715", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the types of crops grown in the rural communities of \u0027RuralDev\u0027 database along with the number of farmers growing them.", + "sql_context": "CREATE TABLE crops_farmers (name VARCHAR(255), type VARCHAR(255), num_farmers INT); INSERT INTO crops_farmers (name, type, num_farmers) VALUES (\u0027Corn\u0027, \u0027Grain\u0027, 50), (\u0027Soybean\u0027, \u0027Legume\u0027, 35), (\u0027Cotton\u0027, \u0027Fiber\u0027, 40);", + "sql": "SELECT type, num_farmers FROM crops_farmers;", + "sql_explanation": "The SQL query uses the SELECT clause to retrieve records from the \u0027crops_farmers\u0027 table in the \u0027RuralDev\u0027 database. This returns the types of crops grown in the rural communities along with the number of farmers growing them." 
+}, { + "id": "5803", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \"Livestock\" table where age is greater than 5", + "sql_context": "CREATE TABLE Livestock (id INT PRIMARY KEY, breed VARCHAR(255), age INT);", + "sql": "DELETE FROM Livestock WHERE age \u003e 5;", + "sql_explanation": "* This SQL query deletes all records in the \"Livestock\" table where the age is greater than 5." +}, { + "id": "2589", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many successful Mars missions were conducted by the Soviet Union?", + "sql_context": "CREATE TABLE Missions (id INT, name VARCHAR(100), country VARCHAR(100), mission_status VARCHAR(50)); INSERT INTO Missions (id, name, country, mission_status) VALUES (1, \u0027Mars 2\u0027, \u0027Soviet Union\u0027, \u0027Success\u0027), (2, \u0027Mars 3\u0027, \u0027Soviet Union\u0027, \u0027Success\u0027), (3, \u0027Mars 5\u0027, \u0027Soviet Union\u0027, \u0027Failure\u0027);", + "sql": "SELECT COUNT(*) FROM Missions WHERE country \u003d \u0027Soviet Union\u0027 AND mission_status \u003d \u0027Success\u0027 AND target_planet \u003d \u0027Mars\u0027;", + "sql_explanation": "This query counts the number of successful Mars missions conducted by the 
Soviet Union by selecting the COUNT function on all rows with the specified conditions in the WHERE clause." +}, { + "id": "3121", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for astrophysics research in countries with a population over 100 million?", + "sql_context": "CREATE TABLE BudgetAllocation (id INT, country VARCHAR(255), research_area VARCHAR(255), budget FLOAT, population INT);", + "sql": "SELECT SUM(budget) FROM BudgetAllocation WHERE research_area \u003d \u0027astrophysics\u0027 AND population \u003e 100000000;", + "sql_explanation": "This SQL query calculates the total budget allocated for astrophysics research in countries with a population over 100 million by filtering the \u0027BudgetAllocation\u0027 table for rows where the \u0027research_area\u0027 column is equal to \u0027astrophysics\u0027 and the \u0027population\u0027 column is greater than 100 million. The query then uses the SUM() function to find the sum of the \u0027budget\u0027 column for these rows." 
+}, { + "id": "3532", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mass of spacecraft manufactured by SpaceCorp in 2022?", + "sql_context": "CREATE TABLE SpacecraftManufacturing (ID INT, Manufacturer VARCHAR(255), Mass INT, Year INT); INSERT INTO SpacecraftManufacturing (ID, Manufacturer, Mass, Year) VALUES (1, \u0027SpaceCorp\u0027, 3000, 2022), (2, \u0027SpaceCorp\u0027, 4000, 2022), (3, \u0027Galactic\u0027, 5000, 2022);", + "sql": "SELECT AVG(Mass) FROM SpacecraftManufacturing WHERE Manufacturer \u003d \u0027SpaceCorp\u0027 AND Year \u003d 2022;", + "sql_explanation": "The SQL query calculates the average mass of spacecraft manufactured by SpaceCorp in 2022 by filtering the SpacecraftManufacturing table for SpaceCorp records from 2022 and calculating the average mass." 
+}, { + "id": "3613", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total number of spacecraft manufactured by Boeing and SpaceX?", + "sql_context": "CREATE TABLE spacecraft_manufacturing (manufacturer VARCHAR(50), spacecraft VARCHAR(50)); INSERT INTO spacecraft_manufacturing (manufacturer, spacecraft) VALUES (\u0027Boeing\u0027, \u0027Starliner\u0027), (\u0027SpaceX\u0027, \u0027Dragon\u0027), (\u0027SpaceX\u0027, \u0027Starship\u0027), (\u0027Boeing\u0027, \u0027702SP\u0027), (\u0027SpaceX\u0027, \u0027Falcon\u0027);", + "sql": "SELECT SUM(manufacturer \u003d \u0027Boeing\u0027 OR manufacturer \u003d \u0027SpaceX\u0027) FROM spacecraft_manufacturing;", + "sql_explanation": "This query calculates the total number of spacecraft manufactured by Boeing and SpaceX, by summing a boolean expression that checks if each row\u0027s manufacturer is either Boeing or SpaceX." 
+}, { + "id": "4335", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of spacecraft manufactured by SpaceTech Inc.?", + "sql_context": "CREATE TABLE SpacecraftManufacturing (company VARCHAR(255), spacecraft_model VARCHAR(255), cost INT); INSERT INTO SpacecraftManufacturing (company, spacecraft_model, cost) VALUES (\u0027SpaceTech Inc.\u0027, \u0027Voyager\u0027, 800000), (\u0027SpaceTech Inc.\u0027, \u0027Galileo\u0027, 1200000), (\u0027SpaceTech Inc.\u0027, \u0027Cassini\u0027, 1500000);", + "sql": "SELECT AVG(cost) FROM SpacecraftManufacturing WHERE company \u003d \u0027SpaceTech Inc.\u0027;", + "sql_explanation": "This SQL query calculates the average cost of spacecraft manufactured by SpaceTech Inc. 
It does this by selecting the average value of the \u0027cost\u0027 column for all rows where the \u0027company\u0027 column is equal to \u0027SpaceTech Inc.\u0027" +}, { + "id": "4536", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum medical age limit for astronauts?", + "sql_context": "CREATE TABLE medical_regulations (id INT, parameter VARCHAR(50), value INT); INSERT INTO medical_regulations (id, parameter, value) VALUES (1, \u0027medical_age_limit\u0027, 60);", + "sql": "SELECT value FROM medical_regulations WHERE parameter \u003d \u0027medical_age_limit\u0027;", + "sql_explanation": "This query retrieves the maximum medical age limit for astronauts. It uses the SELECT statement to filter the \u0027medical_regulations\u0027 table by the \u0027parameter\u0027 column with a value of \u0027medical_age_limit\u0027, returning only the corresponding \u0027value\u0027 from the matched record." 
+}, { + "id": "4608", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest launch date of a mission for \u0027GalacticExplorers\u0027?", + "sql_context": "CREATE TABLE Missions (id INT, name VARCHAR(50), company VARCHAR(50), launch_date DATE); INSERT INTO Missions (id, name, company, launch_date) VALUES (1, \u0027Pegasus 1\u0027, \u0027GalacticExplorers\u0027, \u00272025-04-10\u0027), (2, \u0027Pegasus 2\u0027, \u0027GalacticExplorers\u0027, \u00272027-07-04\u0027), (3, \u0027Pegasus 3\u0027, \u0027GalacticExplorers\u0027, \u00272029-10-31\u0027);", + "sql": "SELECT MIN(launch_date) FROM Missions WHERE company \u003d \u0027GalacticExplorers\u0027;", + "sql_explanation": "The SQL query calculates the earliest launch date of a mission for \u0027GalacticExplorers\u0027 by filtering the Missions table for the company \u0027GalacticExplorers\u0027 and then computing the minimum (MIN) of the launch_date column." 
+}, { + "id": "4669", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of all spacecraft that use a specific type of propulsion system?", + "sql_context": "CREATE TABLE Spacecraft (SpacecraftID INT, PropulsionSystem VARCHAR, Mass FLOAT);", + "sql": "SELECT SUM(Mass) FROM Spacecraft WHERE PropulsionSystem \u003d \u0027Ion Thruster\u0027;", + "sql_explanation": "This query calculates the total mass of all spacecraft that use a specific type of propulsion system (in this case, Ion Thruster). It does so by filtering the Spacecraft table to only include rows where PropulsionSystem is \u0027Ion Thruster\u0027, then calculating the sum of the Mass column for those rows." 
+}, { + "id": "4864", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all spacecraft that use a specific type of propulsion system", + "sql_context": "CREATE TABLE Spacecraft (id INT, name VARCHAR(30), propulsion_system VARCHAR(30)); INSERT INTO Spacecraft (id, name, propulsion_system) VALUES (1, \u0027Nebula\u0027, \u0027Ion Thruster\u0027); INSERT INTO Spacecraft (id, name, propulsion_system) VALUES (2, \u0027Pulsar\u0027, \u0027Chemical Rocket\u0027); INSERT INTO Spacecraft (id, name, propulsion_system) VALUES (3, \u0027Voyager\u0027, \u0027Gravitational Assist\u0027);", + "sql": "SELECT name FROM Spacecraft WHERE propulsion_system \u003d \u0027Ion Thruster\u0027;", + "sql_explanation": "This SQL query lists all spacecraft that use a specific type of propulsion system by selecting the names from the Spacecraft table where the propulsion system is equal to the specified value." 
+}, { + "id": "4901", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of \u0027Mars Rover\u0027 missions?", + "sql_context": "CREATE TABLE ProjectCosts (project VARCHAR(50), cost FLOAT); INSERT INTO ProjectCosts (project, cost) VALUES (\u0027Ares\u0027, 2000), (\u0027Artemis\u0027, 1500), (\u0027Mars Rover\u0027, 2500), (\u0027Curiosity\u0027, 1800);", + "sql": "SELECT SUM(cost) FROM ProjectCosts WHERE project LIKE \u0027%Mars Rover%\u0027;", + "sql_explanation": "This query calculates the total cost of \u0027Mars Rover\u0027 missions by selecting the cost column from the ProjectCosts table where the project contains the string \u0027Mars Rover\u0027, and then computing the sum (SUM) of those values." 
+}, { + "id": "4923", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of astronauts at their first spaceflight?", + "sql_context": "CREATE TABLE astronauts (id INT, name VARCHAR(255), birth_date DATE, first_flight_date DATE); INSERT INTO astronauts (id, name, birth_date, first_flight_date) VALUES (1, \u0027Alan Shepard\u0027, \u00271923-11-18\u0027, \u00271961-05-05\u0027), (2, \u0027Gus Grissom\u0027, \u00271926-04-03\u0027, \u00271961-07-21\u0027);", + "sql": "SELECT MIN(DATEDIFF(first_flight_date, birth_date)) FROM astronauts;", + "sql_explanation": "This query calculates the minimum difference in days between the birth_date and first_flight_date for all astronauts using the MIN and DATEDIFF functions." 
+}, { + "id": "5140", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the minimum distance from the sun of objects in the Oort Cloud", + "sql_context": "CREATE TABLE objects (id INT, name VARCHAR(50), distance DECIMAL(10,2), category VARCHAR(50));", + "sql": "SELECT MIN(distance) FROM objects WHERE category \u003d \u0027Oort Cloud\u0027;", + "sql_explanation": "The SQL query calculates the minimum distance from the sun of objects in the Oort Cloud by selecting the minimum distance value." +}, { + "id": "5185", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the astronauts that have participated in space missions longer than 250 days?", + "sql_context": "CREATE TABLE space_missions (id INT, mission_name VARCHAR(255), astronaut_name VARCHAR(255), duration INT); INSERT INTO space_missions (id, mission_name, astronaut_name, duration) VALUES (1, \u0027Apollo 11\u0027, \u0027Neil Armstrong\u0027, 195), (2, \u0027Apollo 12\u0027, \u0027Jane Foster\u0027, 244), (3, \u0027Ares 3\u0027, \u0027Mark Watney\u0027, 568), (4, \u0027Apollo 18\u0027, \u0027Anna Mitchell\u0027, 205);", + "sql": "SELECT astronaut_name FROM space_missions WHERE duration \u003e 250;", + "sql_explanation": "This SQL query retrieves the names 
of astronauts who have participated in space missions longer than 250 days by filtering the \u0027space_missions\u0027 table on the \u0027duration\u0027 column." +}, { + "id": "5276", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of all spacecraft built for astrophysics research?", + "sql_context": "CREATE TABLE Spacecraft (type VARCHAR(20), name VARCHAR(30), mass FLOAT); INSERT INTO Spacecraft (type, name, mass) VALUES (\u0027Astrophysics\u0027, \u0027Chandra\u0027, 4850.0), (\u0027Astrophysics\u0027, \u0027Spitzer\u0027, 1940.0);", + "sql": "SELECT SUM(mass) FROM Spacecraft WHERE type \u003d \u0027Astrophysics\u0027;", + "sql_explanation": "This query calculates the total mass of all spacecraft built for astrophysics research." 
+}, { + "id": "5330", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of spacecraft that have landed on Mars?", + "sql_context": "CREATE TABLE spacecraft(id INT, name VARCHAR(50), destination VARCHAR(50), mass FLOAT); INSERT INTO spacecraft VALUES(1, \u0027Curiosity Rover\u0027, \u0027Mars\u0027, 900.), (2, \u0027InSight Lander\u0027, \u0027Mars\u0027, 360.);", + "sql": "SELECT SUM(mass) FROM spacecraft WHERE destination \u003d \u0027Mars\u0027;", + "sql_explanation": "This query calculates the total mass of spacecraft that have landed on Mars by using the SUM function on the mass column and filtering the data for \u0027Mars\u0027 in the destination column." +}, { + "id": "5386", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which spacecraft were launched before 2010?", + "sql_context": "CREATE TABLE Spacecraft (Id INT, Name VARCHAR(50), LaunchDate DATE);", + "sql": "SELECT Name FROM Spacecraft WHERE YEAR(LaunchDate) \u003c 2010;", + "sql_explanation": "The SQL query selects the Name column from the Spacecraft table, filtering results by the LaunchDate column earlier than 2010." 
+}, { + "id": "5500", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete spacecraft records with mass greater than 10000 kg?", + "sql_context": "CREATE TABLE SpacecraftManufacturing (ID INT, Manufacturer VARCHAR(255), Mass INT); INSERT INTO SpacecraftManufacturing (ID, Manufacturer, Mass) VALUES (1, \u0027SpaceCorp\u0027, 5000), (2, \u0027SpaceCorp\u0027, 15000);", + "sql": "DELETE FROM SpacecraftManufacturing WHERE Mass \u003e 10000;", + "sql_explanation": "The SQL query deletes records from the SpacecraftManufacturing table by filtering for spacecraft with a mass greater than 10000 kg." +}, { + "id": "5678", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the medical record of astronaut Mae Jemison.", + "sql_context": "CREATE TABLE MedicalData (id INT, astronaut_id INT, medical_condition VARCHAR(255), medical_examination_date DATE, age INT); INSERT INTO MedicalData (id, astronaut_id, medical_condition, medical_examination_date, age) VALUES (1, 1, \u0027None\u0027, \u00272020-01-01\u0027, 71); INSERT INTO MedicalData (id, astronaut_id, medical_condition, medical_examination_date, age) VALUES (2, 2, \u0027Asthma\u0027, \u00272019-10-15\u0027, N/A); INSERT INTO MedicalData (id, astronaut_id, medical_condition, medical_examination_date, age) VALUES (3, 
3, \u0027Back pain\u0027, \u00272021-05-24\u0027, 56); INSERT INTO MedicalData (id, astronaut_id, medical_condition, medical_examination_date, age) VALUES (4, 4, \u0027Sinusitis\u0027, \u00272021-04-01\u0027, 62);", + "sql": "DELETE FROM MedicalData WHERE astronaut_id \u003d 4;", + "sql_explanation": "This query deletes the medical record of astronaut Mae Jemison by removing all records where the astronaut_id is 4." +}, { + "id": "5696", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all space mission records from the year 2025.", + "sql_context": "CREATE TABLE space_missions (id INT, mission_name TEXT, year INT, country TEXT); INSERT INTO space_missions (id, mission_name, year, country) VALUES (1, \u0027Artemis III\u0027, 2025, \u0027USA\u0027);", + "sql": "DELETE FROM space_missions WHERE year \u003d 2025;", + "sql_explanation": "This SQL query deletes all space mission records from the year 2025 by filtering the space_missions table for rows where the year column is equal to 2025 and then removing those rows from the table." 
+}, { + "id": "5766", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum duration of a space mission?", + "sql_context": "CREATE TABLE SpaceMissions (ID INT, MissionName VARCHAR(50), Duration INT); INSERT INTO SpaceMissions VALUES (1, \u0027Apollo 11\u0027, 195), (2, \u0027Apollo 13\u0027, 142);", + "sql": "SELECT MIN(Duration) FROM SpaceMissions;", + "sql_explanation": "This query calculates the minimum duration of a space mission in the SpaceMissions table." +}, { + "id": "5784", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many astronauts are there in the medical database?", + "sql_context": "CREATE TABLE medical_database (id INT, name VARCHAR(50), age INT, medical_conditions VARCHAR(50)); INSERT INTO medical_database (id, name, age, medical_conditions) VALUES (1, \u0027John Doe\u0027, 45, \u0027None\u0027), (2, \u0027Jane Smith\u0027, 38, \u0027Asthma\u0027);", + "sql": "SELECT COUNT(*) FROM medical_database;", + "sql_explanation": "This query counts the total number of astronauts in the medical database. It uses the COUNT() function on the \u0027*\u0027 wildcard, which represents all records in the \u0027medical_database\u0027 table." 
+}, { + "id": "5820", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum mass of a spacecraft?", + "sql_context": "CREATE TABLE Spacecraft (SpacecraftID INT, Manufacturer VARCHAR(50), Model VARCHAR(50), Mass FLOAT);", + "sql": "SELECT MAX(Mass) FROM Spacecraft;", + "sql_explanation": "This query finds the maximum mass of a spacecraft by selecting the maximum value of the \u0027Mass\u0027 column in the \u0027Spacecraft\u0027 table." +}, { + "id": "2098", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction value for the month of January 2022?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, transaction_date DATE, transaction_category VARCHAR(255), transaction_value DECIMAL(10,2)); INSERT INTO transactions (transaction_id, transaction_date, transaction_category, transaction_value) VALUES (1, \u00272022-01-02\u0027, \u0027Food\u0027, 75.00), (2, \u00272022-01-05\u0027, \u0027Electronics\u0027, 350.00), (3, \u00272022-01-10\u0027, \u0027Clothing\u0027, 200.00);", + "sql": "SELECT AVG(transaction_value) as avg_transaction_value FROM transactions WHERE transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027;", + 
"sql_explanation": "The SQL query calculates the average transaction value for the month of January 2022 by using the AVG() function and the WHERE clause to filter the transactions by date range." +}, { + "id": "2827", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of transactions for wholesale customers in the North America region?", + "sql_context": "CREATE TABLE transactions (id INT, customer_type VARCHAR(20), region VARCHAR(20), transaction_amount DECIMAL(10, 2)); INSERT INTO transactions (id, customer_type, region, transaction_amount) VALUES (1, \u0027retail\u0027, \u0027Latin America\u0027, 100.00), (2, \u0027wholesale\u0027, \u0027North America\u0027, 500.00), (3, \u0027retail\u0027, \u0027Europe\u0027, 200.00), (4, \u0027wholesale\u0027, \u0027Asia-Pacific\u0027, 300.00);", + "sql": "SELECT SUM(transaction_amount) FROM transactions WHERE customer_type \u003d \u0027wholesale\u0027 AND region \u003d \u0027North America\u0027;", + "sql_explanation": "This SQL query calculates the total value of transactions for wholesale customers in the North America region by using the SUM function on the transaction_amount column, and filtering the records by the customer_type and region columns with the specified values." 
+}, { + "id": "3009", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many high-risk accounts are in the Asia-Pacific region with a balance greater than $100,000?", + "sql_context": "CREATE TABLE accounts (id INT, region VARCHAR(20), risk_level VARCHAR(10), balance DECIMAL(15, 2)); INSERT INTO accounts (id, region, risk_level, balance) VALUES (1, \u0027Asia-Pacific\u0027, \u0027high\u0027, 120000.00), (2, \u0027Europe\u0027, \u0027medium\u0027, 80000.00), (3, \u0027North America\u0027, \u0027low\u0027, 50000.00), (4, \u0027Asia-Pacific\u0027, \u0027high\u0027, 150000.00);", + "sql": "SELECT COUNT(*) FROM accounts WHERE region \u003d \u0027Asia-Pacific\u0027 AND risk_level \u003d \u0027high\u0027 AND balance \u003e 100000.00;", + "sql_explanation": "This SQL query counts the number of high-risk accounts in the Asia-Pacific region with a balance greater than $100,000 by using the COUNT function with the asterisk (*) symbol, and filtering the records by the region, risk_level, and balance columns with the specified values." 
+}, { + "id": "3469", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fraud cases detected in Q2 2022?", + "sql_context": "CREATE TABLE fraud_detection (fraud_id INT, case_number INT, detection_date DATE); INSERT INTO fraud_detection (fraud_id, case_number, detection_date) VALUES (1, 123, \u00272022-04-05\u0027), (2, 456, \u00272022-05-10\u0027), (3, 789, \u00272022-06-15\u0027);", + "sql": "SELECT COUNT(*) FROM fraud_detection WHERE detection_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "This SQL query calculates the total number of fraud cases detected during the second quarter of 2022. It filters the records based on the \u0027detection_date\u0027 column using the WHERE clause, selecting only those records where the detection date falls within the range of April 1, 2022 and June 30, 2022. Finally, the COUNT function calculates the total number of records (i.e., fraud cases) that meet this criteria." 
+}, { + "id": "3538", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets value for customers in the financial services industry who have assets greater than 250000?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(255), industry VARCHAR(255), assets DECIMAL(10, 2)); INSERT INTO customers (id, name, industry, assets) VALUES (1, \u0027John Doe\u0027, \u0027Financial Services\u0027, 150000.00), (2, \u0027Jane Smith\u0027, \u0027Financial Services\u0027, 200000.00), (3, \u0027Alice Johnson\u0027, \u0027Financial Services\u0027, 250000.00), (4, \u0027Bob Brown\u0027, \u0027Financial Services\u0027, 300000.00), (5, \u0027Charlie Davis\u0027, \u0027Retail\u0027, 50000.00), (6, \u0027Diana Green\u0027, \u0027Healthcare\u0027, 75000.00);", + "sql": "SELECT SUM(assets) FROM customers WHERE industry \u003d \u0027Financial Services\u0027 AND assets \u003e 250000.00;", + "sql_explanation": "This query calculates the sum of the \u0027assets\u0027 column for customers in the \u0027Financial Services\u0027 industry who have assets greater than 250000.00." 
+}, { + "id": "3819", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total balance of all customers who have a savings account in the New York branch?", + "sql_context": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), branch VARCHAR(20), balance DECIMAL(10,2)); INSERT INTO accounts (customer_id, account_type, branch, balance) VALUES (1, \u0027Savings\u0027, \u0027New York\u0027, 5000.00), (2, \u0027Checking\u0027, \u0027New York\u0027, 7000.00);", + "sql": "SELECT SUM(balance) FROM accounts WHERE account_type \u003d \u0027Savings\u0027 AND branch \u003d \u0027New York\u0027;", + "sql_explanation": "This query adds up the balance column for all rows where the account_type is \u0027Savings\u0027 and the branch is \u0027New York\u0027." 
+}, { + "id": "3983", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and transaction dates of all transactions that occurred in the United States or Canada?", + "sql_context": "CREATE TABLE transactions (id INT, transaction_date DATE, country VARCHAR(255), amount DECIMAL(10,2)); INSERT INTO transactions (id, transaction_date, country, amount) VALUES (1, \u00272022-01-01\u0027, \u0027USA\u0027, 100.00), (2, \u00272022-01-02\u0027, \u0027Canada\u0027, 200.00), (3, \u00272022-01-03\u0027, \u0027USA\u0027, 300.00);", + "sql": "SELECT country, transaction_date FROM transactions WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027);", + "sql_explanation": "This SQL query selects the country and transaction_date columns from the transactions table, and filters the results to only include rows where the country is either \u0027USA\u0027 or \u0027Canada\u0027. The IN keyword is used to specify multiple values for the country column." 
+}, { + "id": "4290", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest transaction date for account number 999999999?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, account_number INT, amount DECIMAL(10,2), transaction_date DATE); INSERT INTO transactions (transaction_id, account_number, amount, transaction_date) VALUES (1, 999999999, 50.00, \u00272022-05-01\u0027), (2, 999999999, 50.00, \u00272022-05-02\u0027);", + "sql": "SELECT MIN(transaction_date) FROM transactions WHERE account_number \u003d 999999999;", + "sql_explanation": "This SQL query retrieves the earliest transaction date for account number 999999999 by using the MIN function on the transaction_date column, filtering the data where the account_number column equals 999999999." 
+}, { + "id": "4848", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average account balance for customers in the Southeast region?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), region VARCHAR(20), account_balance DECIMAL(10, 2));", + "sql": "SELECT AVG(account_balance) FROM customers WHERE region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This SQL query calculates the average account balance for customers in the Southeast region by using the AVG function on the account_balance column, and filtering the customers table with a WHERE clause for the Southeast region." 
+}, { + "id": "4854", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum investment value in the energy sector?", + "sql_context": "CREATE TABLE investments (investment_id INT, investor_id INT, sector VARCHAR(20), investment_value DECIMAL(10,2)); INSERT INTO investments (investment_id, investor_id, sector, investment_value) VALUES (1, 1, \u0027technology\u0027, 5000.00), (2, 2, \u0027finance\u0027, 3000.00), (3, 3, \u0027energy\u0027, 1000.00);", + "sql": "SELECT MIN(investment_value) FROM investments WHERE sector \u003d \u0027energy\u0027;", + "sql_explanation": "This query calculates the minimum investment value in the energy sector. It does this by using the MIN() function on the investment_value column, and filtering for rows where the sector is \u0027energy\u0027." 
+}, { + "id": "4887", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average account balance for customers in the \u0027New York\u0027 region?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), region VARCHAR(50), account_balance DECIMAL(10,2)); INSERT INTO customers (id, name, region, account_balance) VALUES (1, \u0027John Doe\u0027, \u0027New York\u0027, 15000.50); INSERT INTO customers (id, name, region, account_balance) VALUES (2, \u0027Jane Smith\u0027, \u0027California\u0027, 22000.00);", + "sql": "SELECT AVG(account_balance) FROM customers WHERE region \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query calculates the average account balance for customers in the \u0027New York\u0027 region. It does this by using the AVG function on the account_balance column, while filtering for customers in the \u0027New York\u0027 region using the WHERE clause." 
+}, { + "id": "4939", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of transactions for all accounts in the \u0027High Value\u0027 category?", + "sql_context": "CREATE TABLE accounts (account_id INT, account_type TEXT, balance DECIMAL(10, 2)); INSERT INTO accounts (account_id, account_type, balance) VALUES (1, \u0027High Value\u0027, 100000.00); INSERT INTO accounts (account_id, account_type, balance) VALUES (2, \u0027Standard\u0027, 5000.00);", + "sql": "SELECT SUM(balance) FROM accounts WHERE account_type \u003d \u0027High Value\u0027;", + "sql_explanation": "This SQL query calculates the sum of transactions for all accounts in the \u0027High Value\u0027 category by using the SUM aggregate function on the \u0027balance\u0027 column. The WHERE clause filters for accounts with an account type of \u0027High Value\u0027." 
+}, { + "id": "4963", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clients who have accounts with a balance greater than $20,000 and their account balance.", + "sql_context": "CREATE TABLE clients (id INT PRIMARY KEY, name VARCHAR(255), age INT, city VARCHAR(255), account_id INT, balance DECIMAL(10,2)); INSERT INTO clients (id, name, age, city, account_id, balance) VALUES (1001, \u0027Jacob Smith\u0027, 34, \u0027New York\u0027, 1, 5000.00), (1002, \u0027Sophia Johnson\u0027, 45, \u0027Los Angeles\u0027, 2, 25000.00), (1003, \u0027Ethan Williams\u0027, 29, \u0027Chicago\u0027, 3, 8000.00), (1004, \u0027Aria Patel\u0027, 36, \u0027Toronto\u0027, 4, 12000.00), (1005, \u0027Mateo Davis\u0027, 42, \u0027Miami\u0027, 5, 22000.00);", + "sql": "SELECT c.name, c.balance FROM clients c WHERE c.balance \u003e 20000.00;", + "sql_explanation": "This query lists all clients who have accounts with a balance greater than $20,000 and their account balance by filtering the clients table based on balance." 
+}, { + "id": "5026", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets value for all customers from the \u0027New York\u0027 region?", + "sql_context": "CREATE TABLE customers (customer_id INT, name TEXT, region TEXT, assets_value DECIMAL); INSERT INTO customers (customer_id, name, region, assets_value) VALUES (1, \u0027John Doe\u0027, \u0027New York\u0027, 50000.00), (2, \u0027Jane Smith\u0027, \u0027California\u0027, 75000.00);", + "sql": "SELECT SUM(assets_value) FROM customers WHERE region \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the total assets value for all customers from the \u0027New York\u0027 region by summing up the assets_value column where the region is \u0027New York\u0027." 
+}, { + "id": "5181", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total assets of the customers who have a savings account?", + "sql_context": "CREATE TABLE Accounts (CustomerID INT, AccountType VARCHAR(50), Balance DECIMAL(10,2)); INSERT INTO Accounts (CustomerID, AccountType, Balance) VALUES (1, \u0027Savings\u0027, 10000); INSERT INTO Accounts (CustomerID, AccountType, Balance) VALUES (2, \u0027Checking\u0027, 5000);", + "sql": "SELECT SUM(Balance) FROM Accounts WHERE AccountType \u003d \u0027Savings\u0027", + "sql_explanation": "This query calculates the total assets of customers who have a savings account by summing the balance for all rows with AccountType \u003d \u0027Savings\u0027." 
+}, { + "id": "5307", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets value for customers from the USA as of 2022-01-01?", + "sql_context": "CREATE TABLE customer (customer_id INT, name VARCHAR(100), country VARCHAR(50), assets_value DECIMAL(18,2)); INSERT INTO customer (customer_id, name, country, assets_value) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 50000.00), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 75000.00);", + "sql": "SELECT SUM(assets_value) FROM customer WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the total assets value for customers from the USA by summing up the \u0027assets_value\u0027 column where \u0027country\u0027 is \u0027USA\u0027." 
+}, { + "id": "5311", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum transaction amount for clients aged 60 or older?", + "sql_context": "CREATE TABLE clients (id INT, name TEXT, age INT, state TEXT, transaction_amount DECIMAL(10,2)); INSERT INTO clients (id, name, age, state, transaction_amount) VALUES (1, \u0027Elizabeth Brown\u0027, 65, \u0027New York\u0027, 150.00); INSERT INTO clients (id, name, age, state, transaction_amount) VALUES (2, \u0027David Johnson\u0027, 60, \u0027New York\u0027, 200.50);", + "sql": "SELECT MIN(transaction_amount) FROM clients WHERE age \u003e\u003d 60;", + "sql_explanation": "The SQL query calculates the minimum transaction amount for clients aged 60 or older by using the MIN function on the \u0027transaction_amount\u0027 column, while filtering the records for clients with an age of 60 or older by using the WHERE clause." 
+}, { + "id": "5322", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique investment strategies and their associated risk levels.", + "sql_context": "CREATE TABLE investment_strategies (id INT, name VARCHAR(50), risk_level VARCHAR(50)); INSERT INTO investment_strategies (id, name, risk_level) VALUES (1, \u0027Conservative\u0027, \u0027Low\u0027), (2, \u0027Moderate\u0027, \u0027Medium\u0027), (3, \u0027Aggressive\u0027, \u0027High\u0027);", + "sql": "SELECT DISTINCT name, risk_level FROM investment_strategies;", + "sql_explanation": "The SQL query selects distinct values from the name and risk_level fields in the investment_strategies table. This will return all unique investment strategies and their associated risk levels." 
+}, { + "id": "5354", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average account balance for customers in \u0027New York\u0027?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), city VARCHAR(50), balance DECIMAL(10,2)); INSERT INTO customers (id, name, city, balance) VALUES (1, \u0027John Doe\u0027, \u0027New York\u0027, 15000.50); INSERT INTO customers (id, name, city, balance) VALUES (2, \u0027Jane Smith\u0027, \u0027Los Angeles\u0027, 12000.00);", + "sql": "SELECT AVG(balance) FROM customers WHERE city \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the average balance of customers who live in \u0027New York\u0027. It uses the AVG function to find the average value of the \u0027balance\u0027 column for the rows where \u0027city\u0027 is \u0027New York\u0027." 
+}, { + "id": "5437", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of transactions for clients living in Australia?", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT, country TEXT, transaction_amount DECIMAL); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (1, \u0027John Doe\u0027, \u0027Australia\u0027, 500.00); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (2, \u0027Jane Smith\u0027, \u0027United States\u0027, 350.00); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (3, \u0027Mike Johnson\u0027, \u0027Canada\u0027, 400.00);", + "sql": "SELECT COUNT(*) FROM clients WHERE country \u003d \u0027Australia\u0027;", + "sql_explanation": "The SQL query calculates the count of transactions by filtering the \u0027clients\u0027 table for rows with the country value \u0027Australia\u0027 and then applying the COUNT() function on all columns using the asterisk symbol (*)." 
+}, { + "id": "5552", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the security with a security symbol of \u0027ABC\u0027 from the \u0027securities\u0027 table", + "sql_context": "CREATE TABLE securities (security_id INT PRIMARY KEY, security_symbol VARCHAR(10), security_name VARCHAR(100));", + "sql": "DELETE FROM securities WHERE security_symbol \u003d \u0027ABC\u0027;", + "sql_explanation": "This query deletes the record from the \u0027securities\u0027 table with a specified security symbol." +}, { + "id": "5580", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete transactions from clients living in the United States.", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT, country TEXT, transaction_amount DECIMAL); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (1, \u0027John Doe\u0027, \u0027United States\u0027, 500.00); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 350.00); INSERT INTO clients (client_id, name, country, transaction_amount) VALUES (3, \u0027Mike Johnson\u0027, \u0027Mexico\u0027, 400.00); INSERT INTO clients (client_id, name, country, 
transaction_amount) VALUES (4, \u0027Sara Doe\u0027, \u0027United States\u0027, 600.00);", + "sql": "DELETE FROM clients WHERE country \u003d \u0027United States\u0027;", + "sql_explanation": "The SQL query removes all rows with the country value \u0027United States\u0027 using the DELETE statement and the WHERE clause with the equals operator." +}, { + "id": "5750", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "When did the first fraud alert occur?", + "sql_context": "CREATE TABLE fraud_alerts (id INT, account_number VARCHAR(20), alert_type VARCHAR(20), alert_date DATE); INSERT INTO fraud_alerts (id, account_number, alert_type, alert_date) VALUES (1, \u00271234567890\u0027, \u0027Suspicious Activity\u0027, \u00272022-01-01\u0027); INSERT INTO fraud_alerts (id, account_number, alert_type, alert_date) VALUES (2, \u00270987654321\u0027, \u0027Identity Theft\u0027, \u00272022-01-02\u0027);", + "sql": "SELECT MIN(alert_date) FROM fraud_alerts;", + "sql_explanation": "This SQL query selects the earliest alert date from \u0027fraud_alerts\u0027 table." 
+}, { + "id": "410", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new carbon offset project in the \u0027carbon_offset_projects\u0027 table, with the following details: country \u0027Brazil\u0027, name \u0027Amazon Rainforest Protection\u0027, offset_type \u0027Forestation\u0027, total_offset_quantity 1000000, offset_price 15.00, start_date \u00272022-01-01\u0027, end_date \u00272030-12-31\u0027", + "sql_context": "CREATE TABLE carbon_offset_projects (id INT, country VARCHAR(255), name VARCHAR(255), offset_type VARCHAR(255), total_offset_quantity INT, offset_price FLOAT, start_date DATE, end_date DATE);", + "sql": "INSERT INTO carbon_offset_projects (country, name, offset_type, total_offset_quantity, offset_price, start_date, end_date) VALUES (\u0027Brazil\u0027, \u0027Amazon Rainforest Protection\u0027, \u0027Forestation\u0027, 1000000, 15.00, \u00272022-01-01\u0027, \u00272030-12-31\u0027);", + "sql_explanation": "This query inserts a new record into the carbon_offset_projects table, with the specified values for each column." 
+}, { + "id": "1305", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many solar energy projects were implemented in Nigeria, Kenya, and Tanzania between 2017 and 2022?", + "sql_context": "CREATE TABLE solar_energy_projects (project_id INT, country VARCHAR(50), start_year INT, end_year INT); INSERT INTO solar_energy_projects (project_id, country, start_year, end_year) VALUES (1, \u0027Nigeria\u0027, 2018, 2022), (2, \u0027Kenya\u0027, 2019, 2021), (3, \u0027Tanzania\u0027, 2017, 2020), (4, \u0027Nigeria\u0027, 2020, 2023), (5, \u0027Kenya\u0027, 2018, 2022), (6, \u0027Tanzania\u0027, 2019, 2022), (7, \u0027Nigeria\u0027, 2017, 2019);", + "sql": "SELECT COUNT(*) FROM solar_energy_projects WHERE country IN (\u0027Nigeria\u0027, \u0027Kenya\u0027, \u0027Tanzania\u0027) AND start_year BETWEEN 2017 AND 2022 AND end_year BETWEEN 2017 AND 2022;", + "sql_explanation": "This SQL query calculates the number of solar energy projects implemented in Nigeria, Kenya, and Tanzania between 2017 and 2022 by counting the number of projects where the country is either Nigeria, Kenya, or Tanzania and the start and end years are between 2017 and 2022." 
+}, { + "id": "2198", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects were completed in Texas between 2015 and 2018?", + "sql_context": "CREATE TABLE projects (id INT, state VARCHAR(50), project_type VARCHAR(50), completion_year INT); INSERT INTO projects (id, state, project_type, completion_year) VALUES (1, \u0027Texas\u0027, \u0027Solar\u0027, 2016), (2, \u0027Texas\u0027, \u0027Wind\u0027, 2017), (3, \u0027California\u0027, \u0027Geothermal\u0027, 2015), (4, \u0027Texas\u0027, \u0027Wind\u0027, 2018), (5, \u0027Texas\u0027, \u0027Solar\u0027, 2015);", + "sql": "SELECT COUNT(*) FROM projects WHERE state \u003d \u0027Texas\u0027 AND project_type IN (\u0027Solar\u0027, \u0027Wind\u0027) AND completion_year BETWEEN 2015 AND 2018;", + "sql_explanation": "We determine the number of renewable energy projects completed in Texas between 2015 and 2018 by using the COUNT function to count the number of rows in the projects table, filtering rows with the WHERE clause to only include projects in Texas, with project_type of either Solar or Wind, and with completion_year between 2015 and 2018." 
+}, { + "id": "2334", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new hydroelectric project \u0027Rocky Dam\u0027 with 350 MW capacity in Washington state", + "sql_context": "CREATE TABLE hydro_energy_projects (id INT PRIMARY KEY, project_name VARCHAR(255), state VARCHAR(2), capacity FLOAT, status VARCHAR(50));", + "sql": "INSERT INTO hydro_energy_projects (project_name, state, capacity, status) VALUES (\u0027Rocky Dam\u0027, \u0027Washington\u0027, 350.0, \u0027Planning\u0027);", + "sql_explanation": "The INSERT statement adds a new record to the hydro_energy_projects table with the specified values for project_name, state, capacity, and status." 
+}, { + "id": "2371", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new carbon offset program for \u0027Community Tree Planting\u0027 in \u0027Urban Area X\u0027", + "sql_context": "CREATE TABLE carbon_offset_programs (program_id INT, program_name TEXT, location TEXT);", + "sql": "INSERT INTO carbon_offset_programs (program_id, program_name, location) VALUES (4, \u0027Community Tree Planting\u0027, \u0027Urban Area X\u0027);", + "sql_explanation": "This query inserts a new record into the carbon_offset_programs table for a community tree planting program in Urban Area X with a program_id of 4." +}, { + "id": "2576", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all renewable energy projects and their locations", + "sql_context": "CREATE TABLE renewable_projects_3 (id INT, name VARCHAR(255), location VARCHAR(255), capacity FLOAT, technology VARCHAR(255));", + "sql": "SELECT name, location FROM renewable_projects_3 WHERE technology IN (\u0027Solar\u0027, \u0027Wind\u0027, \u0027Hydro\u0027, \u0027Geothermal\u0027, \u0027Biomass\u0027);", + "sql_explanation": "The SQL query selects the name and location columns for all records in the renewable_projects_3 table where the technology is either Solar, Wind, Hydro, 
Geothermal, or Biomass." +}, { + "id": "2625", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects were initiated in New York between 2015 and 2020?", + "sql_context": "CREATE TABLE renewable_energy_projects (project_id INT, state VARCHAR(20), start_year INT, end_year INT, project_type VARCHAR(20));", + "sql": "SELECT COUNT(project_id) FROM renewable_energy_projects WHERE state \u003d \u0027New York\u0027 AND start_year BETWEEN 2015 AND 2020;", + "sql_explanation": "This query calculates the number of renewable energy projects initiated in New York between 2015 and 2020 by counting the project_id values in the renewable_energy_projects table where the state is New York and the start year is between 2015 and 2020." 
+}, { + "id": "2682", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission reduction (in metric tons) for carbon offset programs in the province of Ontario that have a target reduction of at least 50,000 metric tons?", + "sql_context": "CREATE TABLE on_co2_emission_reduction (id INT, program_id VARCHAR(255), province VARCHAR(255), target_reduction INT, actual_reduction INT);", + "sql": "SELECT SUM(actual_reduction) FROM on_co2_emission_reduction WHERE province \u003d \u0027Ontario\u0027 AND target_reduction \u003e\u003d 50000;", + "sql_explanation": "The SQL query calculates the total CO2 emission reduction (in metric tons) for carbon offset programs in Ontario that have a target reduction of at least 50,000 metric tons. It uses the SUM() function to find the total actual reduction in metric tons." 
+}, { + "id": "2800", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total CO2 emissions savings (in metric tons) of renewable projects in Texas and Oklahoma", + "sql_context": "CREATE TABLE project (id INT, name TEXT, state TEXT, type TEXT, co2_savings INT); INSERT INTO project (id, name, state, type, co2_savings) VALUES (16, \u0027Texas Wind\u0027, \u0027Texas\u0027, \u0027Wind\u0027, 765432), (17, \u0027Oklahoma Solar\u0027, \u0027Oklahoma\u0027, \u0027Solar\u0027, 345678), (18, \u0027Texas Solar\u0027, \u0027Texas\u0027, \u0027Solar\u0027, 987654), (19, \u0027Oklahoma Wind\u0027, \u0027Oklahoma\u0027, \u0027Wind\u0027, 234567);", + "sql": "SELECT SUM(co2_savings) FROM project WHERE (state \u003d \u0027Texas\u0027 OR state \u003d \u0027Oklahoma\u0027) AND type IN (\u0027Wind\u0027, \u0027Solar\u0027);", + "sql_explanation": "This query calculates the total CO2 emissions savings of renewable projects in Texas and Oklahoma by summing up the co2_savings values in the \u0027project\u0027 table where the state is either Texas or Oklahoma and type is either Wind or Solar." 
+}, { + "id": "2895", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency rating of residential buildings in Canada?", + "sql_context": "CREATE TABLE buildings (building_id INT, building_type TEXT, country TEXT, energy_efficiency_rating FLOAT); INSERT INTO buildings (building_id, building_type, country, energy_efficiency_rating) VALUES (1, \u0027Residential\u0027, \u0027Canada\u0027, 75.5), (2, \u0027Commercial\u0027, \u0027Canada\u0027, 65.3);", + "sql": "SELECT AVG(energy_efficiency_rating) FROM buildings WHERE building_type \u003d \u0027Residential\u0027 AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "This SQL query calculates the average energy efficiency rating of residential buildings in Canada by averaging the energy_efficiency_rating values in the buildings table where the building_type is \u0027Residential\u0027 and the country is \u0027Canada\u0027." 
+}, { + "id": "3054", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the name of the Wind Farm in Germany with the highest capacity", + "sql_context": "CREATE TABLE wind_farms (id INT, name VARCHAR(100), country VARCHAR(50), capacity_mw FLOAT); INSERT INTO wind_farms (id, name, country, capacity_mw) VALUES (1, \u0027Windfarm 1\u0027, \u0027Germany\u0027, 120.5), (2, \u0027Windfarm 2\u0027, \u0027Germany\u0027, 250.3);", + "sql": "UPDATE wind_farms SET name \u003d \u0027Super Windfarm\u0027 WHERE country \u003d \u0027Germany\u0027 ORDER BY capacity_mw DESC LIMIT 1;", + "sql_explanation": "This SQL query updates the name of the Wind Farm in Germany with the highest capacity. It does this by updating the name column of the wind_farms table where the country is Germany, ordered by capacity_mw in descending order." 
+}, { + "id": "3184", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and locations of the 5 least energy-efficient countries in the \u0027GreenEnergy\u0027 schema?", + "sql_context": "CREATE SCHEMA GreenEnergy; CREATE TABLE Countries (country_id INT, country_name VARCHAR(100), energy_efficiency_rating INT); INSERT INTO Countries (country_id, country_name, energy_efficiency_rating) VALUES (1, \u0027USA\u0027, 70), (2, \u0027Canada\u0027, 75), (3, \u0027Mexico\u0027, 65), (4, \u0027Brazil\u0027, 80), (5, \u0027Argentina\u0027, 85), (6, \u0027Chile\u0027, 90), (7, \u0027Peru\u0027, 95);", + "sql": "SELECT country_name, location FROM GreenEnergy.Countries ORDER BY energy_efficiency_rating ASC LIMIT 5;", + "sql_explanation": "This query retrieves the names and locations of the 5 least energy-efficient countries from the \u0027Countries\u0027 table in the \u0027GreenEnergy\u0027 schema by ordering them in ascending order by the \u0027energy_efficiency_rating\u0027 column and limiting the result set to 5 rows." 
+}, { + "id": "3343", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency rating for residential buildings in the City of Toronto?", + "sql_context": "CREATE TABLE Residential_Buildings (id INT, building_type VARCHAR(20), location VARCHAR(20), energy_efficiency_rating FLOAT); INSERT INTO Residential_Buildings (id, building_type, location, energy_efficiency_rating) VALUES (1, \u0027Apartment\u0027, \u0027City of Toronto\u0027, 82.5), (2, \u0027Townhouse\u0027, \u0027City of Toronto\u0027, 85.0), (3, \u0027Single Family Home\u0027, \u0027City of Toronto\u0027, 78.0);", + "sql": "SELECT AVG(energy_efficiency_rating) FROM Residential_Buildings WHERE location \u003d \u0027City of Toronto\u0027;", + "sql_explanation": "This SQL query calculates the average energy efficiency rating for residential buildings in the City of Toronto by using the AVG() function." 
+}, { + "id": "3388", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the maximum energy efficiency improvement in the US transportation sector?", + "sql_context": "CREATE TABLE energy_efficiency (id INT, sector VARCHAR(50), year INT, improvement FLOAT);", + "sql": "SELECT MAX(improvement) FROM energy_efficiency WHERE sector \u003d \u0027transportation\u0027 AND country \u003d \u0027US\u0027;", + "sql_explanation": "This query finds the maximum energy efficiency improvement in the US transportation sector by selecting the maximum value from the improvement column for records where the sector is \u0027transportation\u0027 and country is \u0027US\u0027." 
+}, { + "id": "3695", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency rating of renewable energy projects implemented in the last two years?", + "sql_context": "CREATE TABLE projects (project_id INT, name TEXT, rating FLOAT, implementation_date DATE); INSERT INTO projects (project_id, name, rating, implementation_date) VALUES (1, \u0027Solar Farm\u0027, 1.8, \u00272020-01-01\u0027), (2, \u0027Wind Turbine\u0027, 2.2, \u00272019-01-01\u0027), (3, \u0027Geothermal Plant\u0027, 2.0, \u00272021-01-01\u0027), (4, \u0027Hydro Plant\u0027, 1.9, \u00272018-01-01\u0027);", + "sql": "SELECT AVG(rating) FROM projects WHERE implementation_date \u003e\u003d DATEADD(year, -2, GETDATE());", + "sql_explanation": "Calculate the average energy efficiency rating for projects implemented in the last two years." 
+}, { + "id": "3808", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy rating for the \u0027residential\u0027 sector in the \u0027efficiency\u0027 schema?", + "sql_context": "CREATE SCHEMA efficiency; CREATE TABLE energy_efficiency (sector VARCHAR(255), energy_rating DECIMAL(3,2)); INSERT INTO energy_efficiency (sector, energy_rating) VALUES (\u0027commercial\u0027, 85.50), (\u0027residential\u0027, 72.30), (\u0027industrial\u0027, 68.90);", + "sql": "SELECT AVG(energy_rating) FROM efficiency.energy_efficiency WHERE sector \u003d \u0027residential\u0027;", + "sql_explanation": "This query calculates the average energy rating for the \u0027residential\u0027 sector in the \u0027efficiency\u0027 schema by averaging the \u0027energy_rating\u0027 column in the \u0027energy_efficiency\u0027 table where the \u0027sector\u0027 is \u0027residential\u0027." 
+}, { + "id": "3827", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission reduction from recycling programs in Japan and South Korea?", + "sql_context": "CREATE TABLE recycling_reduction (country VARCHAR(30), reduction FLOAT); INSERT INTO recycling_reduction (country, reduction) VALUES (\u0027Japan\u0027, 1.6), (\u0027Japan\u0027, 1.8), (\u0027South Korea\u0027, 0.9), (\u0027South Korea\u0027, 1.2);", + "sql": "SELECT SUM(reduction) FROM recycling_reduction WHERE country IN (\u0027Japan\u0027, \u0027South Korea\u0027);", + "sql_explanation": "This query calculates the total CO2 emission reduction from recycling programs in Japan and South Korea using the SUM function and filtering the recycling_reduction table for the desired countries." 
+}, { + "id": "3951", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the \u0027end_year\u0027 of all records in the \u0027clean_energy_policy_trends\u0027 table where the \u0027policy_type\u0027 is \u0027Incentive\u0027 to 2025", + "sql_context": "CREATE TABLE clean_energy_policy_trends (id INT, policy_type VARCHAR(255), start_year INT, end_year INT, description TEXT);", + "sql": "UPDATE clean_energy_policy_trends SET end_year \u003d 2025 WHERE policy_type \u003d \u0027Incentive\u0027;", + "sql_explanation": "* This SQL query updates the \u0027end_year\u0027 of all records in the \u0027clean_energy_policy_trends\u0027 table where the \u0027policy_type\u0027 is \u0027Incentive\u0027 to 2025. * It uses the UPDATE statement, specifying the table name and new \u0027end_year\u0027 value, and the condition in the WHERE clause." 
+}, { + "id": "3969", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of solar panels in South Africa?", + "sql_context": "CREATE TABLE south_africa_solar (id INT, name TEXT, country TEXT, installed_capacity INT); INSERT INTO south_africa_solar (id, name, country, installed_capacity) VALUES (1, \u0027Jasper\u0027, \u0027South Africa\u0027, 96000), (2, \u0027Kaxu\u0027, \u0027South Africa\u0027, 100000);", + "sql": "SELECT SUM(installed_capacity) FROM south_africa_solar WHERE country \u003d \u0027South Africa\u0027;", + "sql_explanation": "The SQL query calculates the total installed capacity of solar panels in South Africa by using the SUM function on the installed_capacity column, filtering the data for South Africa." 
+}, { + "id": "4021", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity (in MW) of all hydro power projects that were completed after the year 2010?", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, completed_year INT, capacity_mw FLOAT); INSERT INTO projects (id, name, completed_year, capacity_mw) VALUES (1, \u0027Hydro Project 1\u0027, 2012, 200.5); INSERT INTO projects (id, name, completed_year, capacity_mw) VALUES (2, \u0027Hydro Project 2\u0027, 2005, 150.3);", + "sql": "SELECT SUM(capacity_mw) FROM projects WHERE type \u003d \u0027hydro\u0027 AND completed_year \u003e 2010;", + "sql_explanation": "The SQL query calculates the total installed capacity of all hydro power projects that were completed after the year 2010 by summing up the capacity_mw values in the projects table where the type is \u0027hydro\u0027 and the completed_year is greater than 2010." 
+}, { + "id": "4039", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum energy efficiency (in %) of wind farms in \u0027Europe\u0027 that were built after \u00272015\u0027?", + "sql_context": "CREATE TABLE wind_farms (id INT, name VARCHAR(50), region VARCHAR(50), built_year INT, efficiency FLOAT); INSERT INTO wind_farms (id, name, region, built_year, efficiency) VALUES (1, \u0027WindFarm1\u0027, \u0027Europe\u0027, 2016, 0.45), (2, \u0027WindFarm2\u0027, \u0027Europe\u0027, 2017, 0.50);", + "sql": "SELECT MAX(efficiency) FROM wind_farms WHERE region \u003d \u0027Europe\u0027 AND built_year \u003e 2015;", + "sql_explanation": "This query calculates the maximum energy efficiency (in %) of wind farms in \u0027Europe\u0027 that were built after \u00272015\u0027 by filtering the records based on the region and built_year values and then finding the maximum of the efficiency values for the filtered records." 
+}, { + "id": "4339", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum capacity (in MW) of hydroelectric power plants in \u0027Brazil\u0027?", + "sql_context": "CREATE TABLE hydroelectric_power_plants (id INT, name TEXT, location TEXT, capacity FLOAT); INSERT INTO hydroelectric_power_plants (id, name, location, capacity) VALUES (1, \u0027Plant A\u0027, \u0027Brazil\u0027, 3000.5), (2, \u0027Plant B\u0027, \u0027Canada\u0027, 4000.2);", + "sql": "SELECT MAX(capacity) FROM hydroelectric_power_plants WHERE location \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query calculates the maximum capacity of hydroelectric power plants in Brazil by filtering the \u0027location\u0027 column in the \u0027hydroelectric_power_plants\u0027 table and returning the maximum value in the \u0027capacity\u0027 column." 
+}, { + "id": "4394", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency rating of solar farms in India?", + "sql_context": "CREATE TABLE solar_farms (id INT, name TEXT, country TEXT, energy_efficiency_rating FLOAT); INSERT INTO solar_farms (id, name, country, energy_efficiency_rating) VALUES (1, \u0027Kamuthi\u0027, \u0027India\u0027, 0.18), (2, \u0027Bhadla\u0027, \u0027India\u0027, 0.19);", + "sql": "SELECT AVG(energy_efficiency_rating) FROM solar_farms WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "The SQL query calculates the average energy efficiency rating of solar farms in India by using the AVG function on the energy_efficiency_rating column, filtering the data for India." 
+}, { + "id": "4494", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average carbon offsets in kg for projects in Canada", + "sql_context": "CREATE TABLE carbon_offsets (project_id INT, name VARCHAR(50), location VARCHAR(50), carbon_offsets_kg FLOAT); INSERT INTO carbon_offsets (project_id, name, location, carbon_offsets_kg) VALUES (1, \u0027Canada Carbon Offset Project\u0027, \u0027Canada\u0027, 1000.0);", + "sql": "SELECT AVG(carbon_offsets_kg) FROM carbon_offsets WHERE location \u003d \u0027Canada\u0027;", + "sql_explanation": "This query shows the average carbon offsets in kg for projects in Canada by calculating the average of the carbon_offsets_kg column for projects in Canada." 
+}, { + "id": "4703", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption in Brazil by sector (residential, commercial, industrial)?", + "sql_context": "CREATE TABLE energy_consumption (country VARCHAR(255), sector VARCHAR(255), consumption INT); INSERT INTO energy_consumption (country, sector, consumption) VALUES (\u0027Brazil\u0027, \u0027Residential\u0027, 5000), (\u0027Brazil\u0027, \u0027Commercial\u0027, 7000), (\u0027Brazil\u0027, \u0027Industrial\u0027, 10000);", + "sql": "SELECT SUM(consumption) FROM energy_consumption WHERE country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query calculates the total energy consumption in Brazil by sector by summing the consumption column in the energy_consumption table where the country is \u0027Brazil\u0027." 
+}, { + "id": "4821", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total installed capacity of renewable energy projects in Texas", + "sql_context": "CREATE TABLE renewable_projects (id INT, name TEXT, location TEXT, capacity INT);", + "sql": "SELECT SUM(capacity) FROM renewable_projects WHERE location \u003d \u0027Texas\u0027;", + "sql_explanation": "This query calculates the total installed capacity of renewable energy projects in Texas by summing the capacity column where the location is \u0027Texas\u0027." +}, { + "id": "4988", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency (in kWh/m2) of buildings in the city of London?", + "sql_context": "CREATE TABLE buildings (building_id INT, city VARCHAR(255), energy_efficiency FLOAT); INSERT INTO buildings (building_id, city, energy_efficiency) VALUES (1, \u0027Berlin\u0027, 150), (2, \u0027Berlin\u0027, 160), (3, \u0027Berlin\u0027, 140), (4, \u0027Berlin\u0027, 170), (5, \u0027Berlin\u0027, 130), (6, \u0027Paris\u0027, 180), (7, \u0027Paris\u0027, 190), (8, \u0027Paris\u0027, 175), (9, \u0027London\u0027, 190), (10, \u0027London\u0027, 180), (11, \u0027London\u0027, 170), (12, \u0027London\u0027, 160), (13, 
\u0027London\u0027, 200), (14, \u0027London\u0027, 210), (15, \u0027London\u0027, 150);", + "sql": "SELECT AVG(energy_efficiency) FROM buildings WHERE city \u003d \u0027London\u0027;", + "sql_explanation": "The SQL query calculates the average energy efficiency (in kWh/m2) of buildings in London by averaging the energy_efficiency values in the buildings table where the city column is equal to \u0027London\u0027." +}, { + "id": "5070", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy consumption per household in the Wind Turbine Projects?", + "sql_context": "CREATE TABLE Wind_Turbine_Projects (project_id INT, location VARCHAR(50), household_count INT, average_energy_consumption FLOAT); INSERT INTO Wind_Turbine_Projects (project_id, location, household_count, average_energy_consumption) VALUES (1, \u0027Alberta\u0027, 500, 2500.0), (2, \u0027Saskatchewan\u0027, 300, 2000.0);", + "sql": "SELECT AVG(average_energy_consumption) FROM Wind_Turbine_Projects;", + "sql_explanation": "Calculates the average energy consumption per household across all wind turbine projects by taking the average of the \u0027average_energy_consumption\u0027 column." 
+}, { + "id": "5118", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the installed capacity for each renewable energy project in the \u0027EcoPower\u0027 schema?", + "sql_context": "CREATE SCHEMA EcoPower; CREATE TABLE RenewableProjects (project_id INT, name VARCHAR(100), type VARCHAR(50), installed_capacity INT); INSERT INTO RenewableProjects (project_id, name, type, installed_capacity) VALUES (1, \u0027SolarFarm 1\u0027, \u0027Solar\u0027, 100000), (2, \u0027WindFarm 2\u0027, \u0027Wind\u0027, 80000), (3, \u0027HydroProject 1\u0027, \u0027Hydro\u0027, 120000), (4, \u0027Geothermal 1\u0027, \u0027Geothermal\u0027, 90000);", + "sql": "SELECT name, installed_capacity FROM EcoPower.RenewableProjects;", + "sql_explanation": "This query retrieves the names and installed capacities of all renewable energy projects in the \u0027RenewableProjects\u0027 table in the \u0027EcoPower\u0027 schema." 
+}, { + "id": "5526", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emissions reduction (in metric tons) for the projects in the \u0027co2_emissions_reduction\u0027 table?", + "sql_context": "CREATE TABLE co2_emissions_reduction (id INT, project_name TEXT, location TEXT, reduction_mt FLOAT); INSERT INTO co2_emissions_reduction (id, project_name, location, reduction_mt) VALUES (1, \u0027Karita Wind Farm\u0027, \u0027Mexico\u0027, 24000.0);", + "sql": "SELECT SUM(reduction_mt) FROM co2_emissions_reduction;", + "sql_explanation": "The SQL query calculates the total CO2 emissions reduction (in metric tons) for all projects by using the SUM function with the reduction_mt column." 
+}, { + "id": "5533", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of wind farms in the \u0027renewable_energy\u0027 schema?", + "sql_context": "CREATE SCHEMA renewable_energy; CREATE TABLE wind_farms (id INT, name VARCHAR(100), capacity FLOAT); INSERT INTO wind_farms (id, name, capacity) VALUES (1, \u0027Wind Farm A\u0027, 50.0), (2, \u0027Wind Farm B\u0027, 75.0);", + "sql": "SELECT SUM(capacity) FROM renewable_energy.wind_farms;", + "sql_explanation": "This SQL query calculates the total capacity of wind farms in the \u0027renewable_energy\u0027 schema by summing up the \u0027capacity\u0027 values in the \u0027wind_farms\u0027 table." 
+}, { + "id": "5558", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all the projects in India with their types", + "sql_context": "CREATE TABLE project_types (project_id INT, name VARCHAR(50), location VARCHAR(50), project_type VARCHAR(50)); INSERT INTO project_types (project_id, name, location, project_type) VALUES (1, \u0027India Project 1\u0027, \u0027India\u0027, \u0027Solar\u0027);", + "sql": "SELECT * FROM project_types WHERE location \u003d \u0027India\u0027;", + "sql_explanation": "This query lists all projects in India by selecting all columns for projects in India." +}, { + "id": "5637", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest completion year for any renewable energy project in the \u0027project_summary\u0027 table?", + "sql_context": "CREATE TABLE project_summary (project_id INT, project_type VARCHAR(50), completion_year INT); INSERT INTO project_summary (project_id, project_type, completion_year) VALUES (1, \u0027Wind\u0027, 2010), (2, \u0027Solar\u0027, 2012), (3, \u0027Hydro\u0027, 2005);", + "sql": "SELECT MIN(completion_year) FROM project_summary;", + "sql_explanation": "This SQL query calculates the earliest completion year for any renewable energy project in the \u0027project_summary\u0027 
table by finding the minimum \u0027completion_year\u0027 value in the \u0027project_summary\u0027 table." +}, { + "id": "5706", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the names of all wind farms in the \u0027renewable_energy\u0027 schema.", + "sql_context": "CREATE SCHEMA renewable_energy; CREATE TABLE wind_farms (id INT, name VARCHAR(100), capacity FLOAT); INSERT INTO wind_farms (id, name, capacity) VALUES (1, \u0027Wind Farm S\u0027, 140.0), (2, \u0027Wind Farm T\u0027, 150.0);", + "sql": "SELECT name FROM renewable_energy.wind_farms;", + "sql_explanation": "This SQL query retrieves the \u0027name\u0027 column from the \u0027wind_farms\u0027 table in the \u0027renewable_energy\u0027 schema, providing information about the names of all wind farms in the dataset." 
+}, { + "id": "5760", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete renewable energy projects that have an efficiency rating below 1.5.", + "sql_context": "CREATE TABLE projects (project_id INT, name TEXT, rating FLOAT); INSERT INTO projects (project_id, name, rating) VALUES (1, \u0027Solar Farm\u0027, 1.8), (2, \u0027Wind Turbine\u0027, 1.1), (3, \u0027Geothermal Plant\u0027, 2.0), (4, \u0027Hydro Plant\u0027, 1.9);", + "sql": "DELETE FROM projects WHERE rating \u003c 1.5;", + "sql_explanation": "Delete renewable energy projects with an efficiency rating below 1.5." +}, { + "id": "5813", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all types of renewable energy projects in the database", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, type TEXT);", + "sql": "SELECT DISTINCT type FROM projects;", + "sql_explanation": "This query lists all types of renewable energy projects in the database by selecting distinct type values from the projects table." 
+}, { + "id": "5817", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the renewable energy project with id 1", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, capacity INT, location TEXT); INSERT INTO projects (id, name, capacity, location) VALUES (1, \u0027Solar Farm 1.0\u0027, 1000, \u0027California\u0027); INSERT INTO projects (id, name, capacity, location) VALUES (2, \u0027Wind Farm 1.0\u0027, 2000, \u0027Texas\u0027); INSERT INTO projects (id, name, capacity, location) VALUES (3, \u0027Hydro Plant 1.0\u0027, 1500, \u0027Oregon\u0027);", + "sql": "DELETE FROM projects WHERE id \u003d 1;", + "sql_explanation": "The SQL query deletes the renewable energy project with id 1 by using the DELETE statement and specifying the filter with the WHERE clause." 
+}, { + "id": "898", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new network investment to the \u0027network_investments\u0027 table", + "sql_context": "CREATE TABLE network_investments (investment_id INT, investment_name VARCHAR(255), investment_type VARCHAR(255), investment_amount DECIMAL(10,2), date DATE);", + "sql": "INSERT INTO network_investments (investment_id, investment_name, investment_type, investment_amount, date) VALUES (4001, \u00275G Network Upgrade\u0027, \u0027Infrastructure\u0027, 5000000.00, \u00272023-01-01\u0027);", + "sql_explanation": "This SQL query inserts a new network investment into the \u0027network_investments\u0027 table with a unique investment_id, investment_name, investment_type, investment_amount, and date." 
+}, { + "id": "1192", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new compliance record into the compliance table", + "sql_context": "CREATE TABLE compliance (compliance_id INT, regulation VARCHAR(100), description VARCHAR(255), compliance_date DATE);", + "sql": "INSERT INTO compliance (compliance_id, regulation, description, compliance_date) VALUES (4001, \u0027Net Neutrality\u0027, \u0027Complied with net neutrality regulations\u0027, \u00272021-12-31\u0027);", + "sql_explanation": "This SQL query inserts a new record into the \u0027compliance\u0027 table for a compliance record on December 31, 2021, for complying with net neutrality regulations." 
+}, { + "id": "1244", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Find broadband subscribers with no data allowance and insert them into mobile_subscribers table", + "sql_context": "CREATE TABLE mobile_subscribers (id INT, name VARCHAR(255), data_allowance INT, contract_start DATE); INSERT INTO mobile_subscribers (id, name, data_allowance, contract_start) VALUES (1, \u0027John Doe\u0027, 5000, \u00272020-01-01\u0027); CREATE TABLE broadband_subscribers (id INT, name VARCHAR(255), speed INT, contract_start DATE); INSERT INTO broadband_subscribers (id, name, speed, contract_start) VALUES (1, \u0027Jane Doe\u0027, NULL, \u00272019-01-01\u0027);", + "sql": "INSERT INTO mobile_subscribers (id, name, data_allowance, contract_start) SELECT id, name, 2000, contract_start FROM broadband_subscribers WHERE data_allowance IS NULL;", + "sql_explanation": "Insert records with id, name, 2000 data allowance, and contract_start date from broadband_subscribers table into mobile_subscribers table where data_allowance is NULL." 
+}, { + "id": "1327", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table for storing customer complaints", + "sql_context": "CREATE TABLE customer_complaints (id INT PRIMARY KEY, complaint TEXT, date DATE, resolved BOOLEAN);", + "sql": "CREATE TABLE customer_complaints AS SELECT * FROM (VALUES (1, \u0027Data billing issue\u0027, \u00272021-10-01\u0027, false), (2, \u0027Internet connectivity problem\u0027, \u00272021-10-02\u0027, true));", + "sql_explanation": "A new table named \u0027customer_complaints\u0027 is created with columns: id, complaint, date, and resolved. The provided SQL INSERTs two records as initial data." 
+}, { + "id": "1420", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total network investment in the telecom sector for the current year?", + "sql_context": "CREATE TABLE network_investments (investment_id INT, investment_amount DECIMAL(10,2), investment_date DATE, sector VARCHAR(255));", + "sql": "SELECT SUM(investment_amount) as total_investment FROM network_investments WHERE sector \u003d \u0027telecom\u0027 AND investment_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR);", + "sql_explanation": "This query calculates the total network investment for the telecom sector for the current year. It filters the records to only include those from the current year by checking the investment_date. Finally, it calculates the total investment amount." 
+}, { + "id": "1444", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from roaming charges for the Latin America region in the last quarter?", + "sql_context": "CREATE TABLE calls(id INT, subscriber_id INT, type VARCHAR(50), duration INT, roaming_charge FLOAT, date DATE);", + "sql": "SELECT SUM(roaming_charge) FROM calls WHERE type \u003d \u0027roaming\u0027 AND date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 3 MONTH) AND EXTRACT(REGION FROM date) \u003d \u0027America/Latin\u0027;", + "sql_explanation": "The SQL query calculates the total revenue generated from roaming charges for the Latin America region in the last quarter by using the SUM function on the roaming_charge column, filtering the rows for roaming type calls, and using the date functions to filter the results for the last quarter and the EXTRACT function to filter the results for the Latin America region." 
+}, { + "id": "1552", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mobile subscribers have not made any calls in the last 6 months, and what is their last call date?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, last_call_date DATE); INSERT INTO mobile_subscribers (subscriber_id, last_call_date) VALUES (1, \u00272021-06-15\u0027), (2, \u00272021-07-20\u0027), (3, \u00272021-05-01\u0027);", + "sql": "SELECT subscriber_id, last_call_date FROM mobile_subscribers WHERE last_call_date \u003c\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND last_call_date IS NOT NULL;", + "sql_explanation": "The SQL query selects all the mobile subscribers who have not made any calls in the last 6 months by checking if \u0027last_call_date\u0027 is less than or equal to the date 6 months ago from today\u0027s date. It returns \u0027subscriber_id\u0027 and \u0027last_call_date\u0027 columns." 
+}, { + "id": "2135", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mobile subscribers have not used their data plan in the last 30 days?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, last_data_usage DATETIME); INSERT INTO mobile_subscribers (subscriber_id, last_data_usage) VALUES (1, \u00272022-01-15\u0027), (2, \u00272022-02-03\u0027), (3, NULL), (4, \u00272022-01-20\u0027);", + "sql": "SELECT subscriber_id FROM mobile_subscribers WHERE last_data_usage IS NULL OR last_data_usage \u003c DATE_SUB(CURDATE(), INTERVAL 30 DAY);", + "sql_explanation": "The query selects the subscriber_id from the mobile_subscribers table where the last_data_usage is either null or older than 30 days." 
+}, { + "id": "2207", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027customers\u0027 table", + "sql_context": "CREATE TABLE customers (customer_id INT, first_name VARCHAR(255), last_name VARCHAR(255), email VARCHAR(255));", + "sql": "INSERT INTO customers (customer_id, first_name, last_name, email) VALUES (1001, \u0027Jamila\u0027, \u0027Bennett\u0027, \u0027jamila.bennett@example.com\u0027);", + "sql_explanation": "This SQL query inserts a new record into the \u0027customers\u0027 table with a unique customer_id, first name, last name, and email." +}, { + "id": "2571", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average network investment per day for the past year?", + "sql_context": "CREATE TABLE network_investments (investment_id INT, investment_amount DECIMAL(10,2), investment_date DATE); INSERT INTO network_investments (investment_id, investment_amount, investment_date) VALUES (1, 25000.00, \u00272021-12-25\u0027), (2, 30000.00, \u00272022-01-07\u0027), (3, 15000.00, \u00272022-02-01\u0027);", + "sql": "SELECT AVG(investment_amount) FROM network_investments WHERE investment_date \u003e\u003d DATE_SUB(CURRENT_DATE, 
INTERVAL 1 YEAR);", + "sql_explanation": "This query calculates the average network investment per day for the past year. It does so by using the AVG function to find the mean value of the investment_amount column, filtered for rows where the investment_date is within the past year." +}, { + "id": "2582", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add new network investments for the Northwest region.", + "sql_context": "CREATE TABLE network_investments (investment_id INT, area VARCHAR(20), budgeted_cost FLOAT, actual_cost FLOAT);", + "sql": "INSERT INTO network_investments (investment_id, area, budgeted_cost, actual_cost) VALUES (5, \u0027Northwest\u0027, 75000, 70000);", + "sql_explanation": "This query inserts a new record into the network_investments table, representing a network investment for the Northwest region with a budgeted cost of 75000 and an actual cost of 70000." 
+}, { + "id": "2678", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average network latency in the state of Texas for the last month?", + "sql_context": "CREATE TABLE network_latency (latency_id INT, state VARCHAR(50), latency FLOAT, measurement_date DATE); INSERT INTO network_latency (latency_id, state, latency, measurement_date) VALUES (1, \u0027Texas\u0027, 20, \u00272022-01-01\u0027); INSERT INTO network_latency (latency_id, state, latency, measurement_date) VALUES (2, \u0027Texas\u0027, 25, \u00272022-01-02\u0027);", + "sql": "SELECT AVG(latency) FROM network_latency WHERE state \u003d \u0027Texas\u0027 AND measurement_date \u003e\u003d DATEADD(month, -1, GETDATE());", + "sql_explanation": "This SQL query returns the average network latency in the state of Texas for the last month. It starts by selecting the average of the latency column from the network_latency table where the state is \u0027Texas\u0027 and the measurement_date is greater than or equal to the current date minus one month." 
+}, { + "id": "2989", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which broadband customers have been non-compliant with net neutrality regulations in the state of California?", + "sql_context": "CREATE TABLE broadband_customers (customer_id INT, net_neutrality_compliant BOOLEAN, state VARCHAR(20)); INSERT INTO broadband_customers (customer_id, net_neutrality_compliant, state) VALUES (1, FALSE, \u0027California\u0027), (2, TRUE, \u0027California\u0027);", + "sql": "SELECT customer_id FROM broadband_customers WHERE net_neutrality_compliant \u003d FALSE AND state \u003d \u0027California\u0027;", + "sql_explanation": "This SQL query selects the customer IDs of broadband customers in California who have not been compliant with net neutrality regulations by filtering for the state of California and net_neutrality_compliant set to FALSE." 
+}, { + "id": "3032", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from the South region in Q2 2022?", + "sql_context": "CREATE TABLE regions (region_id INT, region_name VARCHAR(255)); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027); CREATE TABLE sales (sale_id INT, region_id INT, sale_amount DECIMAL(10, 2), sale_date DATE); INSERT INTO sales (sale_id, region_id, sale_amount, sale_date) VALUES (1, 2, 5000, \u00272022-04-01\u0027), (2, 2, 6000, \u00272022-05-01\u0027), (3, 2, 7000, \u00272022-06-01\u0027);", + "sql": "SELECT SUM(sale_amount) FROM sales WHERE sale_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 AND region_id \u003d 2;", + "sql_explanation": "Sum all sale_amounts where the sale_date is between the start and end of Q2 2022 and the region_id is 2 (South)." 
+}, { + "id": "3040", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all countries that have made network infrastructure investments in the last 6 months, excluding duplicates.", + "sql_context": "CREATE TABLE investments (id INT, country VARCHAR(20), investment_date DATE); INSERT INTO investments (id, country, investment_date) VALUES (1, \u0027Japan\u0027, \u00272022-01-01\u0027), (2, \u0027China\u0027, \u00272022-03-15\u0027), (3, \u0027Japan\u0027, \u00272022-04-05\u0027);", + "sql": "SELECT DISTINCT country FROM investments WHERE investment_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", + "sql_explanation": "This query selects the distinct country values from the investments table with an investment_date within the last 6 months." 
+}, { + "id": "3164", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mobile subscribers have not updated their billing information in the last 6 months?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, name VARCHAR(50), billing_updated_date DATE); INSERT INTO mobile_subscribers (subscriber_id, name, billing_updated_date) VALUES (1, \u0027John Doe\u0027, \u00272021-01-15\u0027); INSERT INTO mobile_subscribers (subscriber_id, name, billing_updated_date) VALUES (2, \u0027Jane Smith\u0027, \u00272021-07-22\u0027);", + "sql": "SELECT * FROM mobile_subscribers WHERE billing_updated_date \u003c\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", + "sql_explanation": "The SQL query selects all records from the mobile_subscribers table where the billing_updated_date is older than 6 months from the current date." 
+}, { + "id": "3247", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a new table \u0027customer_complaints\u0027 with columns \u0027customer_id\u0027, \u0027complaint_type\u0027, \u0027complaint_date\u0027", + "sql_context": "CREATE SCHEMA telecom; CREATE TABLE customer_complaints (customer_id INT, complaint_type TEXT, complaint_date DATE);", + "sql": "CREATE TABLE telecom.customer_complaints (customer_id INT, complaint_type TEXT, complaint_date DATE);", + "sql_explanation": "* A new table \u0027customer_complaints\u0027 is being created with three columns \u0027customer_id\u0027, \u0027complaint_type\u0027, and \u0027complaint_date\u0027 in the \u0027telecom\u0027 schema." 
+}, { + "id": "3395", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mobile subscribers have made international calls in the Southeast region?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, international_calls BOOLEAN, region VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, international_calls, region) VALUES (1, TRUE, \u0027Southeast\u0027), (2, FALSE, \u0027Northeast\u0027), (3, FALSE, \u0027Southeast\u0027);", + "sql": "SELECT COUNT(*) FROM mobile_subscribers WHERE international_calls \u003d TRUE AND region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This query counts the number of records (COUNT(*)) where the international_calls field is TRUE and the region is \u0027Southeast\u0027." 
+}, { + "id": "3444", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly mobile data usage for postpaid customers in the \u0027Urban\u0027 region?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, plan_type VARCHAR(10), region VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, plan_type, region) VALUES (1, 3.5, \u0027postpaid\u0027, \u0027Urban\u0027), (2, 6.2, \u0027postpaid\u0027, \u0027Rural\u0027);", + "sql": "SELECT AVG(data_usage) FROM mobile_subscribers WHERE plan_type \u003d \u0027postpaid\u0027 AND region \u003d \u0027Urban\u0027;", + "sql_explanation": "This query calculates the average data usage for postpaid customers in the Urban region by selecting all records with \u0027postpaid\u0027 and \u0027Urban\u0027 values and averaging the data_usage column." 
+}, { + "id": "3453", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the compliance status for a record in the regulatory_compliance table", + "sql_context": "CREATE TABLE regulatory_compliance (compliance_id INT, regulation_name VARCHAR(50), compliance_status VARCHAR(50), compliance_date DATE);", + "sql": "UPDATE regulatory_compliance SET compliance_status \u003d \u0027Non-Compliant\u0027 WHERE compliance_id \u003d 22222;", + "sql_explanation": "This query updates the compliance status for the record with the compliance ID 22222 in the \u0027regulatory_compliance\u0027 table." +}, { + "id": "3494", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries with the highest broadband internet download speed.", + "sql_context": "CREATE TABLE network_infrastructure (country VARCHAR(20), download_speed FLOAT); INSERT INTO network_infrastructure (country, download_speed) VALUES (\u0027USA\u0027, 100), (\u0027Canada\u0027, 120), (\u0027Mexico\u0027, 80);", + "sql": "SELECT country, download_speed FROM network_infrastructure ORDER BY download_speed DESC LIMIT 3;", + "sql_explanation": "Orders the download_speed column in descending order and selects the top 3 
countries with the highest download speeds by using the LIMIT clause." +}, { + "id": "3583", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total data usage for the month?", + "sql_context": "CREATE TABLE data_usage (usage_id INT, data_usage INT, usage_date DATE);", + "sql": "SELECT SUM(data_usage) FROM data_usage WHERE usage_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027;", + "sql_explanation": "This query calculates the sum (SUM) of the data_usage column for rows in the data_usage table with usage_date values between \u00272022-01-01\u0027 and \u00272022-01-31\u0027." 
+}, { + "id": "3674", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly data usage for customers in the Americas?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(100), region VARCHAR(50), monthly_data_usage DECIMAL(10,2)); INSERT INTO customers (customer_id, name, region, monthly_data_usage) VALUES (1, \u0027John Doe\u0027, \u0027Americas\u0027, 10), (2, \u0027Jane Smith\u0027, \u0027Europe\u0027, 15), (3, \u0027Alice Johnson\u0027, \u0027Asia-Pacific\u0027, 8), (4, \u0027Bob Brown\u0027, \u0027Americas\u0027, 12), (5, \u0027Charlie Davis\u0027, \u0027Europe\u0027, 18);", + "sql": "SELECT AVG(customers.monthly_data_usage) FROM customers WHERE customers.region \u003d \u0027Americas\u0027;", + "sql_explanation": "This query calculates the average monthly data usage for customers in the Americas by filtering the customers table to only include customers from the Americas region using the WHERE clause. Then, the AVG function is used to calculate the average monthly data usage." 
+}, { + "id": "3686", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many broadband subscribers have speeds greater than 500 Mbps in the state of California?", + "sql_context": "CREATE TABLE broadband_subscribers (subscriber_id INT, state VARCHAR(255), speed_mbps DECIMAL(5,1)); INSERT INTO broadband_subscribers (subscriber_id, state, speed_mbps) VALUES (1, \u0027California\u0027, 600.3), (2, \u0027California\u0027, 400.5), (3, \u0027Nevada\u0027, 550.7);", + "sql": "SELECT COUNT(*) FROM broadband_subscribers WHERE state \u003d \u0027California\u0027 AND speed_mbps \u003e 500;", + "sql_explanation": "The SQL query counts the number of rows for \u0027California\u0027 state and speed greater than 500 Mbps." 
+}, { + "id": "3692", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add new network infrastructure investments for the South region.", + "sql_context": "CREATE TABLE network_investments_2 (investment_id INT, amount FLOAT, region VARCHAR(20)); INSERT INTO network_investments_2 (investment_id, amount, region) VALUES (4, 75000.0, \u0027South\u0027), (5, 80000.0, \u0027South\u0027), (6, 70000.0, \u0027South\u0027);", + "sql": "INSERT INTO network_investments SELECT * FROM network_investments_2 WHERE region \u003d \u0027South\u0027;", + "sql_explanation": "The query inserts 3 new records into the \u0027network_investments\u0027 table for network infrastructure investments in the South region with different investment_ids and amounts." 
+}, { + "id": "3720", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mobile customers have made international calls in the past month?", + "sql_context": "CREATE TABLE mobile_usage (customer_id INT, last_month_calls INT, international_call BOOLEAN); INSERT INTO mobile_usage (customer_id, last_month_calls, international_call) VALUES (1, 50, TRUE), (2, 30, FALSE), (3, 70, FALSE), (4, 40, TRUE), (5, 60, FALSE);", + "sql": "SELECT COUNT(*) FROM mobile_usage WHERE last_month_calls \u003e 0 AND international_call \u003d TRUE;", + "sql_explanation": "This query selects the number of mobile customers who made international calls in the past month by filtering the mobile_usage table to only include customers with a last_month_calls value greater than 0 and an international_call value of TRUE." 
+}, { + "id": "3731", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of mobile subscribers who have not made international calls in the Southeast region.", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, international_calls BOOLEAN, region VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, international_calls, region) VALUES (1, TRUE, \u0027Southeast\u0027), (2, FALSE, \u0027Northeast\u0027), (3, FALSE, \u0027Southeast\u0027), (4, TRUE, \u0027Northern\u0027), (5, TRUE, \u0027Eastern\u0027);", + "sql": "DELETE FROM mobile_subscribers WHERE international_calls \u003d FALSE AND region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This query deletes the records where the international_calls field is FALSE and the region is \u0027Southeast\u0027 from the mobile_subscribers table." 
+}, { + "id": "3848", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mobile subscribers are there in Illinois with a data plan that is not unlimited?", + "sql_context": "CREATE TABLE mobile_subscribers (id INT, name VARCHAR(50), data_plan VARCHAR(20), state VARCHAR(50)); INSERT INTO mobile_subscribers (id, name, data_plan, state) VALUES (11, \u0027Gina Adams\u0027, \u0027Limited\u0027, \u0027IL\u0027); INSERT INTO mobile_subscribers (id, name, data_plan, state) VALUES (12, \u0027Henry Brown\u0027, \u0027Limited\u0027, \u0027IL\u0027);", + "sql": "SELECT COUNT(*) FROM mobile_subscribers WHERE data_plan !\u003d \u0027Unlimited\u0027 AND state \u003d \u0027IL\u0027;", + "sql_explanation": "This SQL query counts the number of mobile subscribers in Illinois with a data plan that is not unlimited by counting the records with \u0027data_plan\u0027 not equal to \u0027Unlimited\u0027 and \u0027state\u0027 equal to \u0027IL\u0027." 
+}, { + "id": "3884", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new mobile plan named \"Family\" with a monthly cost of 50.00", + "sql_context": "CREATE TABLE mobile_plans (plan_id INT, plan_name VARCHAR(255), monthly_cost DECIMAL(10,2));", + "sql": "INSERT INTO mobile_plans (plan_id, plan_name, monthly_cost) VALUES (3, \u0027Family\u0027, 50.00);", + "sql_explanation": "This query inserts a new record into the mobile_plans table for a plan named \"Family\" with a monthly cost of 50.00." +}, { + "id": "4616", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which network towers were installed before 2015?", + "sql_context": "CREATE TABLE network_towers (tower_id INT, installation_date DATE); INSERT INTO network_towers (tower_id, installation_date) VALUES (1, \u00272014-05-12\u0027), (2, \u00272016-08-24\u0027), (3, \u00272012-09-01\u0027);", + "sql": "SELECT tower_id FROM network_towers WHERE installation_date \u003c \u00272015-01-01\u0027", + "sql_explanation": "This query selects the tower_id from the network_towers table, filtering by installation_date earlier than 2015 to find the towers installed before that year." 
+}, { + "id": "4690", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average broadband download speed for customers in the city of Tokyo?", + "sql_context": "CREATE TABLE broadband_customers (customer_id INT, city VARCHAR(20), download_speed FLOAT); INSERT INTO broadband_customers (customer_id, city, download_speed) VALUES (1, \u0027Tokyo\u0027, 300), (2, \u0027Tokyo\u0027, 400), (3, \u0027Osaka\u0027, 500);", + "sql": "SELECT AVG(download_speed) FROM broadband_customers WHERE city \u003d \u0027Tokyo\u0027;", + "sql_explanation": "This query calculates the average broadband download speed for customers in the city of Tokyo. It does this by selecting the average download_speed from the broadband_customers table where the city is \u0027Tokyo\u0027." 
+}, { + "id": "4699", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many broadband customers does the NYC regional office have?", + "sql_context": "CREATE TABLE broadband_subscribers (subscriber_id INT, regional_office VARCHAR(20)); INSERT INTO broadband_subscribers (subscriber_id, regional_office) VALUES (1, \u0027Boston\u0027), (2, \u0027Boston\u0027), (3, \u0027NYC\u0027), (4, \u0027NYC\u0027), (5, \u0027NYC\u0027);", + "sql": "SELECT COUNT(*) FROM broadband_subscribers WHERE regional_office \u003d \u0027NYC\u0027;", + "sql_explanation": "This query counts the number of rows in the broadband_subscribers table where the regional_office column is equal to \u0027NYC\u0027." 
+}, { + "id": "4707", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which network devices were installed in 2021?", + "sql_context": "CREATE TABLE network_devices (device_id INT, device_name VARCHAR(50), install_date DATE); INSERT INTO network_devices VALUES (1, \u0027RouterA\u0027, \u00272021-01-01\u0027); INSERT INTO network_devices VALUES (2, \u0027SwitchB\u0027, \u00272021-06-15\u0027); INSERT INTO network_devices VALUES (3, \u0027FirewallC\u0027, \u00272022-03-05\u0027);", + "sql": "SELECT device_name FROM network_devices WHERE YEAR(install_date) \u003d 2021;", + "sql_explanation": "This SQL query retrieves the names of the network devices that were installed in 2021 by filtering the network_devices table based on the year of the install_date column." 
+}, { + "id": "4837", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum data usage by a mobile customer in the state of New York?", + "sql_context": "CREATE TABLE mobile_customers (customer_id INT, data_usage FLOAT, state VARCHAR(20)); INSERT INTO mobile_customers (customer_id, data_usage, state) VALUES (1, 3.5, \u0027New York\u0027), (2, 6.2, \u0027New York\u0027);", + "sql": "SELECT MAX(data_usage) FROM mobile_customers WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query calculates the maximum data usage by a mobile customer in the state of New York by selecting the MAX function on the data_usage column and filtering for the state of New York." +}, { + "id": "4898", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum data allowance for broadband plans?", + "sql_context": "CREATE TABLE broadband_plans (plan_name TEXT, data_allowance INT);", + "sql": "SELECT MIN(data_allowance), MAX(data_allowance) FROM broadband_plans;", + "sql_explanation": "This query finds the minimum and maximum data allowance values from the broadband_plans table." 
+}, { + "id": "4944", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many broadband subscribers are there in the state of New York?", + "sql_context": "CREATE TABLE broadband_subscribers (subscriber_id INT, state VARCHAR(20)); INSERT INTO broadband_subscribers (subscriber_id, state) VALUES (1, \u0027New York\u0027), (2, \u0027Texas\u0027), (3, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM broadband_subscribers WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query counts the number of broadband subscribers in the state of New York. It does this by using the COUNT function with a WHERE clause to filter the rows based on the conditions specified." 
+}, { + "id": "5293", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly data usage for customers in the \u0027Rural\u0027 region?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), data_usage FLOAT, region VARCHAR(20)); INSERT INTO customers (id, name, data_usage, region) VALUES (1, \u0027John Doe\u0027, 15.0, \u0027Rural\u0027);", + "sql": "SELECT AVG(data_usage) FROM customers WHERE region \u003d \u0027Rural\u0027;", + "sql_explanation": "This SQL query calculates the average data usage for customers in the \u0027Rural\u0027 region by selecting all data_usage values where the region is \u0027Rural\u0027 and then finding the average of those values." +}, { + "id": "5529", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all mobile subscribers with unpaid balances in city Z", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, name TEXT, city TEXT, balance_usd FLOAT);", + "sql": "SELECT name, city, balance_usd FROM mobile_subscribers", + "sql_explanation": "List all columns (name, city, balance_usd) from the mobile_subscribers table where the balance_usd is not zero." 
+}, { + "id": "5736", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum data usage by a single subscriber?", + "sql_context": "CREATE TABLE subscribers (id INT, name TEXT, data_usage FLOAT); INSERT INTO subscribers (id, name, data_usage) VALUES (1, \u0027John Doe\u0027, 15.0); INSERT INTO subscribers (id, name, data_usage) VALUES (2, \u0027Jane Smith\u0027, 20.0); INSERT INTO subscribers (id, name, data_usage) VALUES (3, \u0027Bob Johnson\u0027, 25.0);", + "sql": " SELECT MAX(data_usage) FROM subscribers; ", + "sql_explanation": "This query finds the maximum data usage by a single subscriber. It does this by using the MAX function on the data_usage column." +}, { + "id": "5779", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Drop the \u0027compliance_reports\u0027 table", + "sql_context": "CREATE SCHEMA telecom; CREATE TABLE compliance_reports (quarter INT, filed BOOLEAN);", + "sql": "DROP TABLE telecom.compliance_reports;", + "sql_explanation": "* The \u0027compliance_reports\u0027 table in the \u0027telecom\u0027 schema is dropped." 
+}, { + "id": "639", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update contract details for \u0027North America\u0027 defense projects with timelines \u003e 2025", + "sql_context": "CREATE TABLE defense_projects (proj_id INT, proj_name VARCHAR(50), region VARCHAR(50), start_date DATE, end_date DATE); CREATE TABLE defense_contracts (cont_id INT, cont_name VARCHAR(50), proj_id INT, cont_status VARCHAR(50), cont_end_date DATE);", + "sql": "UPDATE defense_contracts dc SET cont_status \u003d \u0027Active\u0027 FROM defense_projects dp WHERE dc.proj_id \u003d dp.proj_id AND dp.region \u003d \u0027North America\u0027 AND dp.start_date \u003c \u00272026-01-01\u0027 AND dp.end_date \u003e \u00272026-12-31\u0027;", + "sql_explanation": "Step 1: Update the contract_status to \u0027Active\u0027 for defense contracts associated with North America defense projects that have a timeline beyond 2025." 
+}, { + "id": "908", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all contract negotiations involving both Acme Corp and Global Defence Corp.", + "sql_context": "CREATE TABLE contract_negotiations (negotiation_id INT, vendor_1 VARCHAR(255), vendor_2 VARCHAR(255), negotiation_date DATE); INSERT INTO contract_negotiations (negotiation_id, vendor_1, vendor_2, negotiation_date) VALUES (1, \u0027Acme Corp\u0027, \u0027Global Defence Corp\u0027, \u00272021-01-01\u0027);", + "sql": "SELECT vendor_1, vendor_2 FROM contract_negotiations WHERE (vendor_1 \u003d \u0027Acme Corp\u0027 AND vendor_2 \u003d \u0027Global Defence Corp\u0027) OR (vendor_1 \u003d \u0027Global Defence Corp\u0027 AND vendor_2 \u003d \u0027Acme Corp\u0027);", + "sql_explanation": "This query lists all contract negotiations involving both Acme Corp and Global Defence Corp by selecting the vendor_1 and vendor_2 columns in the contract_negotiations table, where either (vendor_1 is Acme Corp and vendor_2 is Global Defence Corp) or (vendor_1 is Global Defence Corp and vendor_2 is Acme Corp)." 
+}, { + "id": "925", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense projects in the Americas and their average duration?", + "sql_context": "CREATE TABLE defense_projects(id INT, project_name VARCHAR(50), start_date DATE, end_date DATE, status VARCHAR(20), region VARCHAR(20));", + "sql": "SELECT \u0027Americas\u0027 AS continent, AVG(DATEDIFF(end_date, start_date)) AS avg_duration, COUNT(*) AS total_projects FROM defense_projects WHERE region IN (\u0027North America\u0027, \u0027South America\u0027);", + "sql_explanation": "This SQL query retrieves the total number of defense projects in the Americas and their average duration by selecting the continent as \u0027Americas\u0027, the average duration (calculated using the DATEDIFF function), and the count of projects from the defense_projects table where the region is either \u0027North America\u0027 or \u0027South America\u0027, grouped by continent." 
+}, { + "id": "1047", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of military equipment sales to the Australian government by TopDefense from 2015 to 2020?", + "sql_context": "CREATE TABLE TopDefense.EquipmentSales (id INT, manufacturer VARCHAR(255), equipment_type VARCHAR(255), quantity INT, price DECIMAL(10,2), buyer_country VARCHAR(255), sale_date DATE);", + "sql": "SELECT SUM(quantity * price) FROM TopDefense.EquipmentSales WHERE buyer_country \u003d \u0027Australia\u0027 AND manufacturer \u003d \u0027TopDefense\u0027 AND sale_date BETWEEN \u00272015-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "This query calculates the total value of military equipment sales to the Australian government by TopDefense from 2015 to 2020 by summing the product of quantity and price where the buyer_country is Australia, the manufacturer is TopDefense, and the sale_date is between 2015-01-01 and 2020-12-31." 
+}, { + "id": "1338", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of military equipment sold by Boeing to Asian countries in Q3 2018, excluding sales to China?", + "sql_context": "CREATE TABLE Military_Equipment_Sales(equipment_id INT, manufacturer VARCHAR(255), purchaser VARCHAR(255), sale_date DATE, quantity INT);INSERT INTO Military_Equipment_Sales(equipment_id, manufacturer, purchaser, sale_date, quantity) VALUES (1, \u0027Boeing\u0027, \u0027Japan\u0027, \u00272018-07-15\u0027, 5), (2, \u0027Boeing\u0027, \u0027China\u0027, \u00272018-08-01\u0027, 10), (3, \u0027Boeing\u0027, \u0027South Korea\u0027, \u00272018-09-10\u0027, 7);", + "sql": "SELECT AVG(quantity) FROM Military_Equipment_Sales WHERE manufacturer \u003d \u0027Boeing\u0027 AND purchaser NOT IN (\u0027China\u0027) AND sale_date BETWEEN \u00272018-07-01\u0027 AND \u00272018-09-30\u0027;", + "sql_explanation": "This SQL query calculates the average quantity of military equipment sold by Boeing to Asian countries in Q3 2018, excluding sales to China. It does this by using the AVG function to find the average quantity value for rows that meet the specified conditions." 
+}, { + "id": "1733", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total value of contracts negotiated by \u0027Northrop Grumman\u0027 in Q1 2021 from the \u0027contracts\u0027 table", + "sql_context": "CREATE TABLE contracts (id INT, contractor VARCHAR(255), negotiation_date DATE, contract_value DECIMAL(10,2)); INSERT INTO contracts (id, contractor, negotiation_date, contract_value) VALUES (1, \u0027Northrop Grumman\u0027, \u00272021-01-10\u0027, 5000000); INSERT INTO contracts (id, contractor, negotiation_date, contract_value) VALUES (2, \u0027Boeing\u0027, \u00272021-03-15\u0027, 7000000);", + "sql": "SELECT SUM(contract_value) FROM contracts WHERE contractor \u003d \u0027Northrop Grumman\u0027 AND QUARTER(negotiation_date) \u003d 1 AND YEAR(negotiation_date) \u003d 2021;", + "sql_explanation": "This query calculates the total value of contracts negotiated by \u0027Northrop Grumman\u0027 in Q1 2021 by summing the \u0027contract_value\u0027 column where the \u0027contractor\u0027 is \u0027Northrop Grumman\u0027, and the \u0027negotiation_date\u0027 is in Q1 2021." 
+}, { + "id": "1793", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average duration of defense projects initiated between 2015 and 2018, excluding projects that lasted more than 5 years?", + "sql_context": "CREATE TABLE Defense_Projects(id INT, project_name VARCHAR(255), start_year INT, end_year INT); INSERT INTO Defense_Projects(id, project_name, start_year, end_year) VALUES (1, \u0027Project A\u0027, 2015, 2018), (2, \u0027Project B\u0027, 2016, 2019), (3, \u0027Project C\u0027, 2017, 2020), (4, \u0027Project D\u0027, 2018, 2021), (5, \u0027Project E\u0027, 2015, 2020), (6, \u0027Project F\u0027, 2016, 2017);", + "sql": "SELECT AVG(end_year - start_year) as Average_Duration FROM Defense_Projects WHERE start_year BETWEEN 2015 AND 2018 AND end_year - start_year \u003c\u003d 5;", + "sql_explanation": "This query calculates the average duration of defense projects initiated between 2015 and 2018, excluding projects that lasted more than 5 years. It uses the AVG function to calculate the average duration, and filters the data using the WHERE clause." 
+}, { + "id": "1869", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of defense projects per year in the North America region for the past 5 years?", + "sql_context": "CREATE TABLE projects (id INT, region VARCHAR(255), year INT); INSERT INTO projects (id, region, year) VALUES (1, \u0027North America\u0027, 2018), (2, \u0027Europe\u0027, 2021), (3, \u0027North America\u0027, 2019), (4, \u0027North America\u0027, 2020), (5, \u0027North America\u0027, 2021), (6, \u0027Europe\u0027, 2022);", + "sql": "SELECT AVG(year) as avg_year FROM projects WHERE region \u003d \u0027North America\u0027 AND year BETWEEN (YEAR(CURRENT_DATE()) - 5) AND YEAR(CURRENT_DATE());", + "sql_explanation": "This query calculates the average number of defense projects per year in the North America region for the past 5 years by filtering the results based on the specified conditions and then calculating the average of the year column. It returns the average year for defense projects in the specified region for the past 5 years. Note: This query assumes that the current date is being used to determine the past 5 years. The syntax for calculating the current year may vary depending on the database management system being used." 
+}, { + "id": "2022", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum sale price of military equipment sold by \u0027Epsilon Corp\u0027 in \u0027North America\u0027 in the year 2020?", + "sql_context": "CREATE TABLE MilitaryEquipmentSales (seller VARCHAR(255), buyer VARCHAR(255), equipment_model VARCHAR(255), sale_price DECIMAL(10,2), sale_date DATE, region VARCHAR(255));", + "sql": "SELECT MAX(sale_price) FROM MilitaryEquipmentSales WHERE seller \u003d \u0027Epsilon Corp\u0027 AND region \u003d \u0027North America\u0027 AND YEAR(sale_date) \u003d 2020;", + "sql_explanation": "This query calculates the maximum sale price of military equipment sold by \u0027Epsilon Corp\u0027 in \u0027North America\u0027 in the year 2020 by selecting the maximum \u0027sale_price\u0027 where the \u0027seller\u0027, \u0027region\u0027, and year of \u0027sale_date\u0027 match the specified criteria." 
+}, { + "id": "2049", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the defense projects with timelines exceeding 2 years in Canada.", + "sql_context": "CREATE TABLE DefenseProjects (project_id INT, country VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO DefenseProjects (project_id, country, start_date, end_date) VALUES (1, \u0027Canada\u0027, \u00272018-01-01\u0027, \u00272023-12-31\u0027); INSERT INTO DefenseProjects (project_id, country, start_date, end_date) VALUES (2, \u0027Canada\u0027, \u00272020-01-01\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT project_id, country, start_date, end_date FROM DefenseProjects WHERE DATEDIFF(end_date, start_date) \u003e 730 AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "The SQL query selects projects with timelines exceeding 2 years by finding the difference between end_date and start_date, and filtering projects where the difference is greater than 730 days (2 years) in Canada." 
+}, { + "id": "2062", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many ground vehicles has Raytheon sold to the Middle East?", + "sql_context": "CREATE TABLE Raytheon_Sales (id INT, corporation VARCHAR(20), customer VARCHAR(20), quantity INT, equipment VARCHAR(20)); INSERT INTO Raytheon_Sales (id, corporation, customer, quantity, equipment) VALUES (1, \u0027Raytheon\u0027, \u0027Middle East\u0027, 20, \u0027Ground Vehicles\u0027);", + "sql": "SELECT SUM(quantity) FROM Raytheon_Sales WHERE corporation \u003d \u0027Raytheon\u0027 AND customer \u003d \u0027Middle East\u0027 AND equipment \u003d \u0027Ground Vehicles\u0027;", + "sql_explanation": "This query calculates the total number of ground vehicles sold by Raytheon to the Middle East by summing the \u0027quantity\u0027 column where the \u0027corporation\u0027 is \u0027Raytheon\u0027, the \u0027customer\u0027 is \u0027Middle East\u0027, and the \u0027equipment\u0027 is \u0027Ground Vehicles\u0027." 
+}, { + "id": "2362", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many military equipment sales were made to South America in Q4 2021?", + "sql_context": "CREATE TABLE MilitaryEquipmentSales (id INT, region VARCHAR(50), amount FLOAT, sale_date DATE); INSERT INTO MilitaryEquipmentSales (id, region, amount, sale_date) VALUES (1, \u0027South America\u0027, 8000000, \u00272021-11-15\u0027); INSERT INTO MilitaryEquipmentSales (id, region, amount, sale_date) VALUES (2, \u0027South America\u0027, 6000000, \u00272021-10-01\u0027); INSERT INTO MilitaryEquipmentSales (id, region, amount, sale_date) VALUES (3, \u0027South America\u0027, 9000000, \u00272021-12-20\u0027);", + "sql": "SELECT COUNT(*) FROM MilitaryEquipmentSales WHERE region \u003d \u0027South America\u0027 AND sale_date BETWEEN \u00272021-10-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "The SQL query counts military equipment sales made to South America in Q4 2021 by selecting the count of records where \u0027region\u0027 is \u0027South America\u0027 and \u0027sale_date\u0027 is within Q4 2021." 
+}, { + "id": "2550", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average cost of military equipment sold by Northrop Grumman to Africa in 2019?", + "sql_context": "CREATE TABLE military_equipment_sales (company VARCHAR(255), region VARCHAR(255), year INT, cost INT); INSERT INTO military_equipment_sales (company, region, year, cost) VALUES (\u0027Northrop Grumman\u0027, \u0027Africa\u0027, 2019, 5000000);", + "sql": "SELECT AVG(cost) FROM military_equipment_sales WHERE company \u003d \u0027Northrop Grumman\u0027 AND region \u003d \u0027Africa\u0027 AND year \u003d 2019;", + "sql_explanation": "The SQL query calculates the average cost of military equipment sold by Northrop Grumman to Africa in 2019 by using the AVG() function." 
+}, { + "id": "2595", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a defense project \"Project X\" with a planned start date of 2023-01-01 and a status of \"Planning\".", + "sql_context": "CREATE TABLE defense_projects (id INT PRIMARY KEY, project_name VARCHAR(255), status VARCHAR(255), planned_start_date DATE);", + "sql": "INSERT INTO defense_projects (project_name, status, planned_start_date) VALUES (\u0027Project X\u0027, \u0027Planning\u0027, \u00272023-01-01\u0027);", + "sql_explanation": "The query inserts a new record into the defense_projects table for \"Project X\" with a planned start date of 2023-01-01 and a status of \"Planning\"." 
+}, { + "id": "2922", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List defense projects with timelines that started before 2020-01-01 and are still ongoing?", + "sql_context": "CREATE TABLE Defense_Projects(id INT, project_name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO Defense_Projects(id, project_name, start_date, end_date) VALUES (1, \u0027Project A\u0027, \u00272019-01-01\u0027, \u00272022-12-31\u0027);", + "sql": "SELECT * FROM Defense_Projects WHERE start_date \u003c \u00272020-01-01\u0027 AND (end_date IS NULL OR end_date \u003e CURDATE());", + "sql_explanation": "This query lists defense projects with timelines that started before 2020-01-01 and are still ongoing by selecting all columns from Defense_Projects table where start_date is before 2020-01-01 and end_date is either NULL or after the current date." 
+}, { + "id": "2991", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all geopolitical risk assessments for the Indo-Pacific region in H1 2023.", + "sql_context": "CREATE TABLE geopolitical_risk (id INT, region VARCHAR(20), half INT, year INT, assessment TEXT); INSERT INTO geopolitical_risk (id, region, half, year, assessment) VALUES (1, \u0027Indo-Pacific\u0027, 1, 2023, \u0027Stable\u0027);", + "sql": "SELECT region, assessment FROM geopolitical_risk WHERE region \u003d \u0027Indo-Pacific\u0027 AND half \u003d 1 AND year \u003d 2023;", + "sql_explanation": "The SQL query lists all geopolitical risk assessments for the Indo-Pacific region in H1 2023 by selecting the relevant columns from the geopolitical_risk table and filtering rows based on region, half, and year." 
+}, { + "id": "3075", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which defense projects started in 2019 and have a budget over 10 million?", + "sql_context": "CREATE TABLE Defense_Projects (project_id INT, start_year INT, budget FLOAT);", + "sql": "SELECT project_id, start_year, budget FROM Defense_Projects WHERE start_year \u003d 2019 AND budget \u003e 10000000;", + "sql_explanation": "This query retrieves defense projects that started in 2019 and have a budget over 10 million by selecting the \u0027project_id\u0027, \u0027start_year\u0027, and \u0027budget\u0027 columns where the \u0027start_year\u0027 is 2019 and the \u0027budget\u0027 is greater than 10 million." 
+}, { + "id": "3139", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration of military projects that General Dynamics has been involved in?", + "sql_context": "CREATE TABLE military_projects (contractor VARCHAR(255), project VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO military_projects (contractor, project, start_date, end_date) VALUES (\u0027General Dynamics\u0027, \u0027Project F\u0027, \u00272015-01-01\u0027, \u00272017-12-31\u0027), (\u0027General Dynamics\u0027, \u0027Project G\u0027, \u00272016-07-01\u0027, \u00272018-06-30\u0027), (\u0027General Dynamics\u0027, \u0027Project H\u0027, \u00272017-03-01\u0027, \u00272019-02-28\u0027), (\u0027General Dynamics\u0027, \u0027Project I\u0027, \u00272018-09-01\u0027, \u00272020-08-31\u0027), (\u0027General Dynamics\u0027, \u0027Project J\u0027, \u00272019-11-01\u0027, \u00272022-10-31\u0027);", + "sql": "SELECT AVG(DATEDIFF(end_date, start_date)) FROM military_projects WHERE contractor \u003d \u0027General Dynamics\u0027;", + "sql_explanation": "This SQL query calculates the average duration of military projects that General Dynamics has been involved in. It averages the number of days between the start date and the end date for all projects where the contractor is General Dynamics." 
+}, { + "id": "3188", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total value of military equipment sales to South East Asia in Q1 2021?", + "sql_context": "CREATE TABLE military_sales (id INT, quarter INT, region VARCHAR(50), year INT, value FLOAT); INSERT INTO military_sales (id, quarter, region, year, value) VALUES (1, 1, \u0027South East Asia\u0027, 2021, 2000000);", + "sql": "SELECT SUM(value) FROM military_sales WHERE quarter \u003d 1 AND region \u003d \u0027South East Asia\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the total value of military equipment sales to South East Asia in Q1 2021 by summing the \u0027value\u0027 column for the corresponding quarter, region and year." 
+}, { + "id": "3512", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the defense project timelines for projects in the Caribbean?", + "sql_context": "CREATE TABLE DefenseProjectTimelines (projectID INT, projectName VARCHAR(255), startDate DATE, endDate DATE, region VARCHAR(255)); INSERT INTO DefenseProjectTimelines (projectID, projectName, startDate, endDate, region) VALUES (1, \u0027Operation Tradewinds\u0027, \u00272022-06-01\u0027, \u00272022-06-15\u0027, \u0027Caribbean\u0027);", + "sql": "SELECT projectName, startDate, endDate FROM DefenseProjectTimelines WHERE region \u003d \u0027Caribbean\u0027;", + "sql_explanation": "This SQL query retrieves the defense project timelines for projects in the Caribbean by selecting the project name, start date, and end date from the DefenseProjectTimelines table where the region is the Caribbean. It uses the basic SELECT statement to fetch specific columns from the table." 
+}, { + "id": "3519", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest start date of defense project negotiations with Saudi Arabia?", + "sql_context": "CREATE TABLE DefenseProjectTimelines (id INT PRIMARY KEY, project_name VARCHAR(50), negotiation_start_date DATE, negotiation_end_date DATE, country VARCHAR(50)); INSERT INTO DefenseProjectTimelines (id, project_name, negotiation_start_date, negotiation_end_date, country) VALUES (1, \u0027S-400 Missile Defense System\u0027, \u00272016-01-01\u0027, \u00272018-01-01\u0027, \u0027Saudi Arabia\u0027), (2, \u0027AK-12 Assault Rifle\u0027, \u00272017-01-01\u0027, \u00272019-01-01\u0027, \u0027Saudi Arabia\u0027);", + "sql": "SELECT MIN(negotiation_start_date) FROM DefenseProjectTimelines WHERE country \u003d \u0027Saudi Arabia\u0027;", + "sql_explanation": "The SQL query calculates the earliest start date of defense project negotiations with Saudi Arabia by taking the minimum negotiation_start_date where country is \u0027Saudi Arabia\u0027." 
+}, { + "id": "3546", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all contract negotiations with defense contractor \u0027XYZ Inc.\u0027 that were not successful.", + "sql_context": "CREATE TABLE contract_negotiations(id INT, country VARCHAR(50), contractor VARCHAR(50), negotiation_date DATE, status VARCHAR(50)); INSERT INTO contract_negotiations(id, country, contractor, negotiation_date, status) VALUES (1, \u0027US\u0027, \u0027XYZ Inc.\u0027, \u00272022-01-01\u0027, \u0027Unsuccessful\u0027); INSERT INTO contract_negotiations(id, country, contractor, negotiation_date, status) VALUES (2, \u0027Canada\u0027, \u0027ABC Corp\u0027, \u00272022-02-01\u0027, \u0027Successful\u0027);", + "sql": "SELECT * FROM contract_negotiations WHERE contractor \u003d \u0027XYZ Inc.\u0027 AND status \u003d \u0027Unsuccessful\u0027;", + "sql_explanation": "The SQL query shows all contract negotiations with defense contractor \u0027XYZ Inc.\u0027 that were not successful by selecting all columns (\u0027*\u0027) from \u0027contract_negotiations\u0027 table where \u0027contractor\u0027 is \u0027XYZ Inc.\u0027 and \u0027status\u0027 is \u0027Unsuccessful\u0027." 
+}, { + "id": "3571", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of military equipment sales in the Middle East?", + "sql_context": "CREATE TABLE MilitaryEquipmentSales (saleID INT, equipmentName VARCHAR(255), quantity INT, pricePerUnit DECIMAL(10,2), country VARCHAR(255)); INSERT INTO MilitaryEquipmentSales (saleID, equipmentName, quantity, pricePerUnit, country) VALUES (1, \u0027M1 Abrams Tank\u0027, 100, 7000000, \u0027Saudi Arabia\u0027);", + "sql": "SELECT SUM(quantity * pricePerUnit) FROM MilitaryEquipmentSales WHERE country \u003d \u0027Middle East\u0027;", + "sql_explanation": "This SQL query calculates the total value of military equipment sales in the Middle East by summing the product of the quantity and price per unit from the MilitaryEquipmentSales table. It uses the SUM function to calculate the total value and filters the results based on the country column." 
+}, { + "id": "3611", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of military equipment sales to the Canadian government in 2020?", + "sql_context": "CREATE TABLE military_sales (id INT, year INT, customer VARCHAR(20), equipment_type VARCHAR(20), value FLOAT); INSERT INTO military_sales (id, year, customer, equipment_type, value) VALUES (1, 2020, \u0027Canadian Government\u0027, \u0027Aircraft\u0027, 5000000); INSERT INTO military_sales (id, year, customer, equipment_type, value) VALUES (2, 2020, \u0027Canadian Government\u0027, \u0027Naval Vessels\u0027, 8000000);", + "sql": "SELECT SUM(value) FROM military_sales WHERE year \u003d 2020 AND customer \u003d \u0027Canadian Government\u0027;", + "sql_explanation": "This query calculates the total value of military equipment sales to the Canadian government in 2020 by summing the value column where the year is 2020 and the customer is the Canadian Government." 
+}, { + "id": "3636", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many defense projects were delayed in 2021?", + "sql_context": "CREATE SCHEMA if not exists defense_projects;CREATE TABLE if not exists defense_project_timelines(project_name text, start_year integer, end_year integer, status text);INSERT INTO defense_project_timelines(project_name, start_year, end_year, status) VALUES(\u0027F-35\u0027, 2007, 2017, \u0027Completed\u0027), (\u0027Joint Light Tactical Vehicle\u0027, 2012, 2021, \u0027Delayed\u0027), (\u0027Global Hawk\u0027, 2001, 2011, \u0027Completed\u0027);", + "sql": "SELECT COUNT(*) FROM defense_project_timelines WHERE status \u003d \u0027Delayed\u0027 AND end_year \u003d 2021;", + "sql_explanation": "The SQL query counts the number of defense projects delayed in 2021 by selecting rows with status \u0027Delayed\u0027 and end_year 2021 and counting the number of rows." 
+}, { + "id": "3929", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027project_milestones\u0027 with columns \u0027project_id\u0027, \u0027milestone\u0027, \u0027due_date\u0027", + "sql_context": "CREATE TABLE project_milestones (project_id INT, milestone VARCHAR(50), due_date DATE);", + "sql": "CREATE TABLE project_milestones (project_id INT, milestone VARCHAR(50), due_date DATE);", + "sql_explanation": "Create a new table named \u0027project_milestones\u0027 with three columns: \u0027project_id\u0027, \u0027milestone\u0027, and \u0027due_date\u0027. The \u0027project_id\u0027 column stores integer data, the \u0027milestone\u0027 column stores varchar data with a max length of 50 characters, and the \u0027due_date\u0027 column stores date data." 
+}, { + "id": "4024", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which defense projects have experienced delays of over 6 months since their original timeline?", + "sql_context": "CREATE TABLE defense_projects (project_name VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO defense_projects (project_name, start_date, end_date) VALUES (\u0027Joint Light Tactical Vehicle\u0027, \u00272016-01-01\u0027, \u00272020-12-31\u0027), (\u0027Ground Combat Vehicle\u0027, \u00272015-01-01\u0027, \u00272024-12-31\u0027);", + "sql": "SELECT project_name FROM defense_projects WHERE DATEDIFF(end_date, start_date) \u003e 180;", + "sql_explanation": "The SQL query identifies projects with delays of over 6 months by calculating the difference between end_date and start_date, then filtering for a difference greater than 180 days." 
+}, { + "id": "4032", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the defense projects that have experienced delays of over 6 months?", + "sql_context": "CREATE TABLE defense_projects(id INT, project_name VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "SELECT project_name FROM defense_projects WHERE DATEDIFF(end_date, start_date) \u003e 180;", + "sql_explanation": "This query identifies defense projects with delays of over 6 months by finding the difference between the end_date and start_date for each project and selecting the project_name where the difference is greater than 180 days (6 months)." 
+}, { + "id": "4080", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete geopolitical risk assessments for \u0027Morocco\u0027 before \u00272022\u0027?", + "sql_context": "CREATE TABLE Geopolitical_Risk_Assessments (country VARCHAR(255), year INT, risk_level INT); INSERT INTO Geopolitical_Risk_Assessments (country, year, risk_level) VALUES (\u0027Morocco\u0027, 2022, 4), (\u0027Morocco\u0027, 2021, 3);", + "sql": "DELETE FROM Geopolitical_Risk_Assessments WHERE country \u003d \u0027Morocco\u0027 AND year \u003c 2022;", + "sql_explanation": "This query deletes geopolitical risk assessments for Morocco before 2022 by deleting rows that meet the specified conditions in the WHERE clause." 
+}, { + "id": "4438", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of units sold for any military equipment model in Africa?", + "sql_context": "CREATE TABLE EquipmentSales (id INT, equipment_model VARCHAR(255), region VARCHAR(255), quantity INT); INSERT INTO EquipmentSales (id, equipment_model, region, quantity) VALUES (1, \u0027M1 Abrams\u0027, \u0027Africa\u0027, 60), (2, \u0027F-35\u0027, \u0027Africa\u0027, 150), (3, \u0027M2 Bradley\u0027, \u0027Africa\u0027, 45);", + "sql": "SELECT MAX(quantity) as max_sold FROM EquipmentSales WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "The SQL query calculates the maximum number of units sold for any military equipment model in Africa. It uses the MAX function to find the maximum quantity sold, and the WHERE clause filters the results to only include sales in Africa." 
+}, { + "id": "4572", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records related to \u0027Helicopters\u0027 sold in 2022 from the Navy_Equipment table.", + "sql_context": "CREATE TABLE Navy_Equipment (equipment VARCHAR(50), year INT, quantity INT);", + "sql": "DELETE FROM Navy_Equipment WHERE equipment \u003d \u0027Helicopters\u0027 AND year \u003d 2022;", + "sql_explanation": "The SQL query deletes all records related to \u0027Helicopters\u0027 sold in 2022 from the Navy_Equipment table." +}, { + "id": "4642", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total value of military equipment sales in NATO countries", + "sql_context": "CREATE TABLE nato_military_sales (id INT, country VARCHAR(255), sale_value FLOAT); INSERT INTO nato_military_sales (id, country, sale_value) VALUES (1, \u0027Country A\u0027, 2000000); INSERT INTO nato_military_sales (id, country, sale_value) VALUES (2, \u0027Country B\u0027, 3000000);", + "sql": "SELECT SUM(sale_value) FROM nato_military_sales WHERE country IN (\u0027NATO\u0027);", + "sql_explanation": "This SQL query calculates the total value of military equipment sales in NATO countries by summing up the sale_value column where the country is in NATO." 
+}, { + "id": "4648", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the geopolitical risk scores for African nations in 2022?", + "sql_context": "CREATE TABLE risks(id INT, country VARCHAR(50), region VARCHAR(20), score NUMERIC, year INT);", + "sql": "SELECT country, score FROM risks WHERE region \u003d \u0027Africa\u0027 AND year \u003d 2022;", + "sql_explanation": "The query selects country and risk scores from the risks table where the region is Africa and the year is 2022." +}, { + "id": "4730", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all military equipment sales records for \u0027Middle East\u0027 in the year \u00272024\u0027", + "sql_context": "CREATE TABLE military_sales (id INT PRIMARY KEY, region VARCHAR(20), year INT, equipment_name VARCHAR(30), quantity INT, value FLOAT); INSERT INTO military_sales (id, region, year, equipment_name, quantity, value) VALUES (1, \u0027Middle East\u0027, 2024, \u0027Fighter Jet\u0027, 12, 5000000), (2, \u0027Middle East\u0027, 2024, \u0027Tank\u0027, 25, 12000000), (3, \u0027Middle East\u0027, 2024, \u0027Helicopter\u0027, 10, 8000000);", + "sql": "DELETE FROM military_sales WHERE region \u003d \u0027Middle East\u0027 AND year \u003d 2024;", + 
"sql_explanation": "*This query deletes all records from the military_sales table where the region is \u0027Middle East\u0027 and the year is \u00272024\u0027." +}, { + "id": "4733", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum value of military equipment sales to India in any year?", + "sql_context": "CREATE TABLE Military_Equipment_Sales (sale_id INT, year INT, country VARCHAR(50), value FLOAT); INSERT INTO Military_Equipment_Sales (sale_id, year, country, value) VALUES (1, 2015, \u0027India\u0027, 3000000), (2, 2016, \u0027India\u0027, 4000000), (3, 2017, \u0027India\u0027, 5000000);", + "sql": "SELECT MAX(value) FROM Military_Equipment_Sales WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query finds the maximum value of military equipment sales to India in any year by selecting the maximum value from the \u0027value\u0027 column where the \u0027country\u0027 is India." 
+}, { + "id": "4880", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which defense projects have a start date on or after January 1, 2023?", + "sql_context": "CREATE TABLE DefenseProjects (id INT PRIMARY KEY, project VARCHAR(50), start_date DATE); INSERT INTO DefenseProjects (id, project, start_date) VALUES (1, \u0027Project B\u0027, \u00272023-01-01\u0027);", + "sql": "SELECT project FROM DefenseProjects WHERE start_date \u003e\u003d \u00272023-01-01\u0027;", + "sql_explanation": "The SQL query identifies defense projects with a start date on or after January 1, 2023 by selecting the \u0027project\u0027 column where the \u0027start_date\u0027 is on or after \u00272023-01-01\u0027." 
+}, { + "id": "5162", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all defense projects and their respective start and end dates.", + "sql_context": "CREATE TABLE defense_projects (project_id INT, project_name VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO defense_projects (project_id, project_name, start_date, end_date) VALUES (1, \u0027Joint Strike Fighter\u0027, \u00272001-01-01\u0027, \u00272025-01-01\u0027), (2, \u0027THAAD Missile Defense\u0027, \u00272005-01-01\u0027, \u00272027-01-01\u0027);", + "sql": "SELECT project_name, start_date, end_date FROM defense_projects;", + "sql_explanation": "This query retrieves all defense projects and their respective start and end dates from the defense_projects table." 
+}, { + "id": "5249", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the contract negotiators for defense projects in Europe?", + "sql_context": "CREATE TABLE contract_negotiators (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO contract_negotiators (id, name, region) VALUES (1, \u0027Sophia Nguyen\u0027, \u0027Europe\u0027), (2, \u0027James Lee\u0027, \u0027Asia\u0027), (3, \u0027Marieke Janssen\u0027, \u0027Americas\u0027), (4, \u0027Ali Al-Said\u0027, \u0027Middle East\u0027), (5, \u0027SneÅžana Petrović\u0027, \u0027Europe\u0027), (6, \u0027Pedro GonzÃĄlez\u0027, \u0027Americas\u0027);", + "sql": "SELECT name FROM contract_negotiators WHERE region \u003d \u0027Europe\u0027;", + "sql_explanation": "This SQL query retrieves the contract negotiators for defense projects in Europe by selecting the name column where the region is equal to \u0027Europe\u0027." 
+}, { + "id": "5511", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all defense projects that have not started yet.", + "sql_context": "CREATE TABLE DefenseProjects (project_id INT, project_name VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO DefenseProjects (project_id, project_name, start_date, end_date) VALUES (1, \u0027Project A\u0027, \u00272022-02-01\u0027, \u00272023-01-31\u0027), (2, \u0027Project B\u0027, \u00272021-06-15\u0027, \u00272022-05-31\u0027), (3, \u0027Project C\u0027, \u00272024-04-01\u0027, \u00272025-03-31\u0027);", + "sql": "SELECT * FROM DefenseProjects WHERE start_date \u003e NOW();", + "sql_explanation": "The query selects all columns (project_id, project_name, start_date, end_date) from DefenseProjects where start_date is in the future." 
+}, { + "id": "5563", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update military equipment sale records\u0027 total cost based on a 10% increase", + "sql_context": "CREATE TABLE sales_data (id INT, equipment_name TEXT, sale_date DATE, quantity INT, total_cost FLOAT);", + "sql": "UPDATE sales_data SET total_cost \u003d total_cost * 1.10;", + "sql_explanation": "The SQL query updates the total_cost column in the sales_data table by multiplying its current value by 1.10, representing a 10% increase." +}, { + "id": "5683", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum value of military equipment sold in a single transaction?", + "sql_context": "CREATE TABLE MilitaryEquipmentSales (id INT PRIMARY KEY, year INT, country VARCHAR(50), equipment VARCHAR(50), value FLOAT); INSERT INTO MilitaryEquipmentSales (id, year, country, equipment, value) VALUES (1, 2022, \u0027USA\u0027, \u0027Ships\u0027, 50000000);", + "sql": "SELECT MAX(value) FROM MilitaryEquipmentSales;", + "sql_explanation": "The SQL query identifies the maximum value of military equipment sold in a single transaction by selecting the maximum value in the \u0027value\u0027 column." 
+}, { + "id": "5781", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the lowest value military equipment sale?", + "sql_context": "CREATE TABLE military_sales (id INT, country VARCHAR, value FLOAT); INSERT INTO military_sales (id, country, value) VALUES (1, \u0027Canada\u0027, 5000000), (2, \u0027Mexico\u0027, 3000000), (3, \u0027Canada\u0027, 7000000);", + "sql": "SELECT MIN(value) FROM military_sales;", + "sql_explanation": "This query calculates the minimum value from the \u0027value\u0027 column in the \u0027military_sales\u0027 table, providing the lowest value military equipment sale." +}, { + "id": "1588", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students with hearing impairments have not utilized sign language interpreters in the past year?", + "sql_context": "CREATE TABLE students (id INT, name TEXT, disability TEXT, sign_language_interpreter BOOLEAN); INSERT INTO students (id, name, disability, sign_language_interpreter) VALUES (1, \u0027John Doe\u0027, \u0027hearing impairment\u0027, true); INSERT INTO students (id, name, disability, sign_language_interpreter) VALUES (2, \u0027Jane Smith\u0027, \u0027learning disability\u0027, false);", + "sql": 
"SELECT COUNT(*) FROM students WHERE disability \u003d \u0027hearing impairment\u0027 AND sign_language_interpreter \u003d false AND date \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 YEAR);", + "sql_explanation": "This query counts the number of students with a disability of \u0027hearing impairment\u0027 who have not utilized sign language interpreters (sign_language_interpreter \u003d false) in the past year." +}, { + "id": "2375", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which support programs were offered in a specific state in the past 6 months?", + "sql_context": "CREATE TABLE SupportPrograms (ProgramID INT, ProgramName VARCHAR(50), State VARCHAR(50)); INSERT INTO SupportPrograms (ProgramID, ProgramName, State) VALUES (1, \u0027Tutoring\u0027, \u0027New York\u0027); INSERT INTO SupportPrograms (ProgramID, ProgramName, State) VALUES (2, \u0027Mentoring\u0027, \u0027California\u0027);", + "sql": "SELECT ProgramName FROM SupportPrograms WHERE State \u003d \u0027New York\u0027 AND Date BETWEEN DATEADD(month, -6, GETDATE()) AND GETDATE();", + "sql_explanation": "This query filters the SupportPrograms table to only include records where the state is \"New York\" and the date is within the past 6 months. It then selects the ProgramName column from the filtered results." 
+}, { + "id": "2381", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of unique accommodations for \u0027MobilityAssistanceDog\u0027 in the \u0027ServiceAnimalAccommodations\u0027 table?", + "sql_context": "CREATE TABLE ServiceAnimalAccommodations (service_animal_id INT, accommodation_type VARCHAR(255)); INSERT INTO ServiceAnimalAccommodations (service_animal_id, accommodation_type) VALUES (1001, \u0027VisualAssistanceDog\u0027), (1002, \u0027HearingDog\u0027), (1003, \u0027MobilityAssistanceDog\u0027), (1004, \u0027MobilityAssistanceDog\u0027), (1005, \u0027VisualAssistanceDog\u0027);", + "sql": "SELECT COUNT(DISTINCT accommodation_type) FROM ServiceAnimalAccommodations WHERE accommodation_type \u003d \u0027MobilityAssistanceDog\u0027;", + "sql_explanation": "The SQL query selects distinct \u0027accommodation_type\u0027 values from the \u0027ServiceAnimalAccommodations\u0027 table where the accommodation type is \u0027MobilityAssistanceDog\u0027, returning the count of unique accommodations for this type." 
+}, { + "id": "2601", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total budget allocated for disability services in the BudgetAllocations table for the fiscal year 2022.", + "sql_context": "CREATE TABLE BudgetAllocations (allocationID INT, allocationType VARCHAR(50), allocationAmount FLOAT, fiscalYear INT);", + "sql": "SELECT SUM(allocationAmount) FROM BudgetAllocations WHERE fiscalYear \u003d 2022 AND allocationType \u003d \u0027Disability Services\u0027;", + "sql_explanation": "This SQL query shows the total budget allocated for disability services in the \u0027BudgetAllocations\u0027 table for the fiscal year 2022 by using the SUM function on the \u0027allocationAmount\u0027 column, filtering the results by \u0027fiscalYear\u0027 and \u0027allocationType\u0027 columns." 
+}, { + "id": "2954", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total budget allocated to \u0027Accessibility Services\u0027 in the first three quarters of the fiscal year.", + "sql_context": "CREATE TABLE BudgetAllocations (ID INT, Category TEXT, Quarter INT, Amount FLOAT); INSERT INTO BudgetAllocations (ID, Category, Quarter, Amount) VALUES (1, \u0027Accessibility Services\u0027, 1, 10000.00), (2, \u0027Policy Advocacy\u0027, 2, 15000.00), (3, \u0027Accessibility Services\u0027, 3, 8000.00);", + "sql": "SELECT SUM(Amount) FROM BudgetAllocations WHERE Category \u003d \u0027Accessibility Services\u0027 AND Quarter IN (1, 2, 3);", + "sql_explanation": "The SQL query calculates the total budget allocated to \u0027Accessibility Services\u0027 in the first three quarters of the fiscal year (quarters 1, 2 and 3) using the SUM function and the IN operator." 
+}, { + "id": "2986", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum cost of accommodations provided to students with mobility impairments in the past year?", + "sql_context": "CREATE TABLE accommodations (id INT, student_id INT, type TEXT, cost INT, date DATE); INSERT INTO accommodations (id, student_id, type, cost, date) VALUES (1, 1, \u0027wheelchair\u0027, 500, \u00272022-01-01\u0027); INSERT INTO accommodations (id, student_id, type, cost, date) VALUES (2, 2, \u0027note taker\u0027, 500, \u00272022-02-01\u0027);", + "sql": "SELECT MIN(cost) FROM accommodations WHERE type \u003d \u0027wheelchair\u0027 AND date \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 YEAR);", + "sql_explanation": "This query calculates the minimum cost of accommodations provided to students with mobility impairments in the past year. It does this by filtering on the type of accommodation (wheelchair) and the date (past year) and then finding the minimum cost." 
+}, { + "id": "3192", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum budget for accommodations for students with physical disabilities in the Southwest?", + "sql_context": "CREATE TABLE Accommodations (ID INT, Type VARCHAR(50), Cost FLOAT, Disability VARCHAR(50), Region VARCHAR(50)); INSERT INTO Accommodations (ID, Type, Cost, Disability, Region) VALUES (1, \u0027Wheelchair Accessibility\u0027, 2000.0, \u0027Physical Disability\u0027, \u0027Southwest\u0027), (2, \u0027Adaptive Equipment\u0027, 2500.0, \u0027Physical Disability\u0027, \u0027Southwest\u0027), (3, \u0027Sign Language Interpretation\u0027, 1500.0, \u0027Physical Disability\u0027, \u0027Southwest\u0027);", + "sql": "SELECT MAX(Cost) FROM Accommodations WHERE Disability \u003d \u0027Physical Disability\u0027 AND Region \u003d \u0027Southwest\u0027;", + "sql_explanation": "This SQL query calculates the maximum budget for accommodations for students with physical disabilities in the Southwest by selecting the maximum of the Cost column from the Accommodations table where the Disability is \u0027Physical Disability\u0027 and the Region is \u0027Southwest\u0027. The results are then returned as the maximum budget for accommodations for students with physical disabilities in the Southwest." 
+}, { + "id": "3363", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of accommodations for students with hearing impairments in April?", + "sql_context": "CREATE TABLE Accommodations (student_id INT, accommodation_type VARCHAR(255), cost FLOAT, month INT);", + "sql": "SELECT SUM(cost) FROM Accommodations WHERE accommodation_type \u003d \u0027Hearing Impairment\u0027 AND month \u003d 4;", + "sql_explanation": "The SQL query calculates the total cost of accommodations for students with hearing impairments in April by summing the cost values in the Accommodations table where the accommodation_type is \u0027Hearing Impairment\u0027 and the month is 4." 
+}, { + "id": "3474", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What is the policy advocacy history for programs with a focus on mental health in Washington and Oregon?", + "sql_context": "CREATE TABLE Programs (program_id INT, state VARCHAR(50), focus_area VARCHAR(50), policy_advocacy_history TEXT);", + "sql": "SELECT * FROM Programs WHERE state IN (\u0027Washington\u0027, \u0027Oregon\u0027) AND focus_area \u003d \u0027mental health\u0027;", + "sql_explanation": "This query creates a table named \u0027Programs\u0027 with 4 columns: program_id, state, focus_area, and policy_advocacy_history. Then it inserts a sample record for a program with a focus on mental health in Washington in the \u0027Programs\u0027 table. The query then retrieves the policy advocacy history for programs with a focus on mental health in Washington and Oregon by filtering the data based on the given conditions." 
+}, { + "id": "3601", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many disability accommodations were made in \u0027Springfield\u0027 last year?", + "sql_context": "CREATE TABLE DisabilityAccommodations (location VARCHAR(25), year INT, count INT); INSERT INTO DisabilityAccommodations (location, year, count) VALUES (\u0027New York\u0027, 2020, 150), (\u0027Springfield\u0027, 2020, 200), (\u0027Los Angeles\u0027, 2020, 175), (\u0027Chicago\u0027, 2020, 160), (\u0027Springfield\u0027, 2019, 180);", + "sql": "SELECT COUNT(*) FROM DisabilityAccommodations WHERE location \u003d \u0027Springfield\u0027 AND year \u003d 2020;", + "sql_explanation": "This SQL query counts the number of disability accommodations made in \u0027Springfield\u0027 last year by selecting the records for \u0027Springfield\u0027 in the year 2020 and counting the number of records." 
+}, { + "id": "3846", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of accommodations per student who utilizes assistive technology?", + "sql_context": "CREATE TABLE accommodations (accommodation_cost DECIMAL(5,2), student_id INT, utilizes_assistive_tech BOOLEAN); INSERT INTO accommodations (accommodation_cost, student_id, utilizes_assistive_tech) VALUES (100.00, 1, TRUE), (200.00, 2, FALSE);", + "sql": "SELECT AVG(accommodation_cost) FROM accommodations WHERE utilizes_assistive_tech \u003d TRUE;", + "sql_explanation": "This SQL query calculates the average cost of accommodations per student who utilizes assistive technology by averaging the \u0027accommodation_cost\u0027 column in the \u0027accommodations\u0027 table where the \u0027utilizes_assistive_tech\u0027 is TRUE." 
+}, { + "id": "4059", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Which support programs were added in Q3 2021?", + "sql_context": "CREATE TABLE SupportPrograms (Id INT, Name VARCHAR(100), Description TEXT, DateAdded DATETIME); INSERT INTO SupportPrograms (Id, Name, Description, DateAdded) VALUES (1, \u0027Bridge Program\u0027, \u0027Assists students with disabilities transition to college\u0027, \u00272021-07-01 10:30:00\u0027);", + "sql": "SELECT * FROM SupportPrograms WHERE DateAdded BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027;", + "sql_explanation": "The SQL query filters the SupportPrograms table for records added between July 1 and September 30, 2021, returning any support programs added during Q3 2021." 
+}, { + "id": "4062", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum budget for policy advocacy in \"East Coast\" region in 2020?", + "sql_context": "CREATE TABLE Policy_Advocacy (advocacy_id INT, region VARCHAR(20), budget DECIMAL(10, 2), year INT); INSERT INTO Policy_Advocacy (advocacy_id, region, budget, year) VALUES (1, \u0027Southeast\u0027, 5000, 2020), (2, \u0027Northwest\u0027, 6000, 2019), (3, \u0027East Coast\u0027, 7000, 2020), (4, \u0027East Coast\u0027, 6000, 2019);", + "sql": "SELECT MAX(budget) FROM Policy_Advocacy WHERE region \u003d \u0027East Coast\u0027 AND year \u003d 2020;", + "sql_explanation": "The query selects the maximum budget for rows where the region is \u0027East Coast\u0027 and the year is 2020." 
+}, { + "id": "4131", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget for policy advocacy in \"Northeast\" region in 2019?", + "sql_context": "CREATE TABLE Policy_Advocacy (advocacy_id INT, region VARCHAR(20), budget DECIMAL(10, 2), year INT); INSERT INTO Policy_Advocacy (advocacy_id, region, budget, year) VALUES (1, \u0027Southeast\u0027, 5000, 2020), (2, \u0027Northwest\u0027, 6000, 2019), (3, \u0027East Coast\u0027, 7000, 2020), (4, \u0027East Coast\u0027, 6000, 2019), (5, \u0027Northeast\u0027, 8000, 2019), (6, \u0027Northeast\u0027, 9000, 2018);", + "sql": "SELECT SUM(budget) FROM Policy_Advocacy WHERE region \u003d \u0027Northeast\u0027 AND year \u003d 2019;", + "sql_explanation": "The query calculates the total budget for rows where the region is \u0027Northeast\u0027 and the year is 2019." 
+}, { + "id": "4273", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the cost of \"visual_aids\" to 300.0 in the \"accommodations\" table", + "sql_context": "CREATE TABLE accommodations (id INT, student_id INT, accommodation_type VARCHAR(255), cost FLOAT); INSERT INTO accommodations (id, student_id, accommodation_type, cost) VALUES (1, 123, \u0027visual_aids\u0027, 250.0), (2, 456, \u0027audio_aids\u0027, 100.0), (3, 789, \u0027large_print_materials\u0027, 120.0), (4, 890, \u0027mobility_aids\u0027, 300.0);", + "sql": "UPDATE accommodations SET cost \u003d 300.0 WHERE accommodation_type \u003d \u0027visual_aids\u0027;", + "sql_explanation": "This SQL query updates the cost of \"visual_aids\" to 300.0 in the \"accommodations\" table by using the UPDATE statement, filtering the table where the accommodation_type is \u0027visual_aids\u0027, and setting the cost to 300.0." 
+}, { + "id": "4436", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the accommodations table that are not wheelchair ramps or elevators.", + "sql_context": "CREATE TABLE accommodations (id INT, type VARCHAR(255), description VARCHAR(255)); INSERT INTO accommodations (id, type, description) VALUES (1, \u0027Wheelchair Ramp\u0027, \u0027Ramp with handrails and non-slip surface\u0027); INSERT INTO accommodations (id, type, description) VALUES (2, \u0027Elevator\u0027, \u0027Standard elevator for building access\u0027); INSERT INTO accommodations (id, type, description) VALUES (3, \u0027Automatic Door\u0027, \u0027Door that opens automatically for accessibility\u0027);", + "sql": "DELETE FROM accommodations WHERE type NOT IN (\u0027Wheelchair Ramp\u0027, \u0027Elevator\u0027);", + "sql_explanation": "The SQL query deletes all records from the accommodations table that are not wheelchair ramps or elevators by using the DELETE statement and specifying the condition for the rows to be deleted." 
+}, { + "id": "4437", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record for \u0027Sign Language Interpreter\u0027 from the support_program table.", + "sql_context": "CREATE TABLE support_program (program_id INT, program_name TEXT, org_id INT); INSERT INTO support_program (program_id, program_name, org_id) VALUES (1, \u0027Adaptive Sports\u0027, 1); INSERT INTO support_program (program_id, program_name, org_id) VALUES (2, \u0027Assistive Technology\u0027, 2); INSERT INTO support_program (program_id, program_name, org_id) VALUES (3, \u0027Sign Language Interpreter\u0027, 1);", + "sql": "DELETE FROM support_program WHERE program_name \u003d \u0027Sign Language Interpreter\u0027;", + "sql_explanation": "This query deletes the record for \u0027Sign Language Interpreter\u0027 from the support_program table using the DELETE statement and specifying the program_name in the WHERE clause." 
+}, { + "id": "4673", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all support programs with their respective budgets, in alphabetical order.", + "sql_context": "CREATE TABLE SupportPrograms (ProgramID INT, ProgramName VARCHAR(50), Budget DECIMAL(10,2)); INSERT INTO SupportPrograms (ProgramID, ProgramName, Budget) VALUES (1, \u0027Art Therapy\u0027, 15000), (2, \u0027Braille Literacy\u0027, 18000), (3, \u0027Communication Assistance\u0027, 22000), (4, \u0027Dietary Accommodations\u0027, 14000), (5, \u0027Hearing Loops\u0027, 20000), (6, \u0027Inclusive Fitness\u0027, 25000), (7, \u0027Low Vision Services\u0027, 19000);", + "sql": "SELECT ProgramName, Budget FROM SupportPrograms ORDER BY ProgramName ASC;", + "sql_explanation": "This query retrieves all support programs with their respective budgets in alphabetical order." 
+}, { + "id": "4826", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum cost for accommodations in the South American region?", + "sql_context": "CREATE TABLE accommodations_3 (id INT, name TEXT, region TEXT, cost FLOAT); INSERT INTO accommodations_3 (id, name, region, cost) VALUES (1, \u0027Wheelchair Ramp\u0027, \u0027South America\u0027, 120000.00), (2, \u0027Sign Language Interpreter\u0027, \u0027South America\u0027, 60000.00);", + "sql": "SELECT MIN(cost) FROM accommodations_3 WHERE region \u003d \u0027South America\u0027;", + "sql_explanation": "This query calculates the minimum cost for accommodations in the South American region by specifying the condition in the WHERE clause and then finding the minimum cost." 
+}, { + "id": "5177", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and email of all staff members involved in disability services?", + "sql_context": "CREATE TABLE Staff (StaffID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Email VARCHAR(50)); INSERT INTO Staff (StaffID, FirstName, LastName, Email) VALUES (1, \u0027Jane\u0027, \u0027Doe\u0027, \u0027[jane.doe@disabilityservices.org](mailto:jane.doe@disabilityservices.org)\u0027); INSERT INTO Staff (StaffID, FirstName, LastName, Email) VALUES (2, \u0027John\u0027, \u0027Doe\u0027, \u0027[john.doe@disabilityservices.org](mailto:john.doe@disabilityservices.org)\u0027);", + "sql": "SELECT Staff.FirstName, Staff.LastName, Staff.Email FROM Staff;", + "sql_explanation": "This query selects the FirstName, LastName, and Email columns from the Staff table." 
+}, { + "id": "5208", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated for disability support programs in the APAC region?", + "sql_context": "CREATE TABLE SupportPrograms (Id INT, Program VARCHAR(50), Region VARCHAR(30), Budget DECIMAL(10, 2)); INSERT INTO SupportPrograms (Id, Program, Region, Budget) VALUES (1, \u0027Sign Language Interpreters\u0027, \u0027APAC\u0027, 50000), (2, \u0027Assistive Technology\u0027, \u0027APAC\u0027, 80000), (3, \u0027Adaptive Furniture\u0027, \u0027APAC\u0027, 30000), (4, \u0027Mobility Equipment\u0027, \u0027APAC\u0027, 70000);", + "sql": "SELECT AVG(Budget) FROM SupportPrograms WHERE Region \u003d \u0027APAC\u0027;", + "sql_explanation": "This query calculates the average budget allocated for disability support programs in the APAC region by filtering the SupportPrograms table for rows with a Region value of \u0027APAC\u0027 and then calculating the average value of the Budget column." 
+}, { + "id": "5253", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete any records related to accommodations made for students with disabilities who graduated before 2020.", + "sql_context": "CREATE TABLE StudentAccommodations (StudentID INT, StudentName VARCHAR(255), DisabilityType VARCHAR(255), GraduationYear INT); INSERT INTO StudentAccommodations (StudentID, StudentName, DisabilityType, GraduationYear) VALUES (1, \u0027John Doe\u0027, \u0027Visual Impairment\u0027, 2018), (2, \u0027Jane Smith\u0027, \u0027Hearing Impairment\u0027, 2022), (3, \u0027Michael Johnson\u0027, \u0027Mobility Impairment\u0027, 2019);", + "sql": "DELETE FROM StudentAccommodations WHERE GraduationYear \u003c 2020;", + "sql_explanation": "The SQL query deletes any records related to accommodations made for students with disabilities who graduated before 2020. It uses the WHERE clause to filter the data to only include records where the GraduationYear is less than 2020. Then, it deletes those records using the DELETE statement." 
+}, { + "id": "5298", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the budget of support_programs_5 with ID 3 to 90000.00.", + "sql_context": "CREATE TABLE support_programs_5 (id INT, name TEXT, region TEXT, budget FLOAT); INSERT INTO support_programs_5 (id, name, region, budget) VALUES (1, \u0027Accessible Tech\u0027, \u0027Asia\u0027, 50000.00), (2, \u0027Mobility Training\u0027, \u0027Asia\u0027, 75000.00), (3, \u0027Assistive Devices\u0027, \u0027Asia\u0027, 100000.00);", + "sql": "UPDATE support_programs_5 SET budget \u003d 90000.00 WHERE id \u003d 3;", + "sql_explanation": "This query updates the budget of support_programs_5 with ID 3 to 90000.00 by specifying the condition in the WHERE clause." 
+}, { + "id": "5408", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record of the disability policy advocacy event that took place on 2021-07-01.", + "sql_context": "CREATE TABLE AdvocacyEvents (EventID INT, EventName VARCHAR(50), EventDate DATETIME); INSERT INTO AdvocacyEvents (EventID, EventName, EventDate) VALUES (1, \u0027Event A\u0027, \u00272021-01-01\u0027), (2, \u0027Event B\u0027, \u00272021-02-01\u0027), (3, \u0027Event C\u0027, \u00272021-07-01\u0027), (4, \u0027Event D\u0027, \u00272021-08-01\u0027);", + "sql": "DELETE FROM AdvocacyEvents WHERE EventDate \u003d \u00272021-07-01\u0027;", + "sql_explanation": "This query deletes the record of the disability policy advocacy event that took place on 2021-07-01." +}, { + "id": "5633", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Display support programs in alphabetical order by name", + "sql_context": "CREATE TABLE support_programs (program_id INT PRIMARY KEY, name VARCHAR(255), description TEXT, category VARCHAR(255), budget DECIMAL(10,2));", + "sql": "SELECT * FROM support_programs ORDER BY name ASC;", + "sql_explanation": "We select all columns from the support_programs table and order the results by the name column in ascending order." 
+}, { + "id": "5712", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average accommodation cost per student in the StudentAccommodations table?", + "sql_context": "CREATE TABLE StudentAccommodations (studentID INT, accommodationType VARCHAR(50), cost FLOAT);", + "sql": "SELECT AVG(cost) FROM StudentAccommodations;", + "sql_explanation": "This SQL query calculates the average cost of accommodations by using the AVG function on the \u0027cost\u0027 column in the \u0027StudentAccommodations\u0027 table." +}, { + "id": "5762", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Retrieve all records from the disability_accommodations table", + "sql_context": "CREATE TABLE disability_accommodations (id INT PRIMARY KEY, student_id INT, faculty_id INT, accommodation_type VARCHAR(50), start_date DATE, end_date DATE);", + "sql": "SELECT * FROM disability_accommodations;", + "sql_explanation": "This SQL query retrieves all records from the disability_accommodations table." 
+}, { + "id": "5802", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record with ID 4 from the \u0027Staff\u0027 table.", + "sql_context": "CREATE TABLE Staff (StaffID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Position VARCHAR(50)); INSERT INTO Staff (StaffID, FirstName, LastName, Position) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Manager\u0027), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Assistant Manager\u0027), (3, \u0027Bob\u0027, \u0027Smith\u0027, \u0027Coordinator\u0027), (4, \u0027Alice\u0027, \u0027Johnson\u0027, \u0027Specialist\u0027);", + "sql": "DELETE FROM Staff WHERE StaffID \u003d 4;", + "sql_explanation": "This query deletes the record with ID 4 from the Staff table. It achieves this by specifying the WHERE clause to filter based on the StaffID column." 
+}, { + "id": "5830", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget spent on disability support programs per country?", + "sql_context": "CREATE TABLE Country (id INT, name TEXT, budget DECIMAL(10,2)); INSERT INTO Country (id, name, budget) VALUES (1, \u0027USA\u0027, 5000000.00), (2, \u0027Canada\u0027, 3000000.00);", + "sql": "SELECT AVG(budget) FROM Country;", + "sql_explanation": "This query calculates the average budget spent on disability support programs by taking the average of the \u0027budget\u0027 column in the \u0027Country\u0027 table." +}, { + "id": "5839", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all policy advocacy initiatives and their budgets", + "sql_context": "CREATE TABLE policy_advocacy (initiative_id INT, initiative_name VARCHAR(30), budget DECIMAL(10,2)); INSERT INTO policy_advocacy (initiative_id, initiative_name, budget) VALUES (1, \u0027Accessibility Laws\u0027, 50000), (2, \u0027Inclusive Education\u0027, 75000), (3, \u0027Employment Policies\u0027, 60000);", + "sql": "SELECT * FROM policy_advocacy;", + "sql_explanation": "This query selects all columns from the policy_advocacy table." 
+}, { + "id": "5846", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Show all records from the 2022 policies view", + "sql_context": "CREATE TABLE disability_policies (id INT PRIMARY KEY, policy_name VARCHAR(100), description TEXT, effective_date DATE); INSERT INTO disability_policies (id, policy_name, description, effective_date) VALUES (1, \u0027Policy 1\u0027, \u0027Description 1\u0027, \u00272022-01-01\u0027), (2, \u0027Policy 2\u0027, \u0027Description 2\u0027, \u00272022-02-01\u0027), (3, \u0027Policy 3\u0027, \u0027Description 3\u0027, \u00272022-03-01\u0027), (4, \u0027Policy 4\u0027, \u0027Description 4\u0027, \u00272022-04-01\u0027); CREATE VIEW policies_2022 AS SELECT * FROM disability_policies WHERE effective_date \u003e\u003d \u00272022-01-01\u0027 AND effective_date \u003c\u003d \u00272022-12-31\u0027;", + "sql": "SELECT * FROM policies_2022;", + "sql_explanation": "This retrieves all records from the \u0027policies_2022\u0027 view, showing only policies with an effective date in 2022." 
+}, { + "id": "576", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of citizen feedback related to housing and urban development in 2022?", + "sql_context": "CREATE TABLE CitizenFeedback (Year INT, Topic VARCHAR(20), Feedback VARCHAR(10)); INSERT INTO CitizenFeedback (Year, Topic, Feedback) VALUES (2022, \u0027Housing and Urban Development\u0027, \u0027Positive\u0027), (2022, \u0027Housing and Urban Development\u0027, \u0027Negative\u0027), (2022, \u0027Housing and Urban Development\u0027, \u0027Neutral\u0027), (2022, \u0027Environment\u0027, \u0027Positive\u0027), (2022, \u0027Environment\u0027, \u0027Negative\u0027);", + "sql": "SELECT (COUNT(CASE WHEN Topic \u003d \u0027Housing and Urban Development\u0027 AND Feedback IN (\u0027Positive\u0027, \u0027Negative\u0027, \u0027Neutral\u0027) THEN 1 END) * 100.0 / COUNT(*)) as Pct_Housing_Feedback FROM CitizenFeedback WHERE Year \u003d 2022;", + "sql_explanation": "Calculate the percentage of citizen feedback related to housing and urban development in 2022 by filtering the Year and Topic columns and counting the number of feedback entries." 
+}, { + "id": "1452", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the names of the services \u0027Police\u0027 and \u0027Fire\u0027 to \u0027Public Safety - Police\u0027 and \u0027Public Safety - Fire\u0027 in the \u0027StateData\u0027 schema\u0027s \u0027StateServices\u0027 table.", + "sql_context": "CREATE SCHEMA StateData; CREATE TABLE StateServices (Service varchar(255), Type varchar(255)); INSERT INTO StateServices (Service, Type) VALUES (\u0027Police\u0027, \u0027Safety\u0027), (\u0027Fire\u0027, \u0027Safety\u0027), (\u0027Transportation\u0027, \u0027Infrastructure\u0027);", + "sql": "UPDATE StateData.StateServices SET Service \u003d CASE Service WHEN \u0027Police\u0027 THEN \u0027Public Safety - Police\u0027 WHEN \u0027Fire\u0027 THEN \u0027Public Safety - Fire\u0027 ELSE Service END;", + "sql_explanation": "Update the \u0027Service\u0027 column values in the \u0027StateServices\u0027 table in the \u0027StateData\u0027 schema by replacing \u0027Police\u0027 and \u0027Fire\u0027 with \u0027Public Safety - Police\u0027 and \u0027Public Safety - Fire\u0027, respectively, while preserving the original values for other services." 
+}, { + "id": "1878", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which schools have the highest proportion of female teachers?", + "sql_context": "CREATE TABLE schools (id INT, name TEXT, num_teachers INT, num_female_teachers INT);", + "sql": "SELECT s.name, (s.num_female_teachers::FLOAT/s.num_teachers) as prop_female_teachers FROM schools s ORDER BY prop_female_teachers DESC LIMIT 1;", + "sql_explanation": "This query calculates the proportion of female teachers for each school by dividing the number of female teachers by the total number of teachers. It then filters for the school with the highest proportion by sorting in descending order and limiting the results to 1." 
+}, { + "id": "2272", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and cities of public parks that have not received any maintenance in the past 9 months?", + "sql_context": "CREATE TABLE public_parks (name TEXT, city TEXT, last_maintenance_date DATE); INSERT INTO public_parks (name, city, last_maintenance_date) VALUES (\u0027Park A\u0027, \u0027Los Angeles\u0027, \u00272021-06-01\u0027), (\u0027Park B\u0027, \u0027Los Angeles\u0027, \u00272022-02-15\u0027), (\u0027Park C\u0027, \u0027New York\u0027, NULL);", + "sql": "SELECT name, city FROM public_parks WHERE last_maintenance_date IS NULL OR last_maintenance_date \u003c DATEADD(month, -9, GETDATE());", + "sql_explanation": "This query selects the name and city columns from the public_parks table, filters the results to include only those records where the last_maintenance_date is NULL (indicating no maintenance has been recorded) or is older than 9 months, based on the current date." 
+}, { + "id": "2369", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many traffic violations were issued in the city of Los Angeles in the year 2021?", + "sql_context": "CREATE TABLE traffic_violations (violation_id INTEGER, violation_location TEXT, violation_date DATE); INSERT INTO traffic_violations (violation_id, violation_location, violation_date) VALUES (1, \u0027Los Angeles\u0027, \u00272021-01-01\u0027), (2, \u0027Los Angeles\u0027, \u00272021-05-12\u0027), (3, \u0027New York\u0027, \u00272021-12-31\u0027);", + "sql": "SELECT COUNT(*) FROM traffic_violations WHERE violation_location \u003d \u0027Los Angeles\u0027 AND EXTRACT(YEAR FROM violation_date) \u003d 2021;", + "sql_explanation": "This SQL query counts the number of traffic violations issued in the city of Los Angeles in the year 2021. It does this by using the COUNT() function, which returns the number of rows that match a specified condition. The query filters the traffic_violations table to only include violations issued in Los Angeles and in the year 2021 using the WHERE clause and the EXTRACT() function." 
+}, { + "id": "2520", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the budget allocations for the top 2 most expensive public facilities in the state of California?", + "sql_context": "CREATE TABLE public_facilities (name TEXT, state TEXT, budget_allocation INT); INSERT INTO public_facilities (name, state, budget_allocation) VALUES (\u0027Facility A\u0027, \u0027California\u0027, 600000), (\u0027Facility B\u0027, \u0027California\u0027, 550000), (\u0027Facility C\u0027, \u0027California\u0027, 500000);", + "sql": "SELECT name, budget_allocation FROM public_facilities WHERE state \u003d \u0027California\u0027 ORDER BY budget_allocation DESC LIMIT 2;", + "sql_explanation": "This query selects the name and budget_allocation columns from the public_facilities table, filters the results by the state of California, orders the results by budget_allocation in descending order, and limits the output to the top 2 records." 
+}, { + "id": "2668", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum budget allocated for transportation in the state of \"California\" in the year 2018?", + "sql_context": "CREATE TABLE budget_allocation (year INT, state TEXT, category TEXT, amount FLOAT); INSERT INTO budget_allocation (year, state, category, amount) VALUES (2020, \u0027California\u0027, \u0027Transportation\u0027, 12000000), (2019, \u0027California\u0027, \u0027Transportation\u0027, 10000000), (2018, \u0027California\u0027, \u0027Transportation\u0027, 8000000);", + "sql": "SELECT MIN(amount) FROM budget_allocation WHERE year \u003d 2018 AND state \u003d \u0027California\u0027 AND category \u003d \u0027Transportation\u0027;", + "sql_explanation": "This SQL query calculates the minimum budget allocated for transportation in the state of \"California\" in the year 2018 by using the MIN function on the amount column and filtering the records with the WHERE clause for the year 2018, state \"California\", and category \"Transportation\"." 
+}, { + "id": "2679", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average citizen satisfaction score for public libraries in the state of Texas?", + "sql_context": "CREATE TABLE citizen_satisfaction (state VARCHAR(255), service_type VARCHAR(255), score FLOAT); INSERT INTO citizen_satisfaction (state, service_type, score) VALUES (\u0027Texas\u0027, \u0027Public Libraries\u0027, 8.2), (\u0027Texas\u0027, \u0027Public Schools\u0027, 7.8), (\u0027Texas\u0027, \u0027Public Hospitals\u0027, 7.5);", + "sql": "SELECT AVG(score) AS avg_score FROM citizen_satisfaction WHERE state \u003d \u0027Texas\u0027 AND service_type \u003d \u0027Public Libraries\u0027;", + "sql_explanation": "This query calculates the average citizen satisfaction score for public libraries in the state of Texas by averaging the score values in the citizen_satisfaction table where the state is Texas and the service_type is Public Libraries." 
+}, { + "id": "2864", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency calls in the city of Los Angeles in 2021?", + "sql_context": "CREATE TABLE emergency_calls (city VARCHAR(255), year INT, response_time FLOAT); INSERT INTO emergency_calls (city, year, response_time) VALUES (\u0027Los Angeles\u0027, 2021, 8.5), (\u0027Los Angeles\u0027, 2021, 7.8), (\u0027Los Angeles\u0027, 2021, 9.2), (\u0027Los Angeles\u0027, 2021, 8.9), (\u0027Los Angeles\u0027, 2021, 7.6);", + "sql": "SELECT AVG(response_time) AS avg_response_time FROM emergency_calls WHERE city \u003d \u0027Los Angeles\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the average response time for emergency calls in the city of Los Angeles in 2021 by averaging the response_time values in the emergency_calls table where the city is Los Angeles and the year is 2021." 
+}, { + "id": "2928", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount spent on waste management services in Texas, only considering cities with a population greater than 500,000?", + "sql_context": "CREATE TABLE waste_management (service_id INT, service_name TEXT, city TEXT, state TEXT, cost INT); INSERT INTO waste_management (service_id, service_name, city, state, cost) VALUES (1, \u0027City of Houston Waste Services\u0027, \u0027Houston\u0027, \u0027Texas\u0027, 100000000); INSERT INTO waste_management (service_id, service_name, city, state, cost) VALUES (2, \u0027City of Dallas Waste Collection\u0027, \u0027Dallas\u0027, \u0027Texas\u0027, 80000000); INSERT INTO waste_management (service_id, service_name, city, state, cost) VALUES (3, \u0027San Antonio Solid Waste Management\u0027, \u0027San Antonio\u0027, \u0027Texas\u0027, 70000000);", + "sql": "SELECT SUM(cost) FROM waste_management WHERE state \u003d \u0027Texas\u0027 AND city IN (\u0027Houston\u0027, \u0027Dallas\u0027, \u0027San Antonio\u0027);", + "sql_explanation": "This query calculates the total amount spent on waste management services in Texas for cities with a population greater than 500,000 (Houston, Dallas, and San Antonio)." 
+}, { + "id": "2994", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocation for the education sector in the state of New York and California?", + "sql_context": "CREATE TABLE state_budget (state VARCHAR(20), sector VARCHAR(20), allocation INT); INSERT INTO state_budget (state, sector, allocation) VALUES (\u0027New York\u0027, \u0027Education\u0027, 12000), (\u0027New York\u0027, \u0027Healthcare\u0027, 15000), (\u0027California\u0027, \u0027Education\u0027, 10000), (\u0027California\u0027, \u0027Healthcare\u0027, 18000);", + "sql": "SELECT SUM(allocation) FROM state_budget WHERE state IN (\u0027New York\u0027, \u0027California\u0027) AND sector \u003d \u0027Education\u0027;", + "sql_explanation": "This query calculates the total budget allocation for the education sector in the states of New York and California." 
+}, { + "id": "3172", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for education in the state of New York in the year 2022?", + "sql_context": "CREATE TABLE education_budget (budget_id INT, budget_year INT, budget_state TEXT, budget_amount FLOAT); INSERT INTO education_budget (budget_id, budget_year, budget_state, budget_amount) VALUES (1, 2022, \u0027New York\u0027, 18000000), (2, 2021, \u0027New York\u0027, 17000000), (3, 2022, \u0027Texas\u0027, 20000000);", + "sql": "SELECT SUM(budget_amount) FROM education_budget WHERE budget_year \u003d 2022 AND budget_state \u003d \u0027New York\u0027;", + "sql_explanation": "This query calculates the total budget allocated for education in the state of New York in the year 2022 by selecting the SUM function on the budget_amount column, and filtering rows by budget_year column where the value is 2022 and budget_state column where the value is \u0027New York\u0027." 
+}, { + "id": "3201", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total budgets for public services in 2023, excluding the social services and environment protection services?", + "sql_context": "CREATE TABLE budget_2023 (service TEXT, budget INTEGER); INSERT INTO budget_2023 (service, budget) VALUES (\u0027Social Services\u0027, 1600000), (\u0027Environment Protection\u0027, 1500000), (\u0027Healthcare\u0027, 1800000);", + "sql": "SELECT SUM(budget) FROM budget_2023 WHERE service NOT IN (\u0027Social Services\u0027, \u0027Environment Protection\u0027);", + "sql_explanation": "This query calculates the total budget for public services in 2023, excluding the social services and environment protection services, by summing the budget values where the service is not social services or environment protection." 
+}, { + "id": "3294", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for public parks in the state of New South Wales?", + "sql_context": "CREATE TABLE state_services (state VARCHAR(20), service VARCHAR(20), budget INT); INSERT INTO state_services (state, service, budget) VALUES (\u0027New South Wales\u0027, \u0027Public Parks\u0027, 15000000);", + "sql": "SELECT SUM(budget) FROM state_services WHERE state \u003d \u0027New South Wales\u0027 AND service \u003d \u0027Public Parks\u0027;", + "sql_explanation": "The SQL query calculates the total budget allocated for public parks in the state of New South Wales by selecting the budget values for the specified state and service, and then summing the values using the SUM function." 
+}, { + "id": "3451", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a new table \u0027public_feedback\u0027 with columns \u0027city\u0027, \u0027service\u0027, \u0027rating\u0027, and \u0027comment\u0027.", + "sql_context": "CREATE TABLE public_services (id INT PRIMARY KEY, service VARCHAR(255), location VARCHAR(255), budget DECIMAL(10, 2), provider VARCHAR(255)); CREATE TABLE citizen_feedback (id INT PRIMARY KEY, city VARCHAR(255), age INT, feedback TEXT);", + "sql": "CREATE TABLE public_feedback (city VARCHAR(255), service VARCHAR(255), rating INT, comment TEXT);", + "sql_explanation": "1. Create a new table named \u0027public_feedback\u0027. 2. Add columns named \u0027city\u0027, \u0027service\u0027, \u0027rating\u0027, and \u0027comment\u0027. 3. Set all columns as nullable. 4. Do not define any primary key or foreign keys." 
+}, { + "id": "3642", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget allocated to infrastructure in 2021 and 2022?", + "sql_context": "CREATE TABLE Budget (Year INT, Department VARCHAR(20), Amount INT); INSERT INTO Budget VALUES (2021, \u0027Infrastructure\u0027, 1000000), (2022, \u0027Infrastructure\u0027, 1200000);", + "sql": "SELECT SUM(Amount) FROM Budget WHERE Department \u003d \u0027Infrastructure\u0027 AND Year IN (2021, 2022);", + "sql_explanation": "The SQL query calculates the total budget allocated to infrastructure in 2021 and 2022 by summing the Amount column for the corresponding rows." +}, { + "id": "3643", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for healthcare and education services in 2021?", + "sql_context": "CREATE TABLE Budget (Year INT, Service TEXT, Amount INT); INSERT INTO Budget (Year, Service, Amount) VALUES (2021, \u0027Healthcare\u0027, 1500000), (2021, \u0027Education\u0027, 1200000);", + "sql": "SELECT SUM(Amount) FROM Budget WHERE Year \u003d 2021 AND Service IN (\u0027Healthcare\u0027, \u0027Education\u0027);", + "sql_explanation": "Summing the amounts for the specified services and year." 
+}, { + "id": "3728", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of public parks in urban areas and suburban areas, and calculate the ratio.", + "sql_context": "CREATE TABLE areas (name text, type text); INSERT INTO areas VALUES (\u0027Urban\u0027, \u0027CityA\u0027), (\u0027Suburban\u0027, \u0027CityB\u0027), (\u0027Urban\u0027, \u0027CityC\u0027), (\u0027Rural\u0027, \u0027CityD\u0027); CREATE TABLE parks (name text, area_type text); INSERT INTO parks VALUES (\u0027Park1\u0027, \u0027Urban\u0027), (\u0027Park2\u0027, \u0027Urban\u0027), (\u0027Park3\u0027, \u0027Suburban\u0027), (\u0027Park4\u0027, \u0027Rural\u0027);", + "sql": "SELECT urban_parks / COUNT(DISTINCT area_type), urban_parks + suburban_parks AS total_parks", + "sql_explanation": "The SQL query calculates the ratio of public parks in urban areas to the total number of parks." 
+}, { + "id": "3754", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many citizen feedback submissions were made for parks and recreation services in Chicago?", + "sql_context": "CREATE TABLE feedback (submission_id INT, submission_date DATE, service VARCHAR(50), city VARCHAR(50)); INSERT INTO feedback (submission_id, submission_date, service, city) VALUES (1, \u00272022-02-15\u0027, \u0027Parks and Recreation\u0027, \u0027Chicago\u0027), (2, \u00272022-02-20\u0027, \u0027Waste Management\u0027, \u0027Chicago\u0027), (3, \u00272022-02-25\u0027, \u0027Parks and Recreation\u0027, \u0027Chicago\u0027);", + "sql": "SELECT COUNT(*) FROM feedback WHERE service \u003d \u0027Parks and Recreation\u0027 AND city \u003d \u0027Chicago\u0027;", + "sql_explanation": "Count the number of citizen feedback submissions for parks and recreation services in Chicago by filtering the \u0027service\u0027 column and the \u0027city\u0027 column and then counting the number of rows." 
+}, { + "id": "3780", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of fines issued by the police department of Houston in the year 2019?", + "sql_context": "CREATE TABLE fines (fine_id INT, fine_date DATE, fine_city TEXT, fine_amount FLOAT); INSERT INTO fines (fine_id, fine_date, fine_city, fine_amount) VALUES (1, \u00272019-01-01\u0027, \u0027Houston\u0027, 100), (2, \u00272019-06-15\u0027, \u0027Houston\u0027, 200), (3, \u00272020-12-31\u0027, \u0027Houston\u0027, 50);", + "sql": "SELECT SUM(fine_amount) FROM fines WHERE fine_city \u003d \u0027Houston\u0027 AND YEAR(fine_date) \u003d 2019;", + "sql_explanation": "This query calculates the total amount of fines issued by the police department of Houston in the year 2019 by selecting the SUM function on the fine_amount column, and filtering rows by fine_city column where the value is \u0027Houston\u0027 and YEAR function on fine_date column where the value is 2019." 
+}, { + "id": "3799", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated to public services in the East region in 2020?", + "sql_context": "CREATE TABLE Budget (Year INT, Service VARCHAR(255), Region VARCHAR(255), Amount DECIMAL(10,2)); INSERT INTO Budget (Year, Service, Region, Amount) VALUES (2020, \u0027Healthcare\u0027, \u0027East\u0027, 1000000.00), (2020, \u0027Education\u0027, \u0027East\u0027, 1200000.00), (2020, \u0027Transportation\u0027, \u0027East\u0027, 800000.00), (2020, \u0027Utilities\u0027, \u0027East\u0027, 1100000.00);", + "sql": "SELECT SUM(Amount) FROM Budget WHERE Year \u003d 2020 AND Region \u003d \u0027East\u0027 AND Type \u003d \u0027Public\u0027;", + "sql_explanation": "This query sums the Amount column for all rows in the Budget table where the Year is 2020, the Region is East, and the Type is Public." 
+}, { + "id": "3831", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for healthcare in the city of Toronto?", + "sql_context": "CREATE TABLE global_budget (city VARCHAR(20), category VARCHAR(20), budget INT); INSERT INTO global_budget (city, category, budget) VALUES (\u0027Toronto\u0027, \u0027Healthcare\u0027, 7000000);", + "sql": "SELECT SUM(budget) FROM global_budget WHERE city \u003d \u0027Toronto\u0027 AND category \u003d \u0027Healthcare\u0027;", + "sql_explanation": "The SQL query calculates the total budget allocated for healthcare in the city of Toronto by selecting the budget values for the specified city and category, and then summing the values using the SUM function." 
+}, { + "id": "3936", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocation for education in urban areas?", + "sql_context": "CREATE TABLE budget (service varchar(20), location varchar(20), allocation int); INSERT INTO budget (service, location, allocation) VALUES (\u0027Education\u0027, \u0027Urban\u0027, 5000000), (\u0027Healthcare\u0027, \u0027Urban\u0027, 7000000), (\u0027Education\u0027, \u0027Rural\u0027, 3000000), (\u0027Healthcare\u0027, \u0027Rural\u0027, 4000000);", + "sql": "SELECT AVG(allocation) FROM budget WHERE service \u003d \u0027Education\u0027 AND location \u003d \u0027Urban\u0027;", + "sql_explanation": "This query calculates the average budget allocation for education in urban areas by filtering the budget table for the education service and urban location, and then computing the average allocation." 
+}, { + "id": "3996", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many citizen feedback records were created in January 2022?", + "sql_context": "CREATE TABLE feedback (id INT, created_at DATETIME); INSERT INTO feedback (id, created_at) VALUES (1, \u00272022-01-01 12:34:56\u0027), (2, \u00272022-01-15 10:20:34\u0027), (3, \u00272022-02-20 16:45:01\u0027);", + "sql": "SELECT COUNT(*) FROM feedback WHERE created_at BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027;", + "sql_explanation": "Count the number of citizen feedback records created in January 2022 by filtering records between \u00272022-01-01\u0027 and \u00272022-01-31\u0027." 
+}, { + "id": "4048", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public community centers are there in the Southeast region?", + "sql_context": "CREATE TABLE CommunityCenter (Name VARCHAR(255), Region VARCHAR(255), Type VARCHAR(255)); INSERT INTO CommunityCenter (Name, Region, Type) VALUES (\u0027Southeast Community Center\u0027, \u0027Southeast\u0027, \u0027Public\u0027), (\u0027Northeast Community Center\u0027, \u0027Northeast\u0027, \u0027Public\u0027), (\u0027Southwest Community Center\u0027, \u0027Southwest\u0027, \u0027Public\u0027), (\u0027Northwest Community Center\u0027, \u0027Northwest\u0027, \u0027Public\u0027);", + "sql": "SELECT COUNT(*) FROM CommunityCenter WHERE Region \u003d \u0027Southeast\u0027 AND Type \u003d \u0027Public\u0027;", + "sql_explanation": "This query counts all rows in the \u0027CommunityCenter\u0027 table where \u0027Region\u0027 is \u0027Southeast\u0027 and \u0027Type\u0027 is \u0027Public\u0027." 
+}, { + "id": "4101", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for infrastructure in 2023, in the \u0027annual_budget\u0027 table?", + "sql_context": "CREATE TABLE annual_budget (year INT, category VARCHAR(255), budget INT); INSERT INTO annual_budget (year, category, budget) VALUES (2022, \u0027Education\u0027, 1000000), (2023, \u0027Infrastructure\u0027, 1500000);", + "sql": "SELECT budget FROM annual_budget WHERE year \u003d 2023 AND category \u003d \u0027Infrastructure\u0027;", + "sql_explanation": "This query retrieves the total budget allocated for infrastructure in 2023 from the \u0027annual_budget\u0027 table by selecting the \u0027budget\u0027 column in the \u0027annual_budget\u0027 table where the year is 2023 and the category is \u0027Infrastructure\u0027." 
+}, { + "id": "4158", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the budget for the justice department by 10% for 2023?", + "sql_context": "CREATE TABLE Budget (Service text, Year int, Budget int); INSERT INTO Budget (Service, Year, Budget) VALUES (\u0027Justice\u0027, 2023, 3000000);", + "sql": "UPDATE Budget SET Budget \u003d Budget * 1.1 WHERE Service \u003d \u0027Justice\u0027 AND Year \u003d 2023;", + "sql_explanation": "Update the budget for the Justice department by 10% for 2023." +}, { + "id": "4285", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the budget allocation for the \u0027Transportation\u0027 department in the \u0027BudgetAllocation\u0027 table", + "sql_context": "CREATE TABLE BudgetAllocation (department VARCHAR(20), budget INT);", + "sql": "UPDATE BudgetAllocation SET budget \u003d 500000 WHERE department \u003d \u0027Transportation\u0027;", + "sql_explanation": "This query updates the budget allocation for the \u0027Transportation\u0027 department in the \u0027BudgetAllocation\u0027 table by setting the budget to 500000 where the department is \u0027Transportation\u0027." 
+}, { + "id": "4344", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the average budget allocated to public services in Florida in 2022.", + "sql_context": "CREATE TABLE PublicServices (state VARCHAR(20), year INT, budget INT); INSERT INTO PublicServices (state, year, budget) VALUES (\u0027Florida\u0027, 2022, 1000000), (\u0027Florida\u0027, 2022, 1200000), (\u0027Florida\u0027, 2022, 800000), (\u0027Florida\u0027, 2022, 1100000);", + "sql": "SELECT AVG(budget) FROM PublicServices WHERE state \u003d \u0027Florida\u0027 AND year \u003d 2022;", + "sql_explanation": "This query identifies the average budget allocated to public services in Florida for the year 2022 by selecting all rows in the PublicServices table where the state is \u0027Florida\u0027 and the year is 2022, and calculating the average budget using the AVG function." 
+}, { + "id": "4681", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum budget allocated for public libraries in the state of New York?", + "sql_context": "CREATE TABLE public_libraries_budget (state VARCHAR(20), budget INT); INSERT INTO public_libraries_budget (state, budget) VALUES (\u0027New York\u0027, 8000000); INSERT INTO public_libraries_budget (state, budget) VALUES (\u0027New York\u0027, 10000000); INSERT INTO public_libraries_budget (state, budget) VALUES (\u0027Florida\u0027, 5000000);", + "sql": "SELECT MIN(budget) FROM public_libraries_budget WHERE state \u003d \u0027New York\u0027;", + "sql_explanation": "The SQL query finds the minimum budget allocated for public libraries in the state of New York by selecting the MIN function on the budget column, filtering for the state of New York." 
+}, { + "id": "4918", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated per school district in the state of \"California\"?", + "sql_context": "CREATE TABLE school_districts (district_id INT, district_name TEXT, state TEXT, budget FLOAT); INSERT INTO school_districts (district_id, district_name, state, budget) VALUES (1, \u0027Los Angeles Unified\u0027, \u0027California\u0027, 8000000), (2, \u0027San Diego Unified\u0027, \u0027California\u0027, 6000000), (3, \u0027Fresno Unified\u0027, \u0027California\u0027, 4000000);", + "sql": "SELECT AVG(budget) FROM school_districts WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "This SQL query calculates the average budget allocated per school district in the state of \"California\" by using the AVG function on the budget column and filtering the records with the WHERE clause for the state \"California\"." 
+}, { + "id": "5174", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum budget allocated to any public service in the city of Chicago?", + "sql_context": "CREATE TABLE public_services (name VARCHAR(255), city VARCHAR(255), budget DECIMAL(10,2)); INSERT INTO public_services (name, city, budget) VALUES (\u0027Chicago Public Libraries\u0027, 18000000.00), (\u0027Chicago Transit Authority\u0027, 15000000.00), (\u0027Chicago Park District\u0027, 14000000.00);", + "sql": "SELECT MAX(budget) FROM public_services WHERE city \u003d \u0027Chicago\u0027;", + "sql_explanation": "This query calculates the maximum budget allocated to any public service in Chicago by selecting all records with city \u0027Chicago\u0027 from the public_services table, then calculating the maximum value in the budget column." 
+}, { + "id": "5292", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the policy areas and their respective feedback scores in India in 2017.", + "sql_context": "CREATE SCHEMA in_schema;CREATE TABLE in_schema.policy_areas (area_id INT, area_name VARCHAR(20), feedback_score INT);INSERT INTO in_schema.policy_areas (area_id, area_name, feedback_score) VALUES (1, \u0027Healthcare\u0027, 75), (2, \u0027Education\u0027, 85), (3, \u0027Transportation\u0027, 80), (4, \u0027Housing\u0027, 70);", + "sql": "SELECT area_name, feedback_score FROM in_schema.policy_areas;", + "sql_explanation": "The SQL query lists the policy areas and their respective feedback scores in India in 2017 by selecting the area_name and feedback_score columns from the policy_areas table." +}, { + "id": "5577", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all public services with a budget greater than $400,000.", + "sql_context": "CREATE TABLE public_services (id INT PRIMARY KEY, service VARCHAR(255), location VARCHAR(255), budget DECIMAL(10, 2), provider VARCHAR(255));", + "sql": "SELECT * FROM public_services WHERE budget \u003e 400000;", + "sql_explanation": "1. Select all columns from the \u0027public_services\u0027 table. 2. Filter rows based on the condition: budget is greater than $400,000." 
+}, { + "id": "5626", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the feedback with Feedback_ID 1", + "sql_context": "CREATE TABLE Policy_Feedback (Feedback_ID INT PRIMARY KEY, Council_ID INT, Policy_Area VARCHAR(30), Feedback VARCHAR(100)); INSERT INTO Policy_Feedback (Feedback_ID, Council_ID, Policy_Area, Feedback) VALUES (1, 1, \u0027Housing\u0027, \u0027More affordable housing needed\u0027), (2, 2, \u0027Transportation\u0027, \u0027Improve public transportation\u0027), (3, 3, \u0027Education\u0027, \u0027Increase funding for schools\u0027);", + "sql": "DELETE FROM Policy_Feedback WHERE Feedback_ID \u003d 1;", + "sql_explanation": "This query deletes the feedback with a Feedback_ID of 1." 
+}, { + "id": "743", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for the TV shows released in 2023 with a rating above 8.5", + "sql_context": "CREATE TABLE tv_shows (title VARCHAR(255), genre VARCHAR(50), budget INT, release_year INT, rating INT);", + "sql": "INSERT INTO tv_shows (title, genre, budget, release_year, rating) VALUES (\u0027Show10\u0027, \u0027Comedy\u0027, 12000000, 2023, 8.7), (\u0027Show11\u0027, \u0027Drama\u0027, 14000000, 2023, 9), (\u0027Show12\u0027, \u0027Action\u0027, 16000000, 2023, 8.6);", + "sql_explanation": "This query inserts new records for the TV shows released in 2023 with a rating above 8.5 by inserting the rows into the tv_shows table." 
+}, { + "id": "1193", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has music from the United States been streamed in Africa in the last 3 years?", + "sql_context": "CREATE TABLE music (id INT, title VARCHAR(100), artist_country VARCHAR(50), streams INT); INSERT INTO music (id, title, artist_country, streams) VALUES (1, \u0027MusicA\u0027, \u0027United States\u0027, 1000000); INSERT INTO music (id, title, artist_country, streams) VALUES (2, \u0027MusicB\u0027, \u0027United States\u0027, 1200000);", + "sql": "SELECT SUM(streams) FROM music WHERE artist_country \u003d \u0027United States\u0027 AND (EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM date(substr(title, 7, 4)))) BETWEEN 1 AND 3;", + "sql_explanation": "Calculates the total number of streams for music from the United States in Africa over the last 3 years by summing the streams column for rows with the specified conditions. This assumes that the 4th and 5th characters of the title column represent the year of release." 
+}, { + "id": "1414", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 most-watched animated movies released in Japan between 2010 and 2020, ordered by global gross.", + "sql_context": "CREATE TABLE movie (id INT, title VARCHAR(100), release_year INT, country VARCHAR(50), genre VARCHAR(50), global_gross INT); INSERT INTO movie (id, title, release_year, country, genre, global_gross) VALUES (1, \u0027Movie1\u0027, 2015, \u0027Japan\u0027, \u0027Animation\u0027, 500000000); INSERT INTO movie (id, title, release_year, country, genre, global_gross) VALUES (2, \u0027Movie2\u0027, 2018, \u0027Japan\u0027, \u0027Animation\u0027, 600000000);", + "sql": "SELECT title, global_gross FROM movie WHERE country \u003d \u0027Japan\u0027 AND genre \u003d \u0027Animation\u0027 AND release_year BETWEEN 2010 AND 2020 ORDER BY global_gross DESC LIMIT 5;", + "sql_explanation": "This query finds the top 5 most-watched animated movies released in Japan between 2010 and 2020, ordered by global gross. It filters the movie table based on the country, genre, and release year, then orders the results by global gross in descending order and limits the results to 5." 
+}, { + "id": "2274", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which TV shows have the highest and lowest ratings among those produced in Japan and South Korea?", + "sql_context": "CREATE TABLE tv_shows (id INT, title VARCHAR(100), rating DECIMAL(3,2), production_country VARCHAR(50)); INSERT INTO tv_shows (id, title, rating, production_country) VALUES (1, \u0027TVShowA\u0027, 8.5, \u0027Japan\u0027); INSERT INTO tv_shows (id, title, rating, production_country) VALUES (2, \u0027TVShowB\u0027, 7.2, \u0027South Korea\u0027);", + "sql": "SELECT title, rating FROM tv_shows WHERE production_country IN (\u0027Japan\u0027, \u0027South Korea\u0027) ORDER BY rating DESC, rating ASC LIMIT 1;", + "sql_explanation": "Retrieves the title and rating of TV shows produced in Japan and South Korea, then sorts the result by rating in descending and ascending order, and finally returns the top row with the highest and lowest ratings." +}, { + "id": "2788", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Avg. 
revenue of Latin music released between 2010-2020", + "sql_context": "CREATE TABLE Music_Data (title VARCHAR(255), genre VARCHAR(50), release_date DATE, revenue INT);", + "sql": "SELECT AVG(revenue) FROM Music_Data WHERE genre \u003d \u0027Latin\u0027 AND release_date BETWEEN \u00272010-01-01\u0027 AND \u00272020-12-31\u0027;", + "sql_explanation": "This query calculates the average revenue of Latin music released between 2010 and 2020. It does this by selecting the AVG function on the revenue column, filtering for rows where the genre is \u0027Latin\u0027 and release_date is between 2010-01-01 and 2020-12-31." +}, { + "id": "3896", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users unsubscribed from the music streaming service in India?", + "sql_context": "CREATE TABLE Users (user_id INT, username VARCHAR(50), registration_date DATE, unsubscription_date DATE, country VARCHAR(50)); INSERT INTO Users (user_id, username, registration_date, unsubscription_date, country) VALUES (11, \u0027UserK\u0027, \u00272022-01-01\u0027, \u00272022-02-01\u0027, \u0027India\u0027); INSERT INTO Users (user_id, username, registration_date, unsubscription_date, country) VALUES (12, \u0027UserL\u0027, \u00272022-01-02\u0027, NULL, \u0027USA\u0027); INSERT INTO Users (user_id, username, registration_date, unsubscription_date, country) VALUES (13, \u0027UserM\u0027, \u00272022-01-03\u0027, \u00272022-03-01\u0027, \u0027India\u0027);", + "sql": "SELECT COUNT(*) FROM Users WHERE unsubscription_date IS NOT NULL AND country \u003d \u0027India\u0027;", + "sql_explanation": 
"The SQL query counts the number of users who unsubscribed from the music streaming service in India by using the COUNT function on the star symbol (*), filtering rows with the WHERE clause, checking if unsubscription_date is not null, and country equal to \u0027India\u0027." +}, { + "id": "4708", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget of movies released between 2000 and 2010?", + "sql_context": "CREATE TABLE Movies (id INT, title VARCHAR(255), release_year INT, budget INT); INSERT INTO Movies (id, title, release_year, budget) VALUES (1, \u0027Movie1\u0027, 2005, 50000000), (2, \u0027Movie2\u0027, 2002, 35000000), (3, \u0027Movie3\u0027, 2010, 80000000);", + "sql": "SELECT AVG(budget) FROM Movies WHERE release_year BETWEEN 2000 AND 2010;", + "sql_explanation": "The SQL query calculates the average budget of movies by using the AVG function on the budget column. It filters the data to only include rows where the release_year is between 2000 and 2010." 
+}, { + "id": "4814", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the production budget for the movie \"MovieA\" to 18000000.", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(100), genre VARCHAR(50), release_year INT, production_budget INT); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (1, \u0027MovieA\u0027, \u0027Action\u0027, 2005, 15000000); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (2, \u0027MovieB\u0027, \u0027Action\u0027, 2002, 20000000);", + "sql": "UPDATE movies SET production_budget \u003d 18000000 WHERE title \u003d \u0027MovieA\u0027;", + "sql_explanation": "This query updates the production budget for the movie \"MovieA\" to 18000000. It filters the records based on the title and updates the production_budget column with the new value." 
+}, { + "id": "4838", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for movies released before 2010?", + "sql_context": "CREATE TABLE Movies_Release_Year (id INT, title VARCHAR(100), release_year INT, budget DECIMAL(10,2)); INSERT INTO Movies_Release_Year (id, title, release_year, budget) VALUES (1, \u0027The Matrix\u0027, 1999, 63000000.00), (2, \u0027Titanic\u0027, 1997, 200000000.00), (3, \u0027The Lord of the Rings: The Fellowship of the Ring\u0027, 2001, 93000000.00);", + "sql": "SELECT SUM(budget) FROM Movies_Release_Year WHERE release_year \u003c 2010;", + "sql_explanation": "This query calculates the total budget for movies that were released before 2010." 
+}, { + "id": "4941", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the name of the lead actor in \u0027The Queen\u0027s Gambit\u0027?", + "sql_context": "CREATE TABLE tv_shows (id INT, title VARCHAR(255), season INT, lead_actor VARCHAR(255)); INSERT INTO tv_shows (id, title, season, lead_actor) VALUES (1, \u0027The Queen\u0027\u0027s Gambit\u0027, 1, \u0027Anya Taylor-Joy\u0027);", + "sql": "SELECT lead_actor FROM tv_shows WHERE title \u003d \u0027The Queen\u0027\u0027s Gambit\u0027;", + "sql_explanation": "This query retrieves the name of the lead actor in \u0027The Queen\u0027s Gambit\u0027 by selecting the lead_actor column where the title is \u0027The Queen\u0027s Gambit\u0027." 
+}, { + "id": "5223", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average budget of TV shows in the \u0027Drama\u0027 genre?", + "sql_context": "CREATE TABLE tv_show_budgets (id INT, title VARCHAR(255), genre VARCHAR(255), budget INT); INSERT INTO tv_show_budgets (id, title, genre, budget) VALUES (1, \u0027The Big Bang Theory\u0027, \u0027Comedy\u0027, 8000000), (2, \u0027Friends\u0027, \u0027Comedy\u0027, 10000000), (3, \u0027Breaking Bad\u0027, \u0027Drama\u0027, 3000000);", + "sql": "SELECT AVG(budget) FROM tv_show_budgets WHERE genre \u003d \u0027Drama\u0027;", + "sql_explanation": "This query calculates the average budget of TV shows in the \u0027Drama\u0027 genre by filtering the \u0027tv_show_budgets\u0027 table for the genre \u0027Drama\u0027 and then computing the average (\u0027AVG\u0027) of the \u0027budget\u0027 column." 
+}, { + "id": "5393", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for concerts in Asia?", + "sql_context": "CREATE TABLE Concerts (id INT, location VARCHAR(255), revenue INT); INSERT INTO Concerts (id, location, revenue) VALUES (1, \u0027Asia\u0027, 2000000), (2, \u0027Europe\u0027, 1500000), (3, \u0027North America\u0027, 1800000);", + "sql": "SELECT SUM(revenue) FROM Concerts WHERE location \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query calculates the total revenue for concerts in Asia using the SUM function on the revenue column and filtering the data using the WHERE clause to only include rows where the location is \u0027Asia\u0027." +}, { + "id": "5771", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove all movies with a rating under \u0027PG\u0027", + "sql_context": "CREATE TABLE movies (id INT, title TEXT, rating TEXT);", + "sql": "DELETE FROM movies WHERE rating \u003c \u0027PG\u0027;", + "sql_explanation": "The SQL query deletes rows from the \u0027movies\u0027 table where the \u0027rating\u0027 column is less than \u0027PG\u0027." 
+}, { + "id": "740", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the production quantity of Samarium for each day in the week, starting from Monday.", + "sql_context": "CREATE TABLE production (id INT, country VARCHAR(255), element VARCHAR(255), quantity INT, day VARCHAR(255), week INT, year INT); INSERT INTO production (id, country, element, quantity, day, week, year) VALUES (1, \u0027China\u0027, \u0027Samarium\u0027, 500, \u0027Monday\u0027, 1, 2021), (2, \u0027China\u0027, \u0027Samarium\u0027, 550, \u0027Tuesday\u0027, 1, 2021), (3, \u0027USA\u0027, \u0027Samarium\u0027, 400, \u0027Monday\u0027, 1, 2021), (4, \u0027USA\u0027, \u0027Samarium\u0027, 450, \u0027Tuesday\u0027, 1, 2021);", + "sql": "SELECT country, element, day, quantity FROM production WHERE element \u003d \u0027Samarium\u0027 AND week \u003d 1 AND day IN (\u0027Monday\u0027, \u0027Tuesday\u0027, \u0027Wednesday\u0027, \u0027Thursday\u0027, \u0027Friday\u0027, \u0027Saturday\u0027, \u0027Sunday\u0027) ORDER BY day;", + "sql_explanation": "The SQL query determines the production quantity of Samarium for each day in the week, starting from Monday, by selecting the rows where the element is Samarium, the week is 1, and the day is one of the seven days of the week. It then orders the result by day." 
+}, { + "id": "1405", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the production of Dysprosium in the Brazilian mine for March 2021 to 95.0.", + "sql_context": "CREATE TABLE mine (id INT, name TEXT, location TEXT, Dysprosium_monthly_production FLOAT, timestamp TIMESTAMP); INSERT INTO mine (id, name, location, Dysprosium_monthly_production, timestamp) VALUES (1, \u0027Australian Mine\u0027, \u0027Australia\u0027, 120.5, \u00272021-03-01\u0027), (2, \u0027Californian Mine\u0027, \u0027USA\u0027, 150.3, \u00272021-03-01\u0027), (3, \u0027Brazilian Mine\u0027, \u0027Brazil\u0027, 80.0, \u00272021-03-01\u0027);", + "sql": "UPDATE mine SET Dysprosium_monthly_production \u003d 95.0 WHERE name \u003d \u0027Brazilian Mine\u0027 AND EXTRACT(MONTH FROM timestamp) \u003d 3 AND EXTRACT(YEAR FROM timestamp) \u003d 2021;", + "sql_explanation": "This query updates the production of Dysprosium in the Brazilian mine for March 2021 to 95.0. It does this by filtering the \u0027mine\u0027 table for rows where the name is \u0027Brazilian Mine\u0027 and the month and year of the timestamp is March 2021. It then sets the Dysprosium_monthly_production column to 95.0 for these rows." 
+}, { + "id": "2190", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of gadolinium in India and Pakistan combined for the last 5 years?", + "sql_context": "CREATE TABLE gadolinium_production (year INT, country TEXT, production_quantity INT); INSERT INTO gadolinium_production (year, country, production_quantity) VALUES (2017, \u0027India\u0027, 1200), (2018, \u0027India\u0027, 1500), (2019, \u0027India\u0027, 1700), (2020, \u0027India\u0027, 2000), (2021, \u0027India\u0027, 2200), (2017, \u0027Pakistan\u0027, 800), (2018, \u0027Pakistan\u0027, 900), (2019, \u0027Pakistan\u0027, 1000), (2020, \u0027Pakistan\u0027, 1200), (2021, \u0027Pakistan\u0027, 1400);", + "sql": "SELECT SUM(production_quantity) FROM gadolinium_production WHERE country IN (\u0027India\u0027, \u0027Pakistan\u0027) AND year \u003e\u003d 2017 AND year \u003c\u003d 2021;", + "sql_explanation": "The SQL query calculates the total production of gadolinium in India and Pakistan for the last 5 years by using the SUM function on the \u0027production_quantity\u0027 column. It filters the records with the WHERE clause to only consider the rows with \u0027India\u0027 or \u0027Pakistan\u0027 as the country and within the specified year range." 
+}, { + "id": "2221", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production of Neodymium in 2020 from mines located in Canada?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, neodymium_production FLOAT); INSERT INTO mines (id, name, location, neodymium_production) VALUES (1, \u0027Mine A\u0027, \u0027Canada\u0027, 120.5), (2, \u0027Mine B\u0027, \u0027Canada\u0027, 150.7), (3, \u0027Mine C\u0027, \u0027USA\u0027, 200.3);", + "sql": "SELECT AVG(neodymium_production) FROM mines WHERE location \u003d \u0027Canada\u0027 AND YEAR(mines.timestamp) \u003d 2020 AND mine_type \u003d \u0027Neodymium\u0027;", + "sql_explanation": "This query calculates the average Neodymium production from Canadian mines in 2020. It filters the mines table by location and year, then calculates the average Neodymium production using the AVG function." 
+}, { + "id": "2513", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of Lutetium transactions with prices over 70 dollars in European countries.", + "sql_context": "CREATE TABLE lutetium_transactions (country VARCHAR(20), element VARCHAR(20), price DECIMAL(5,2), transaction_date DATE); INSERT INTO lutetium_transactions (country, element, price, transaction_date) VALUES (\u0027France\u0027, \u0027Lutetium\u0027, 80, \u00272020-01-01\u0027), (\u0027Germany\u0027, \u0027Lutetium\u0027, 65, \u00272020-02-01\u0027), (\u0027France\u0027, \u0027Lutetium\u0027, 75, \u00272020-03-01\u0027);", + "sql": "SELECT COUNT(*) FROM lutetium_transactions WHERE country IN (\u0027France\u0027, \u0027Germany\u0027) AND element \u003d \u0027Lutetium\u0027 AND price \u003e 70;", + "sql_explanation": "This query counts the number of Lutetium transactions with prices over 70 dollars in European countries by filtering the lutetium_transactions table for European countries, the Lutetium element, and prices above 70 dollars." 
+}, { + "id": "2677", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average Yttrium production in Oceania between 2016 and 2018?", + "sql_context": "CREATE TABLE production (year INT, region VARCHAR(10), element VARCHAR(10), quantity INT); INSERT INTO production (year, region, element, quantity) VALUES (2015, \u0027Oceania\u0027, \u0027Yttrium\u0027, 1200), (2016, \u0027Oceania\u0027, \u0027Yttrium\u0027, 1400), (2017, \u0027Oceania\u0027, \u0027Yttrium\u0027, 1500), (2018, \u0027Oceania\u0027, \u0027Yttrium\u0027, 1700), (2019, \u0027Oceania\u0027, \u0027Yttrium\u0027, 1800);", + "sql": "SELECT AVG(quantity) FROM production WHERE element \u003d \u0027Yttrium\u0027 AND region \u003d \u0027Oceania\u0027 AND year BETWEEN 2016 AND 2018;", + "sql_explanation": "This query calculates the average quantity of Yttrium produced in Oceania between the years 2016 and 2018 by selecting the AVG function of the quantity column for rows where the element is Yttrium, the region is Oceania, and the year is between 2016 and 2018, using the BETWEEN operator." 
+}, { + "id": "2683", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total Lutetium production from the first quarter of 2017 to the third quarter of 2018.", + "sql_context": "CREATE TABLE Lutetium_Production (Year INT, Quarter INT, Quantity INT); INSERT INTO Lutetium_Production (Year, Quarter, Quantity) VALUES (2016, 4, 90), (2017, 1, 100), (2017, 2, 110), (2017, 3, 120), (2017, 4, 130), (2018, 1, 140), (2018, 2, 150), (2018, 3, 160), (2018, 4, 170);", + "sql": "SELECT SUM(Quantity) FROM Lutetium_Production WHERE (Year \u003d 2017 AND Quarter \u003c\u003d 3) OR (Year \u003d 2018 AND Quarter \u003e\u003d 1);", + "sql_explanation": "The SQL query calculates the total Lutetium production quantity from the first quarter of 2017 to the third quarter of 2018." 
+}, { + "id": "2874", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average market price of Dysprosium (Dy) and Yttrium (Y) in Q3 2022.", + "sql_context": "CREATE TABLE market_trends (element VARCHAR(2), quarter INT, year INT, price DECIMAL(5,2)); INSERT INTO market_trends VALUES (\u0027Dy\u0027, 3, 2022, 25.6), (\u0027Y\u0027, 3, 2022, 32.1), (\u0027Dy\u0027, 3, 2022, 26.0);", + "sql": "SELECT AVG(price) AS avg_price FROM market_trends WHERE element IN (\u0027Dy\u0027, \u0027Y\u0027) AND quarter \u003d 3 AND year \u003d 2022;", + "sql_explanation": "This SQL query calculates the average market price for Dysprosium (Dy) and Yttrium (Y) in Q3 2022. It filters the market_trends table for the desired elements and time period, then calculates the average price for the matching rows." 
+}, { + "id": "3115", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of Dysprosium production in South Korea for 2018 and 2019.", + "sql_context": "CREATE TABLE production (year INT, element VARCHAR(10), country VARCHAR(10), quantity INT); INSERT INTO production (year, element, country, quantity) VALUES (2017, \u0027Dysprosium\u0027, \u0027South Korea\u0027, 1200), (2018, \u0027Dysprosium\u0027, \u0027South Korea\u0027, 1400), (2019, \u0027Dysprosium\u0027, \u0027South Korea\u0027, 1600), (2020, \u0027Dysprosium\u0027, \u0027South Korea\u0027, 1800), (2021, \u0027Dysprosium\u0027, \u0027South Korea\u0027, 2000);", + "sql": "DELETE FROM production WHERE element \u003d \u0027Dysprosium\u0027 AND country \u003d \u0027South Korea\u0027 AND year IN (2018, 2019);", + "sql_explanation": "This query deletes records of Dysprosium production in South Korea for 2018 and 2019. It uses the WHERE clause to target the correct rows." 
+}, { + "id": "3153", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum Terbium production in 2021 from mines in Australia?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, terbium_production FLOAT, timestamp DATE); INSERT INTO mines (id, name, location, terbium_production, timestamp) VALUES (1, \u0027Mine A\u0027, \u0027Australia\u0027, 120.5, \u00272021-01-01\u0027), (2, \u0027Mine B\u0027, \u0027Australia\u0027, 150.7, \u00272021-02-01\u0027), (3, \u0027Mine C\u0027, \u0027USA\u0027, 200.3, \u00272021-03-01\u0027);", + "sql": "SELECT MAX(terbium_production) FROM mines WHERE location \u003d \u0027Australia\u0027 AND YEAR(mines.timestamp) \u003d 2021;", + "sql_explanation": "This query calculates the maximum Terbium production from Australian mines in 2021. It filters the mines table by location and year, then calculates the maximum Terbium production using the MAX function." 
+}, { + "id": "3159", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tons of Dysprosium were produced in Q1 and Q3 of 2019?", + "sql_context": "CREATE TABLE production (year INT, element VARCHAR(10), quarter INT, quantity INT); INSERT INTO production (year, element, quarter, quantity) VALUES (2019, \u0027Dysprosium\u0027, 1, 1500); INSERT INTO production (year, element, quarter, quantity) VALUES (2019, \u0027Dysprosium\u0027, 3, 1800);", + "sql": "SELECT SUM(quantity) FROM production WHERE year \u003d 2019 AND element \u003d \u0027Dysprosium\u0027 AND quarter IN (1, 3);", + "sql_explanation": "This query calculates the total production of Dysprosium in Q1 and Q3 of 2019. It does so by summing the quantity for all rows where the year is 2019, the element is Dysprosium, and the quarter is either 1 or 3." 
+}, { + "id": "3470", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total production of Holmium in 2016 and 2017 combined?", + "sql_context": "CREATE TABLE production (element VARCHAR(10), year INT, quantity FLOAT); INSERT INTO production (element, year, quantity) VALUES (\u0027Holmium\u0027, 2015, 100), (\u0027Holmium\u0027, 2016, 150), (\u0027Holmium\u0027, 2017, 200), (\u0027Holmium\u0027, 2018, 250), (\u0027Holmium\u0027, 2019, 300);", + "sql": "SELECT SUM(quantity) FROM production WHERE element \u003d \u0027Holmium\u0027 AND (year \u003d 2016 OR year \u003d 2017);", + "sql_explanation": "This SQL query calculates the total production of Holmium in 2016 and 2017 by using the SUM function on the \u0027quantity\u0027 column, filtering the data for Holmium, and using the OR operator to combine the conditions for 2016 and 2017." 
+}, { + "id": "3544", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many terbium refineries are there in India and South Africa as of 2021?", + "sql_context": "CREATE TABLE terbium_refineries (country TEXT, num_refineries INT); INSERT INTO terbium_refineries (country, num_refineries) VALUES (\u0027India\u0027, 7), (\u0027South Africa\u0027, 8);", + "sql": "SELECT SUM(num_refineries) FROM terbium_refineries WHERE country IN (\u0027India\u0027, \u0027South Africa\u0027);", + "sql_explanation": "Counts the total number of terbium refineries in India and South Africa as of 2021 by summing the \u0027num_refineries\u0027 column values where \u0027country\u0027 is either \u0027India\u0027 or \u0027South Africa\u0027." 
+}, { + "id": "3652", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the distinct producers of Gadolinium from the \u0027production\u0027 table, ordered alphabetically.", + "sql_context": "CREATE TABLE production (element VARCHAR(10), year INT, month INT, producer VARCHAR(20), quantity INT);", + "sql": "SELECT DISTINCT producer FROM production WHERE element \u003d \u0027Gadolinium\u0027 ORDER BY producer ASC;", + "sql_explanation": "The SQL query lists all the distinct producers of Gadolinium by selecting the producer column from the production table where the element is Gadolinium, then using the DISTINCT keyword to select only distinct values. The ORDER BY clause orders the data by producer in ascending order." 
+}, { + "id": "3666", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average market price of Dysprosium in Q2 and Q3 2022.", + "sql_context": "CREATE TABLE DysprosiumMarketPrices (quarter VARCHAR(10), year INT, price DECIMAL(5,2)); INSERT INTO DysprosiumMarketPrices (quarter, year, price) VALUES (\u0027Q2\u0027, 2022, 260.50), (\u0027Q2\u0027, 2022, 262.30), (\u0027Q3\u0027, 2022, 270.00), (\u0027Q3\u0027, 2022, 268.80);", + "sql": "SELECT AVG(price) FROM DysprosiumMarketPrices WHERE quarter IN (\u0027Q2\u0027, \u0027Q3\u0027) AND year \u003d 2022;", + "sql_explanation": "The SQL query calculates the average price from the DysprosiumMarketPrices table where the quarter is either \u0027Q2\u0027 or \u0027Q3\u0027 and the year is 2022." 
+}, { + "id": "3719", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of Europium produced in Oceania?", + "sql_context": "CREATE TABLE europium_production (year INT, region VARCHAR(20), quantity INT); INSERT INTO europium_production (year, region, quantity) VALUES (2015, \u0027Australia\u0027, 1000), (2016, \u0027Australia\u0027, 1200), (2015, \u0027New Zealand\u0027, 800), (2016, \u0027New Zealand\u0027, 900);", + "sql": "SELECT SUM(quantity) FROM europium_production WHERE region IN (\u0027Australia\u0027, \u0027New Zealand\u0027);", + "sql_explanation": "This query calculates the total quantity of Europium produced in Oceania by filtering the europium_production table for Oceania regions and then summing the quantity." 
+}, { + "id": "3759", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total annual production of Terbium from all mines in 2020?", + "sql_context": "CREATE TABLE mine (id INT, name TEXT, location TEXT, Terbium_annual_production FLOAT, timestamp TIMESTAMP); INSERT INTO mine (id, name, location, Terbium_annual_production, timestamp) VALUES (1, \u0027Australian Mine\u0027, \u0027Australia\u0027, 1500.5, \u00272020-01-01\u0027), (2, \u0027Californian Mine\u0027, \u0027USA\u0027, 1700.3, \u00272020-01-01\u0027), (3, \u0027Brazilian Mine\u0027, \u0027Brazil\u0027, 1000.0, \u00272020-01-01\u0027);", + "sql": "SELECT SUM(Terbium_annual_production) FROM mine WHERE EXTRACT(YEAR FROM timestamp) \u003d 2020;", + "sql_explanation": "This query calculates the total annual production of Terbium from all mines in 2020. It does this by filtering the \u0027mine\u0027 table for rows where the year of the timestamp is 2020. It then calculates the sum of the Terbium_annual_production column." 
+}, { + "id": "3828", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the production quantity for all records of Erbium in 2020 to 125 units.", + "sql_context": "CREATE TABLE Producers (ProducerID INT PRIMARY KEY, Name TEXT, ProductionYear INT, RareEarth TEXT, Quantity INT);", + "sql": "UPDATE Producers SET Quantity \u003d 125 WHERE RareEarth \u003d \u0027Erbium\u0027 AND ProductionYear \u003d 2020;", + "sql_explanation": "The query updates the production quantity for all records of Erbium in 2020 to 125 units." +}, { + "id": "3838", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many Rare earth elements were produced in total between 2015 and 2017?", + "sql_context": "CREATE TABLE RareEarthElements_Production (Year INT, Element VARCHAR(10), Quantity INT); INSERT INTO RareEarthElements_Production (Year, Element, Quantity) VALUES (2015, \u0027Neodymium\u0027, 1200), (2015, \u0027Dysprosium\u0027, 1000), (2015, \u0027Praseodymium\u0027, 800), (2016, \u0027Neodymium\u0027, 1500), (2016, \u0027Dysprosium\u0027, 1200), (2016, \u0027Praseodymium\u0027, 1000), (2017, \u0027Neodymium\u0027, 1800), (2017, \u0027Dysprosium\u0027, 1600), (2017, \u0027Praseodymium\u0027, 1400);", + "sql": "SELECT SUM(Quantity) FROM 
RareEarthElements_Production WHERE Year BETWEEN 2015 AND 2017;", + "sql_explanation": "This SQL query calculates the total production quantity of Rare earth elements between 2015 and 2017 by summing up the Quantity column where the Year column is between 2015 and 2017." +}, { + "id": "3845", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum quantity of \u0027Lanthanum\u0027 produced in a year by \u0027Canada\u0027?", + "sql_context": "CREATE TABLE production (element VARCHAR(10), country VARCHAR(20), quantity INT, year INT); INSERT INTO production (element, country, quantity, year) VALUES (\u0027Lanthanum\u0027, \u0027Canada\u0027, 7000, 2016), (\u0027Lanthanum\u0027, \u0027Canada\u0027, 8000, 2017), (\u0027Lanthanum\u0027, \u0027Canada\u0027, 9000, 2018), (\u0027Lanthanum\u0027, \u0027Canada\u0027, 10000, 2019), (\u0027Lanthanum\u0027, \u0027Canada\u0027, 11000, 2020), (\u0027Lanthanum\u0027, \u0027Canada\u0027, 12000, 2021);", + "sql": "SELECT MAX(quantity) FROM production WHERE element \u003d \u0027Lanthanum\u0027 AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "The SQL query finds the maximum quantity of Lanthanum produced in a year by Canada using the MAX function." 
+}, { + "id": "3861", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of Dysprosium in 2017 and 2019?", + "sql_context": "CREATE TABLE price_data (element VARCHAR(10), year INT, price DECIMAL(5,2)); INSERT INTO price_data VALUES (\u0027Dysprosium\u0027, 2015, 35.50), (\u0027Dysprosium\u0027, 2016, 36.10), (\u0027Dysprosium\u0027, 2017, 34.90), (\u0027Dysprosium\u0027, 2018, 35.80), (\u0027Dysprosium\u0027, 2019, 36.30);", + "sql": "SELECT AVG(price) FROM price_data WHERE element \u003d \u0027Dysprosium\u0027 AND year IN (2017, 2019);", + "sql_explanation": "The SQL query calculates the average price of Dysprosium for the years 2017 and 2019 by selecting the average price value in the price_data table for the element Dysprosium and the specified years." 
+}, { + "id": "4212", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum production quantity of Neodymium since 2015?", + "sql_context": "CREATE TABLE production (year INT, element VARCHAR(10), quantity INT); INSERT INTO production (year, element, quantity) VALUES (2015, \u0027Neodymium\u0027, 12000), (2016, \u0027Neodymium\u0027, 11000);", + "sql": "SELECT MIN(quantity) FROM production WHERE element \u003d \u0027Neodymium\u0027 AND year \u003e\u003d 2015", + "sql_explanation": "This query calculates the minimum production quantity of Neodymium since 2015 by selecting the MIN function on the quantity column, filtering the data by the element \u0027Neodymium\u0027 and year greater than or equal to 2015." 
+}, { + "id": "4241", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly production of Neodymium in 2020 from the \u0027production\u0027 table?", + "sql_context": "CREATE TABLE production (element VARCHAR(10), year INT, month INT, quantity INT);", + "sql": "SELECT AVG(quantity) FROM production WHERE element \u003d \u0027Neodymium\u0027 AND year \u003d 2020;", + "sql_explanation": "The SQL query calculates the average monthly production of Neodymium in 2020 by selecting the quantity column from the production table where the element is Neodymium and the year is 2020, then using the AVG function to find the average." 
+}, { + "id": "4397", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the minimum production of Cerium in 2019?", + "sql_context": "CREATE TABLE production (year INT, element VARCHAR(10), quantity INT); INSERT INTO production (year, element, quantity) VALUES (2015, \u0027Cerium\u0027, 1200), (2016, \u0027Cerium\u0027, 1400), (2017, \u0027Cerium\u0027, 1500), (2018, \u0027Cerium\u0027, 1700), (2019, \u0027Cerium\u0027, 1300), (2020, \u0027Cerium\u0027, 1400), (2021, \u0027Cerium\u0027, 1500);", + "sql": "SELECT MIN(quantity) FROM production WHERE element \u003d \u0027Cerium\u0027 AND year \u003d 2019;", + "sql_explanation": "This query calculates the minimum quantity of Cerium produced in the year 2019. It does so by selecting the MIN function of the quantity column for rows where the element is Cerium and the year is 2019." 
+}, { + "id": "4773", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the minimum price of erbium produced in South America.", + "sql_context": "CREATE TABLE erbium_prices (continent VARCHAR(10), price DECIMAL(5,2), year INT); INSERT INTO erbium_prices (continent, price, year) VALUES (\u0027South America\u0027, 250.00, 2020), (\u0027South America\u0027, 240.00, 2019), (\u0027South America\u0027, 260.00, 2018);", + "sql": "SELECT MIN(price) FROM erbium_prices WHERE continent \u003d \u0027South America\u0027;", + "sql_explanation": "The SQL query calculates the minimum price of erbium produced in South America by selecting the price column from the erbium_prices table, filtering the rows with the continent \u0027South America\u0027, and then calculating the minimum using the MIN() function." 
+}, { + "id": "4799", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and locations of Europium producers that started production after 2015.", + "sql_context": "CREATE TABLE europium_production (producer_id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), start_year INT);", + "sql": "SELECT name, location FROM europium_production WHERE start_year \u003e 2015;", + "sql_explanation": "This query selects the name and location columns from the europium_production table, but only for rows where the start_year is greater than 2015." +}, { + "id": "4807", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the production of Dysprosium for 2018 to 28000", + "sql_context": "CREATE TABLE dysprosium_production (year INT, production FLOAT); INSERT INTO dysprosium_production (year, production) VALUES (2015, 8000), (2016, 10000), (2017, 12000), (2018, 24000), (2019, 27000), (2020, 30000);", + "sql": "UPDATE dysprosium_production SET production \u003d 28000 WHERE year \u003d 2018;", + "sql_explanation": "This query updates the production of Dysprosium for the year 2018 in the dysprosium_production table to 28000." 
+}, { + "id": "4811", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of lutetium in South Africa?", + "sql_context": "CREATE TABLE lutetium_prices (country VARCHAR(20), price DECIMAL(5,2), year INT); INSERT INTO lutetium_prices (country, price, year) VALUES (\u0027South Africa\u0027, 120.00, 2018), (\u0027South Africa\u0027, 125.00, 2019), (\u0027South Africa\u0027, 130.00, 2020);", + "sql": "SELECT AVG(price) FROM lutetium_prices WHERE country \u003d \u0027South Africa\u0027;", + "sql_explanation": "This SQL query calculates the average price of lutetium in South Africa by selecting the price column from the lutetium_prices table, filtering the rows with the country being \u0027South Africa\u0027, and calculating the average using the AVG() function." 
+}, { + "id": "4865", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production volume for producers located in India?", + "sql_context": "CREATE TABLE producers (id INT, name VARCHAR(255), country VARCHAR(255), production_volume INT); INSERT INTO producers (id, name, country, production_volume) VALUES (1, \u0027ABC Elements Inc.\u0027, \u0027USA\u0027, 350); INSERT INTO producers (id, name, country, production_volume) VALUES (2, \u0027Rare Earth Corp.\u0027, \u0027China\u0027, 1200); INSERT INTO producers (id, name, country, production_volume) VALUES (3, \u0027Earth Elements Ltd.\u0027, \u0027India\u0027, 450);", + "sql": "SELECT AVG(production_volume) FROM producers WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query calculates the average production volume for producers located in India by filtering for producers from that country and then calculating the average production volume." 
+}, { + "id": "4932", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum Praseodymium production in 2018?", + "sql_context": "CREATE TABLE praseodymium_production (country VARCHAR(50), year INT, quantity INT); INSERT INTO praseodymium_production (country, year, quantity) VALUES (\u0027China\u0027, 2018, 75000), (\u0027United States\u0027, 2018, 10000), (\u0027Malaysia\u0027, 2018, 8000), (\u0027India\u0027, 2018, 5000);", + "sql": "SELECT MIN(quantity) FROM praseodymium_production WHERE year \u003d 2018;", + "sql_explanation": "This query finds the minimum Praseodymium production for the year 2018 by selecting the minimum value of the quantity column in the praseodymium_production table for the year 2018." 
+}, { + "id": "4969", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of neodymium magnet production from the table \u0027production\u0027 where the year is 2020", + "sql_context": "CREATE TABLE production (id INT, element VARCHAR(10), year INT, quantity INT); INSERT INTO production (id, element, year, quantity) VALUES (1, \u0027neodymium\u0027, 2018, 500), (2, \u0027neodymium\u0027, 2019, 600), (3, \u0027neodymium\u0027, 2020, 700), (4, \u0027neodymium\u0027, 2021, 800);", + "sql": "DELETE FROM production WHERE element \u003d \u0027neodymium\u0027 AND year \u003d 2020;", + "sql_explanation": "*This SQL query deletes the records of neodymium magnet production from the table \u0027production\u0027 where the year is 2020.*" +}, { + "id": "5083", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum price of terbium produced in Africa?", + "sql_context": "CREATE TABLE terbium_prices (continent VARCHAR(10), price DECIMAL(5,2), year INT); INSERT INTO terbium_prices (continent, price, year) VALUES (\u0027Africa\u0027, 320.00, 2020), (\u0027Africa\u0027, 310.00, 2019), (\u0027Africa\u0027, 330.00, 2018);", + "sql": "SELECT MIN(price) FROM terbium_prices WHERE continent \u003d \u0027Africa\u0027;", + 
"sql_explanation": "The SQL query calculates the minimum price of terbium produced in Africa by selecting the price column from the terbium_prices table, filtering the rows with the continent \u0027Africa\u0027, and then calculating the minimum using the MIN() function." +}, { + "id": "5473", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the environmental_impact table that have a value less than 5 for the impact_score column", + "sql_context": "CREATE TABLE environmental_impact ( id INT PRIMARY KEY, element VARCHAR(10), impact_score INT );", + "sql": "DELETE FROM environmental_impact WHERE impact_score \u003c 5;", + "sql_explanation": "This SQL query deletes all records from the environmental_impact table that have a value less than 5 for the impact_score column. It uses the DELETE statement and specifies the environmental_impact table. The WHERE clause filters the records based on the impact_score." 
+}, { + "id": "107", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for 5 fans who signed up for the soccer team newsletter", + "sql_context": "CREATE TABLE fans (fan_id INT, first_name VARCHAR(50), last_name VARCHAR(50), dob DATE, signed_up_for_newsletter BOOLEAN); INSERT INTO fans (fan_id, first_name, last_name, dob, signed_up_for_newsletter) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u00271990-05-01\u0027, false), (2, \u0027Jane\u0027, \u0027Smith\u0027, \u00271985-08-12\u0027, false);", + "sql": "INSERT INTO fans (fan_id, first_name, last_name, dob, signed_up_for_newsletter) VALUES (3, \u0027Michael\u0027, \u0027Lee\u0027, \u00271995-03-25\u0027, true), (4, \u0027Sophia\u0027, \u0027Park\u0027, \u00272000-06-18\u0027, true), (5, \u0027William\u0027, \u0027Johnson\u0027, \u00272005-11-10\u0027, true), (6, \u0027Olivia\u0027, \u0027Kim\u0027, \u00272007-09-22\u0027, true), (7, \u0027Ethan\u0027, \u0027Lee\u0027, \u00272010-02-03\u0027, true);", + "sql_explanation": "This query inserts new records into the fans table for 5 fans who have signed up for the soccer team newsletter. Each fan has a unique fan_id, a first and last name, a date of birth, and a boolean value indicating they have signed up for the newsletter." 
+}, { + "id": "1057", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many home games did the Paris Saint-Germain win in Ligue 1 during the 2019-2020 season?", + "sql_context": "CREATE TABLE soccer_games(id INT, team VARCHAR(50), league VARCHAR(50), location VARCHAR(50), result VARCHAR(10), year INT); INSERT INTO soccer_games(id, team, league, location, result, year) VALUES (1, \u0027Paris Saint-Germain\u0027, \u0027Ligue 1\u0027, \u0027Parc des Princes\u0027, \u0027Win\u0027, 2019), (2, \u0027Paris Saint-Germain\u0027, \u0027Ligue 1\u0027, \u0027Parc des Princes\u0027, \u0027Win\u0027, 2019), (3, \u0027Paris Saint-Germain\u0027, \u0027Ligue 1\u0027, \u0027Parc des Princes\u0027, \u0027Draw\u0027, 2019);", + "sql": "SELECT COUNT(*) FROM soccer_games WHERE team \u003d \u0027Paris Saint-Germain\u0027 AND league \u003d \u0027Ligue 1\u0027 AND location \u003d \u0027Parc des Princes\u0027 AND result \u003d \u0027Win\u0027 AND (year \u003d 2019 OR year \u003d 2020);", + "sql_explanation": "This query calculates the number of home games Paris Saint-Germain won in Ligue 1 during the 2019-2020 season by counting the number of rows that meet the specified conditions. It filters the data to only include games played at Parc des Princes, with a result of \u0027Win\u0027, and in the years 2019 and 2020." 
+}, { + "id": "1364", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of fans and ticket price for basketball games at \u0027ArenaA\u0027 in Q1 2022?", + "sql_context": "CREATE TABLE ArenaA (game_id INT, game_name VARCHAR(50), date DATE, ticket_price DECIMAL(5,2), fan_age INT, sport VARCHAR(20));", + "sql": "SELECT AVG(fan_age) AS avg_fan_age, AVG(ticket_price) AS avg_ticket_price FROM ArenaA WHERE date \u003e\u003d \u00272022-01-01\u0027 AND date \u003c\u003d \u00272022-03-31\u0027 AND sport \u003d \u0027Basketball\u0027;", + "sql_explanation": "The SQL query calculates the average fan age and ticket price for basketball games at \u0027ArenaA\u0027 in Q1 2022 by filtering the records using the WHERE clause to only include the events from Q1 2022 and with sport type \u0027Basketball\u0027, then applying the AVG aggregate function to the \u0027fan_age\u0027 and \u0027ticket_price\u0027 columns." 
+}, { + "id": "2484", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a fan who made a purchase at a basketball event", + "sql_context": "CREATE TABLE fan_purchases (purchase_id INT, fan_id INT, team VARCHAR(50), event_date DATE, amount DECIMAL(5, 2)); INSERT INTO fan_purchases (purchase_id, fan_id, team, event_date, amount) VALUES (1, 1, \u0027Basketball\u0027, \u00272022-03-01\u0027, 100.00), (2, 2, \u0027Basketball\u0027, \u00272022-03-15\u0027, 150.00);", + "sql": "INSERT INTO fan_purchases (purchase_id, fan_id, team, event_date, amount) VALUES (3, 3, \u0027Basketball\u0027, \u00272022-04-01\u0027, 75.00);", + "sql_explanation": "This query inserts a new record into the fan_purchases table for a fan who made a purchase at a Basketball event. The fan has a unique purchase_id, a fan_id that links to the fans table, the team name, the event date, and the amount of the purchase." 
+}, { + "id": "2904", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fans who have attended NFL games from historically underrepresented communities?", + "sql_context": "CREATE TABLE nfl_fans (fan_id INT, name VARCHAR(50), community VARCHAR(50), games_attended INT); INSERT INTO nfl_fans (fan_id, name, community, games_attended) VALUES (1, \u0027John Smith\u0027, \u0027African American\u0027, 5), (2, \u0027Maria Garcia\u0027, \u0027Hispanic\u0027, 3), (3, \u0027James Kim\u0027, \u0027Asian American\u0027, 4);", + "sql": "SELECT SUM(games_attended) FROM nfl_fans WHERE community IN (\u0027African American\u0027, \u0027Hispanic\u0027, \u0027Asian American\u0027);", + "sql_explanation": "This SQL query calculates the total number of fans from historically underrepresented communities who have attended NFL games." 
+}, { + "id": "3439", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique fans who attended games in the last 30 days", + "sql_context": "CREATE TABLE fan_attendance (fan_id INT, game_date DATE);", + "sql": "SELECT COUNT(DISTINCT fan_id) FROM fan_attendance WHERE game_date \u003e\u003d CURDATE() - INTERVAL 30 DAY;", + "sql_explanation": "The SQL query calculates the number of unique fans who attended games in the last 30 days by counting distinct \u0027fan_id\u0027 from \u0027fan_attendance\u0027 table within the specified date range." +}, { + "id": "4022", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average well-being score for athletes who identify as female?", + "sql_context": "CREATE TABLE athletes (athlete_id INT, well_being_score INT, gender VARCHAR(50));", + "sql": "SELECT AVG(athletes.well_being_score) FROM athletes WHERE athletes.gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query calculates the average well-being score for athletes who identify as female by selecting the \u0027well-being_score\u0027 column from the \u0027athletes\u0027 table for the rows where the \u0027gender\u0027 is \u0027Female\u0027 and then calculating the average." 
+}, { + "id": "4045", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records for the \u0027Melbourne United\u0027 and \u0027Sydney Kings\u0027 in the \u0027teams\u0027 table with 0 tickets sold.", + "sql_context": "CREATE TABLE teams (id INT, name TEXT, city TEXT); INSERT INTO teams (id, name, city) VALUES (1, \u0027Golden State Warriors\u0027, \u0027San Francisco\u0027), (2, \u0027Los Angeles Lakers\u0027, \u0027LA\u0027), (3, \u0027Brooklyn Nets\u0027, \u0027Brooklyn\u0027), (4, \u0027Toronto Raptors\u0027, \u0027Toronto\u0027), (5, \u0027Philadelphia 76ers\u0027, \u0027Philadelphia\u0027), (6, \u0027LA Clippers\u0027, \u0027LA\u0027), (7, \u0027Melbourne United\u0027, \u0027Melbourne\u0027), (8, \u0027Sydney Kings\u0027, \u0027Sydney\u0027); CREATE TABLE tickets (id INT, team TEXT, home_team TEXT, quantity INT);", + "sql": "INSERT INTO tickets (id, team) VALUES (9, \u0027Melbourne United\u0027), (10, \u0027Sydney Kings\u0027);", + "sql_explanation": "This query inserts new records for the \u0027Melbourne United\u0027 and \u0027Sydney Kings\u0027 in the \u0027teams\u0027 table with 0 tickets sold." 
+}, { + "id": "4505", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have registered from \u0027North America\u0027 in \u0027fan_registration\u0027 table?", + "sql_context": "CREATE TABLE fan_registration (user_id INT, name VARCHAR(255), email VARCHAR(255), country VARCHAR(255), date_registered DATE);", + "sql": "SELECT COUNT(user_id) FROM fan_registration WHERE country \u003d \u0027North America\u0027;", + "sql_explanation": "This query counts the number of users who have registered from North America in the \u0027fan_registration\u0027 table. It uses the WHERE clause to filter the results to only include users from North America and then applies the COUNT function to count the number of user_id." 
+}, { + "id": "4519", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fans attended TeamB\u0027s away games?", + "sql_context": "CREATE TABLE attendance (id INT, team VARCHAR(50), location VARCHAR(50), fans INT); INSERT INTO attendance (id, team, location, fans) VALUES (1, \u0027TeamA\u0027, \u0027Home\u0027, 1000), (2, \u0027TeamA\u0027, \u0027Away\u0027, 800), (3, \u0027TeamB\u0027, \u0027Home\u0027, 1200), (4, \u0027TeamB\u0027, \u0027Away\u0027, 1500);", + "sql": "SELECT SUM(fans) FROM attendance WHERE team \u003d \u0027TeamB\u0027 AND location \u003d \u0027Away\u0027;", + "sql_explanation": "This query calculates the sum of the fans column in the attendance table, filters the rows to only those where the team is TeamB and the location is Away." 
+}, { + "id": "4525", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fans from the \"Fans\" table live in the state of California and have attended more than 5 games?", + "sql_context": "CREATE TABLE fans (id INT, name VARCHAR(50), state VARCHAR(50), games_attended INT);", + "sql": "SELECT COUNT(*) FROM fans WHERE state \u003d \u0027California\u0027 AND games_attended \u003e 5;", + "sql_explanation": "This SQL query calculates the number of fans in the Fans table who live in the state of California and have attended more than 5 games. It filters for fans based on their state and the number of games they\u0027ve attended, and counts the number of fans that meet the criteria." 
+}, { + "id": "5114", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of tickets sold for a single game in the \"Boston Celtics\" team?", + "sql_context": "CREATE TABLE tickets(id INT, team VARCHAR(50), game_date DATE, quantity INT);INSERT INTO tickets(id, team, game_date, quantity) VALUES (1, \u0027Boston Celtics\u0027, \u00272022-01-01\u0027, 800), (2, \u0027Boston Celtics\u0027, \u00272022-01-02\u0027, 1000), (3, \u0027Boston Celtics\u0027, \u00272022-02-15\u0027, 1200);", + "sql": "SELECT MAX(quantity) FROM tickets WHERE team \u003d \u0027Boston Celtics\u0027;", + "sql_explanation": "The SQL query calculates the maximum number of tickets sold for a single game in the \"Boston Celtics\" team. It uses the MAX function to find the highest quantity of tickets sold for a single game, and the WHERE clause to filter the results to only include sales for \"Boston Celtics\" games." 
+}, { + "id": "5190", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all games where the attendance was less than 5000.", + "sql_context": "CREATE TABLE Games (GameID INT, HomeAttendance INT, AwayAttendance INT);", + "sql": "DELETE FROM Games WHERE HomeAttendance + AwayAttendance \u003c 5000;", + "sql_explanation": "The SQL query calculates the total attendance for each game by adding the HomeAttendance and AwayAttendance columns. It then deletes all games where the total attendance is less than 5000 using the DELETE statement." +}, { + "id": "5266", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fans are from NY in the fan_demographics table?", + "sql_context": "CREATE TABLE fan_demographics (fan_id INTEGER, fan_state TEXT);", + "sql": "SELECT COUNT(*) FROM fan_demographics WHERE fan_state \u003d \u0027NY\u0027;", + "sql_explanation": "This SQL query counts the number of records in the fan_demographics table where the fan_state column is equal to \u0027NY\u0027." 
+}, { + "id": "5270", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fans are from CA in the fan_demographics table?", + "sql_context": "CREATE TABLE fan_demographics (fan_id INTEGER, fan_state TEXT);", + "sql": "SELECT COUNT(*) FROM fan_demographics WHERE fan_state \u003d \u0027CA\u0027;", + "sql_explanation": "This SQL query counts the number of records in the fan_demographics table where the fan_state column is equal to \u0027CA\u0027." +}, { + "id": "5327", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tickets were sold for the \u0027Homecoming\u0027 event in the \u0027prices\u0027 table?", + "sql_context": "CREATE TABLE prices (price_id INT, event VARCHAR(50), price DECIMAL(5,2), quantity INT); INSERT INTO prices (price_id, event, price, quantity) VALUES (1, \u0027Homecoming\u0027, 50.00, 300); INSERT INTO prices (price_id, event, price, quantity) VALUES (2, \u0027Season Finale\u0027, 75.00, 250);", + "sql": "SELECT SUM(quantity) FROM prices WHERE event \u003d \u0027Homecoming\u0027;", + "sql_explanation": "The SQL query calculates the total number of tickets sold for the \u0027Homecoming\u0027 event by using the SUM function on the \u0027quantity\u0027 column and filtering the 
\u0027event\u0027 column with the \u0027Homecoming\u0027 value." +}, { + "id": "5540", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of tickets sold for the \"Home Team\" in the \"tickets\" table.", + "sql_context": "CREATE TABLE tickets (id INT, game_id INT, team VARCHAR(50), tickets_sold INT);", + "sql": "SELECT COUNT(*) FROM tickets WHERE team \u003d \u0027Home Team\u0027;", + "sql_explanation": "This query counts the number of tickets sold for the \"Home Team\" by filtering the \"team\" column using the WHERE clause and then counting all records with the COUNT function." +}, { + "id": "5566", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of athletes in the \u0027Soccer\u0027 team in the \u0027athletes\u0027 table?", + "sql_context": "CREATE TABLE athletes (athlete_id INT, name VARCHAR(50), age INT, team VARCHAR(20)); INSERT INTO athletes (athlete_id, name, age, team) VALUES (1, \u0027Jane Doe\u0027, 30, \u0027Soccer\u0027); INSERT INTO athletes (athlete_id, name, age, team) VALUES (2, \u0027Jim Brown\u0027, 28, \u0027Basketball\u0027); INSERT INTO athletes (athlete_id, name, age, team) VALUES (3, \u0027Marie Jones\u0027, 26, \u0027Soccer\u0027);", + "sql": "SELECT 
AVG(age) FROM athletes WHERE team \u003d \u0027Soccer\u0027;", + "sql_explanation": "The SQL query calculates the average age of athletes in the \u0027Soccer\u0027 team by using the AVG function on the \u0027age\u0027 column and filtering the \u0027team\u0027 column with the \u0027Soccer\u0027 value." +}, { + "id": "5567", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all athletes who scored more than 10 goals", + "sql_context": "athlete_stats", + "sql": "SELECT * FROM athlete_stats WHERE goals_scored \u003e 10;", + "sql_explanation": "This query selects all records from the athlete_stats table where the goals_scored column has a value greater than 10." +}, { + "id": "5636", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List the top 5 goal scorers", + "sql_context": "top_5_goal_scorers", + "sql": "SELECT * FROM top_5_goal_scorers WHERE rank \u003c\u003d 5;", + "sql_explanation": "This query selects the top 5 goal scorers from the top_5_goal_scorers view using the WHERE clause." 
+}, { + "id": "5679", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List ticket sales for team_id 101", + "sql_context": "CREATE TABLE ticket_sales (sale_id INT PRIMARY KEY, team_id INT, sale_date DATE, quantity INT); INSERT INTO ticket_sales (sale_id, team_id, sale_date, quantity) VALUES (1, 101, \u00272022-02-15\u0027, 500);", + "sql": "SELECT * FROM ticket_sales WHERE team_id \u003d 101;", + "sql_explanation": "The SQL statement selects all columns from the \u0027ticket_sales\u0027 table where the team_id is 101." +}, { + "id": "5838", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average age of athletes in the \"athletes\" table.", + "sql_context": "CREATE TABLE athletes (id INT, name VARCHAR(50), age INT, sport VARCHAR(50), country VARCHAR(50)); INSERT INTO athletes (id, name, age, sport, country) VALUES (1, \u0027John Doe\u0027, 30, \u0027Basketball\u0027, \u0027USA\u0027);", + "sql": "SELECT AVG(age) FROM athletes;", + "sql_explanation": "This SQL query calculates the average age of athletes by using the AVG function on the \"age\" column from the \"athletes\" table." 
+}, { + "id": "1688", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 2 donors to language preservation efforts in Europe?", + "sql_context": "CREATE TABLE contributors_2 (id INT, name TEXT, country TEXT, amount_donated FLOAT); INSERT INTO contributors_2 (id, name, country, amount_donated) VALUES (1, \u0027John Smith\u0027, \u0027UK\u0027, 5000.00), (2, \u0027Jane Doe\u0027, \u0027Germany\u0027, 4000.00), (3, \u0027Amina Mohamed\u0027, \u0027France\u0027, 8000.00);", + "sql": "SELECT name FROM contributors_2 WHERE country IN (\u0027UK\u0027, \u0027Germany\u0027, \u0027France\u0027) AND role \u003d \u0027Language Preservation\u0027 ORDER BY amount_donated DESC LIMIT 2;", + "sql_explanation": "The query selects the name column from the contributors_2 table, filtering by the country column to only include rows for Europe and language preservation donors. It then orders the results by the amount_donated column in descending order and limits the results to the top 2 rows. This gives the top 2 donors to language preservation efforts in Europe." 
+}, { + "id": "1882", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many indigenous languages are endangered in the Arctic region?", + "sql_context": "CREATE TABLE languages (name VARCHAR(255), region VARCHAR(255), status VARCHAR(255)); INSERT INTO languages (name, region, status) VALUES (\u0027Inuktitut\u0027, \u0027Arctic\u0027, \u0027Vulnerable\u0027); INSERT INTO languages (name, region, status) VALUES (\u0027Sami\u0027, \u0027Arctic\u0027, \u0027Vulnerable\u0027);", + "sql": "SELECT COUNT(*) FROM languages WHERE region \u003d \u0027Arctic\u0027 AND status \u003d \u0027Critically Endangered\u0027 OR status \u003d \u0027Endangered\u0027 OR status \u003d \u0027Vulnerable\u0027;", + "sql_explanation": "Determines the number of indigenous languages that are endangered in the Arctic region. It does this by filtering only the languages in the \u0027languages\u0027 table with region \u0027Arctic\u0027 and status as \u0027Critically Endangered\u0027, \u0027Endangered\u0027, or \u0027Vulnerable\u0027, and selecting the count of those rows." 
+}, { + "id": "2042", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which indigenous languages in the European culture domain have less than 50,000 speakers and are in danger of extinction?", + "sql_context": "CREATE TABLE EuropeanLanguages (LanguageID int, LanguageName varchar(255), SpeakersCount int, CultureDomain varchar(255), Endangered int); INSERT INTO EuropeanLanguages (LanguageID, LanguageName, SpeakersCount, CultureDomain, Endangered) VALUES (1, \u0027Friulian\u0027, 60000, \u0027European\u0027, 1);", + "sql": "SELECT LanguageName, SpeakersCount FROM EuropeanLanguages WHERE CultureDomain \u003d \u0027European\u0027 AND Endangered \u003d 1 AND SpeakersCount \u003c 50000;", + "sql_explanation": "1. Select the LanguageName and SpeakersCount columns from the EuropeanLanguages table. 2. Filter the rows where CultureDomain is \u0027European\u0027, Endangered is 1 (in danger of extinction), and SpeakersCount is less than 50,000." 
+}, { + "id": "3274", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest event at each heritage site, grouped by site location?", + "sql_context": "CREATE TABLE HeritageSites (id INT, name VARCHAR(255), location VARCHAR(255), type VARCHAR(255), year_established INT, UNIQUE(id)); CREATE TABLE Events (id INT, name VARCHAR(255), date DATE, HeritageSite_id INT, PRIMARY KEY(id), FOREIGN KEY(HeritageSite_id) REFERENCES HeritageSites(id)); CREATE VIEW EventsPerSite AS SELECT HeritageSites.location AS heritage_site, MIN(Events.date) as min_date FROM Events INNER JOIN HeritageSites ON Events.HeritageSite_id \u003d HeritageSites.id GROUP BY HeritageSites.location;", + "sql": "SELECT EventsPerSite.heritage_site, MIN(EventsPerSite.min_date) as earliest_event FROM EventsPerSite;", + "sql_explanation": "This query finds the earliest event at each heritage site, grouped by site location. It does this by creating a view (EventsPerSite) that joins the HeritageSites and Events table on the HeritageSite_id column and groups the results by the location and the minimum date of the events (MIN(Events.date)). The earliest event is then calculated for each group by finding the minimum date in the min_date column (MIN(EventsPerSite.min_date))." 
+}, { + "id": "3661", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a traditional art from \u0027India\u0027 named \u0027Madhubani Painting\u0027", + "sql_context": "CREATE TABLE traditional_arts (id INT PRIMARY KEY, name TEXT, location TEXT);", + "sql": "INSERT INTO traditional_arts (id, name, location) VALUES (1, \u0027Madhubani Painting\u0027, \u0027India\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027traditional_arts\u0027 table with id 1, name \u0027Madhubani Painting\u0027, and location \u0027India\u0027." +}, { + "id": "4176", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many heritage languages in Oceania are considered vulnerable?", + "sql_context": "CREATE TABLE Languages (LanguageID INT PRIMARY KEY, Name VARCHAR(50), Status VARCHAR(20), Region VARCHAR(50)); INSERT INTO Languages (LanguageID, Name, Status, Region) VALUES (1, \u0027English\u0027, \u0027Not Endangered\u0027, \u0027North America\u0027), (2, \u0027Maori\u0027, \u0027Vulnerable\u0027, \u0027Oceania\u0027);", + "sql": "SELECT COUNT(*) FROM Languages WHERE Status \u003d \u0027Vulnerable\u0027 AND Region \u003d \u0027Oceania\u0027;", + "sql_explanation": "Count the number of 
languages in the Languages table where the status is vulnerable and the region is Oceania." +}, { + "id": "4185", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the status column to \u0027vulnerable\u0027 in the endangered_language table where total_speakers is greater than 10000.", + "sql_context": "endangered_language (language, status, total_speakers)", + "sql": "UPDATE endangered_language SET status \u003d \u0027vulnerable\u0027 WHERE total_speakers \u003e 10000;", + "sql_explanation": "The UPDATE statement modifies records in the endangered_language table, targeting rows where total_speakers is greater than 10000. It sets the status column value to \u0027vulnerable\u0027." 
+}, { + "id": "4350", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many traditional art forms are being preserved in Europe?", + "sql_context": "CREATE TABLE ArtForms (ArtFormID INT PRIMARY KEY, Name VARCHAR(100), Origin VARCHAR(50), Status VARCHAR(20)); INSERT INTO ArtForms (ArtFormID, Name, Origin, Status) VALUES (1, \u0027Oil Painting\u0027, \u0027Europe\u0027, \u0027Preserved\u0027), (2, \u0027Watercolor\u0027, \u0027Europe\u0027, \u0027Preserved\u0027);", + "sql": "SELECT COUNT(*) FROM ArtForms WHERE Origin \u003d \u0027Europe\u0027 AND Status \u003d \u0027Preserved\u0027;", + "sql_explanation": "Count the number of traditional art forms in the ArtForms table where the origin is Europe and the status is preserved." 
+}, { + "id": "4475", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the visitors_per_month of Taj Mahal to 25000?", + "sql_context": "CREATE TABLE HeritageSite (name VARCHAR(255), visitors_per_month INT); INSERT INTO HeritageSite (name, visitors_per_month) VALUES (\u0027Taj Mahal\u0027, 20000);", + "sql": "UPDATE HeritageSite SET visitors_per_month \u003d 25000 WHERE name \u003d \u0027Taj Mahal\u0027;", + "sql_explanation": "This query updates the \u0027visitors_per_month\u0027 column in the \u0027HeritageSite\u0027 table to 25000 for the row with the name \u0027Taj Mahal\u0027." +}, { + "id": "4639", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average price of traditional arts and crafts in Africa and Asia.", + "sql_context": "CREATE TABLE ArtsAndCrafts (id INT PRIMARY KEY, location VARCHAR(255), type VARCHAR(255), price DECIMAL(10,2)); INSERT INTO ArtsAndCrafts (id, location, type, price) VALUES (1, \u0027Asia\u0027, \u0027Painting\u0027, 600), (2, \u0027Africa\u0027, \u0027Sculpture\u0027, 900), (3, \u0027Asia\u0027, \u0027Pottery\u0027, 400), (4, \u0027Africa\u0027, \u0027Weaving\u0027, 500);", + "sql": "SELECT AVG(price) FROM ArtsAndCrafts WHERE location IN 
(\u0027Asia\u0027, \u0027Africa\u0027);", + "sql_explanation": "This SQL query filters the ArtsAndCrafts table by the \u0027location\u0027 column for the values \u0027Asia\u0027 and \u0027Africa\u0027, then calculates the average price of traditional arts and crafts in these regions." +}, { + "id": "4917", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and languages of all heritage sites in the \u0027Africa\u0027 region?", + "sql_context": "CREATE TABLE heritage_sites (id INT, name VARCHAR(50), location VARCHAR(50), language VARCHAR(50)); INSERT INTO heritage_sites (id, name, location, language) VALUES (1, \u0027Giza Pyramids\u0027, \u0027Egypt\u0027, \u0027Ancient Egyptian\u0027);", + "sql": "SELECT name, language FROM heritage_sites WHERE location \u003d \u0027Africa\u0027;", + "sql_explanation": "This query filters the heritage_sites table to only show records where the location is \u0027Africa\u0027. It then selects the name and language columns to display." 
+}, { + "id": "5003", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum year when a traditional art was first practiced in Africa?", + "sql_context": "CREATE TABLE first_practiced (id INT, art VARCHAR(255), year INT, country VARCHAR(255)); INSERT INTO first_practiced (id, art, year, country) VALUES (1, \u0027Adinkra\u0027, 1600, \u0027Ghana\u0027), (2, \u0027Udu\u0027, 1500, \u0027Nigeria\u0027);", + "sql": "SELECT MIN(year) FROM first_practiced WHERE country LIKE \u0027Africa%\u0027;", + "sql_explanation": "This SQL query calculates the minimum year when a traditional art was first practiced in Africa by using the MIN aggregation function on the \u0027year\u0027 column and using the WHERE clause with the \u0027Africa%\u0027 pattern to filter the results based on the \u0027country\u0027 column." 
+}, { + "id": "5045", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the traditional arts that were first inscribed in the Representative List of the Intangible Cultural Heritage of Humanity in 2003.", + "sql_context": "CREATE TABLE UNESCO_Intangible_Heritage (id INT, year INT, art_form VARCHAR(100)); INSERT INTO UNESCO_Intangible_Heritage (id, year, art_form) VALUES (1, 2001, \u0027Argentine Tango\u0027), (2, 2003, \u0027Kilim weaving in Turkey\u0027), (3, 2005, \u0027Falconry, a living human heritage\u0027);", + "sql": "SELECT art_form FROM UNESCO_Intangible_Heritage WHERE year \u003d 2003;", + "sql_explanation": "The SQL query selects all the art forms from the UNESCO_Intangible_Heritage table where the year is 2003, which is the first year that the Representative List of the Intangible Cultural Heritage of Humanity was inscribed in that year." 
+}, { + "id": "5435", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all donors who have donated more than 20 traditional art pieces?", + "sql_context": "CREATE TABLE big_donors(id INT, donor_name TEXT, art_donated INT); INSERT INTO big_donors VALUES (1, \u0027John Doe\u0027, 30), (2, \u0027Jane Smith\u0027, 25), (3, \u0027ABC Org\u0027, 40);", + "sql": "SELECT donor_name FROM big_donors WHERE art_donated \u003e 20;", + "sql_explanation": "This query retrieves the names of all donors who have donated more than 20 traditional art pieces from the \u0027big_donors\u0027 table." 
+}, { + "id": "5472", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many traditional arts and crafts are there in the Africa region?", + "sql_context": "CREATE TABLE Arts (ArtID INT, ArtName VARCHAR(50), Type VARCHAR(50), Location VARCHAR(50)); INSERT INTO Arts (ArtID, ArtName, Type, Location) VALUES (1, \u0027Batik\u0027, \u0027Textile\u0027, \u0027Nigeria\u0027), (2, \u0027Tie-dye\u0027, \u0027Textile\u0027, \u0027Ghana\u0027);", + "sql": "SELECT COUNT(*) FROM Arts WHERE Location LIKE \u0027Africa%\u0027;", + "sql_explanation": "This query uses the COUNT() function to count the total number of traditional arts and crafts in the Africa region." 
+}, { + "id": "5634", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the heritage site with ID 3 in Canada?", + "sql_context": "CREATE TABLE HeritageSites (id INT, site_name VARCHAR(100), country VARCHAR(50), site_type VARCHAR(50)); INSERT INTO HeritageSites (id, site_name, country, site_type) VALUES (1, \u0027Sydney Opera House\u0027, \u0027Australia\u0027, \u0027Cultural\u0027), (2, \u0027Great Barrier Reef\u0027, \u0027Australia\u0027, \u0027Natural\u0027), (3, \u0027Head-Smashed-In Buffalo Jump\u0027, \u0027Canada\u0027, \u0027Cultural\u0027);", + "sql": "SELECT site_name FROM HeritageSites WHERE id \u003d 3;", + "sql_explanation": "The SQL query selects the site_name from the HeritageSites table, filtering for the record with id 3." 
+}, { + "id": "5661", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and opening hours of all museums in the \u0027culture\u0027 schema?", + "sql_context": "CREATE TABLE museums (name VARCHAR(255), opening_hours VARCHAR(255)); INSERT INTO museums (name, opening_hours) VALUES (\u0027Museum of Natural History\u0027, \u002709:00-17:00\u0027), (\u0027Art Museum\u0027, \u002710:00-18:00\u0027);", + "sql": "SELECT name, opening_hours FROM culture.museums;", + "sql_explanation": "The SQL query selects the name and opening_hours columns from the \u0027museums\u0027 table in the \u0027culture\u0027 schema." 
+}, { + "id": "5697", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all traditional art forms related to textile craftsmanship.", + "sql_context": "CREATE TABLE art_forms (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(30)); INSERT INTO art_forms (id, name, type) VALUES (1, \u0027Throat Singing\u0027, \u0027Music\u0027), (2, \u0027Batik\u0027, \u0027Textile\u0027), (3, \u0027Ikebana\u0027, \u0027Visual Arts\u0027);", + "sql": "DELETE FROM art_forms WHERE type \u003d \u0027Textile\u0027;", + "sql_explanation": "This query deletes rows from the \u0027art_forms\u0027 table with a type of \u0027Textile\u0027." 
+}, { + "id": "5809", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with an id greater than 5 from the artifacts table.", + "sql_context": "CREATE TABLE artifacts (id INT, name VARCHAR(50), description TEXT); INSERT INTO artifacts (id, name, description) VALUES (1, \u0027Pottery\u0027, \u0027Ancient pottery from the Mayan civilization\u0027), (2, \u0027Totem pole\u0027, \u0027Wooden totem pole from the Haida nation\u0027), (3, \u0027Woven rug\u0027, \u0027Hand-woven rug from the Navajo tribe\u0027), (4, \u0027Beaded necklace\u0027, \u0027Beaded necklace from the Inuit people\u0027), (5, \u0027Drum\u0027, \u0027Traditional drum from the Apache tribe\u0027);", + "sql": "DELETE FROM artifacts WHERE id \u003e 5;", + "sql_explanation": "This query uses the DELETE statement to remove all records with an id greater than 5 from the artifacts table." 
+}, { + "id": "1445", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of IoT sensors that reported malfunctions in \u0027Precision Irrigation System\u0027 during the first week of August, 2021.", + "sql_context": "CREATE TABLE sensor_data (sensor_id INT, system VARCHAR(20), status VARCHAR(10), report_date DATE); INSERT INTO sensor_data (sensor_id, system, status, report_date) VALUES (1, \u0027Precision Irrigation System\u0027, \u0027malfunction\u0027, \u00272021-08-01\u0027), (2, \u0027Precision Irrigation System\u0027, \u0027working\u0027, \u00272021-08-02\u0027), (3, \u0027Precision Irrigation System\u0027, \u0027malfunction\u0027, \u00272021-08-03\u0027);", + "sql": "SELECT COUNT(*) FROM sensor_data WHERE system \u003d \u0027Precision Irrigation System\u0027 AND status \u003d \u0027malfunction\u0027 AND report_date BETWEEN \u00272021-08-01\u0027 AND \u00272021-08-07\u0027;", + "sql_explanation": "This query counts the number of records in the sensor_data table where the system is Precision Irrigation System, the status is malfunction, and the report_date is within the first week of August 2021." 
+}, { + "id": "1544", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum temperature recorded in \u0027Field006\u0027 in the past week?", + "sql_context": "CREATE TABLE temp_data (id INT, field_id VARCHAR(10), temperature FLOAT, timestamp TIMESTAMP); INSERT INTO temp_data (id, field_id, temperature, timestamp) VALUES (1, \u0027Field006\u0027, 10.2, \u00272022-02-01 10:00:00\u0027), (2, \u0027Field006\u0027, 8.0, \u00272022-02-03 10:00:00\u0027);", + "sql": "SELECT MIN(temperature) FROM temp_data WHERE field_id \u003d \u0027Field006\u0027 AND timestamp BETWEEN DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 7 DAY) AND CURRENT_TIMESTAMP;", + "sql_explanation": "This query calculates the minimum temperature for \u0027Field006\u0027 by filtering the records between the current time and one week ago, and then applying the MIN function to the temperature column." 
+}, { + "id": "1560", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rainfall (mm) in coffee plantations in Colombia during the last 6 months?", + "sql_context": "CREATE TABLE rainfall_data (rainfall DECIMAL(4,2), reading_date DATE, location TEXT); INSERT INTO rainfall_data (rainfall, reading_date, location) VALUES (25.6, \u00272021-07-01\u0027, \u0027Colombia\u0027), (28.3, \u00272021-07-02\u0027, \u0027Colombia\u0027), (22.1, \u00272021-01-01\u0027, \u0027Colombia\u0027);", + "sql": "SELECT AVG(rainfall) FROM rainfall_data WHERE location \u003d \u0027Colombia\u0027 AND reading_date \u003e DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND location LIKE \u0027%coffee%\u0027;", + "sql_explanation": "This query calculates the average rainfall from the rainfall_data table, filtering for Colombia-based records in the past six months and locations containing the word \u0027coffee\u0027." 
+}, { + "id": "1725", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hectares were under irrigation in the Philippines for the past year from satellite imagery analysis?", + "sql_context": "CREATE TABLE if not exists satellite_analysis (id INT, location VARCHAR(255), irrigated_area INT, image_date DATETIME); INSERT INTO satellite_analysis (id, location, irrigated_area, image_date) VALUES (1, \u0027Philippines\u0027, 12000, \u00272022-02-20 09:30:00\u0027), (2, \u0027Malaysia\u0027, 15000, \u00272022-02-20 09:30:00\u0027);", + "sql": "SELECT SUM(irrigated_area) FROM satellite_analysis WHERE location \u003d \u0027Philippines\u0027 AND image_date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW();", + "sql_explanation": "This query calculates the total irrigated area in the Philippines by summing up the irrigated_area values from the satellite_analysis table, filtering for data within the past year." 
+}, { + "id": "1795", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total irrigated area in South America for the past year from satellite imagery analysis?", + "sql_context": "CREATE TABLE if not exists satellite_analysis (id INT, location VARCHAR(255), irrigated_area INT, image_date DATETIME); INSERT INTO satellite_analysis (id, location, irrigated_area, image_date) VALUES (1, \u0027Brazil\u0027, 50000, \u00272022-01-01 00:00:00\u0027), (2, \u0027Argentina\u0027, 40000, \u00272022-01-01 00:00:00\u0027);", + "sql": "SELECT SUM(irrigated_area) FROM satellite_analysis WHERE location LIKE \u0027South%\u0027 AND image_date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW();", + "sql_explanation": "This query calculates the total irrigated area in South America from the satellite_analysis table, filtering for images from the past year." 
+}, { + "id": "1826", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sensors are currently malfunctioning in the fields?", + "sql_context": "CREATE TABLE sensor_status (sensor_id INTEGER, status TEXT, last_reported DATE);", + "sql": "SELECT COUNT(*) as malfunctioning_sensors FROM sensor_status WHERE status \u003d \u0027malfunctioning\u0027 AND last_reported \u003c DATEADD(day, -1, CURRENT_DATE);", + "sql_explanation": "This query calculates the number of sensors that are currently malfunctioning in the fields. It uses the DATEADD function to filter the data to sensors that have not reported data in the past day, and then calculates the count of sensors with a status of \u0027malfunctioning\u0027." 
+}, { + "id": "1951", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total rainfall in Southeast Asia for the past year from satellite imagery analysis?", + "sql_context": "CREATE TABLE if not exists satellite_data (id INT, location VARCHAR(255), rainfall INT, image_date DATETIME); INSERT INTO satellite_data (id, location, rainfall, image_date) VALUES (1, \u0027Vietnam\u0027, 2000, \u00272022-01-01 00:00:00\u0027), (2, \u0027Cambodia\u0027, 1800, \u00272022-01-01 00:00:00\u0027);", + "sql": "SELECT SUM(rainfall) FROM satellite_data WHERE location LIKE \u0027Southeast%\u0027 AND image_date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW();", + "sql_explanation": "This query calculates the total rainfall in Southeast Asia from the satellite_data table, filtering for images from the past year." 
+}, { + "id": "2075", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new row to the \u0027satellite_images\u0027 table with the given details", + "sql_context": "CREATE TABLE satellite_images (id INT PRIMARY KEY, image_url TEXT, resolution FLOAT, capture_date DATE); INSERT INTO satellite_images (id, image_url, resolution, capture_date) VALUES (1, \u0027https://example.com/image1.jpg\u0027, 0.5, \u00272021-12-25\u0027), (2, \u0027https://example.com/image2.jpg\u0027, 0.7, \u00272021-12-26\u0027), (3, \u0027https://example.com/image3.jpg\u0027, 0.6, \u00272021-12-27\u0027);", + "sql": "INSERT INTO satellite_images (id, image_url, resolution, capture_date) VALUES (4, \u0027https://example.com/image4.jpg\u0027, 0.4, \u00272021-12-28\u0027);", + "sql_explanation": "This query adds a new row to the \u0027satellite_images\u0027 table with the given details: id\u003d4, image_url\u003d\u0027https://example.com/image4.jpg\u0027, resolution\u003d0.4, and capture_date\u003d\u00272021-12-28\u0027." 
+}, { + "id": "2303", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum NDVI index value for croplands in Brazil in 2021?", + "sql_context": "CREATE TABLE croplands (id INT, country VARCHAR(255), ndvi_index DECIMAL(3,2), year INT); INSERT INTO croplands (id, country, ndvi_index, year) VALUES (1, \u0027Brazil\u0027, 0.71, 2021); INSERT INTO croplands (id, country, ndvi_index, year) VALUES (2, \u0027Brazil\u0027, 0.77, 2021); INSERT INTO croplands (id, country, ndvi_index, year) VALUES (3, \u0027Brazil\u0027, 0.80, 2021);", + "sql": "SELECT country, MIN(ndvi_index) as min_ndvi, MAX(ndvi_index) as max_ndvi FROM croplands WHERE country \u003d \u0027Brazil\u0027 AND year \u003d 2021;", + "sql_explanation": "We calculate the minimum and maximum NDVI index values for croplands in Brazil in 2021 by selecting the minimum ndvi_index and maximum ndvi_index for the croplands located in Brazil in 2021." 
+}, { + "id": "2405", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"livestock_health\" table with a \"animal_id\" of 4, \"health_status\" of \"healthy\", and \"health_check_timestamp\" of \u00272022-07-03 09:15:00\u0027", + "sql_context": "CREATE TABLE livestock_health (animal_id INT, health_status TEXT, health_check_timestamp TIMESTAMP);", + "sql": "INSERT INTO livestock_health (animal_id, health_status, health_check_timestamp) VALUES (4, \u0027healthy\u0027, \u00272022-07-03 09:15:00\u0027);", + "sql_explanation": "This query inserts a new record into the \"livestock_health\" table with a \"animal_id\" of 4, \"health_status\" of \"healthy\", and \"health_check_timestamp\" of \u00272022-07-03 09:15:00\u0027." 
+}, { + "id": "2506", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum temperature recorded at farm ID 401 in the past month?", + "sql_context": "CREATE TABLE weather_data (id INT PRIMARY KEY, farm_id INT, record_date DATE, temperature FLOAT, precipitation FLOAT); INSERT INTO weather_data (id, farm_id, record_date, temperature, precipitation) VALUES (1, 401, \u00272022-05-01\u0027, 25.6, 12.3); INSERT INTO weather_data (id, farm_id, record_date, temperature, precipitation) VALUES (2, 401, \u00272022-05-03\u0027, 26.2, 13.1);", + "sql": "SELECT MAX(temperature) FROM weather_data WHERE farm_id \u003d 401 AND record_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "This query calculates the maximum temperature recorded at farm ID 401 in the past month." 
+}, { + "id": "2515", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum temperature recorded by IoT sensors in Australia in the last week?", + "sql_context": "CREATE TABLE if NOT EXISTS iot_sensors_2 (id int, location varchar(50), temperature float, timestamp datetime); INSERT INTO iot_sensors_2 (id, location, temperature, timestamp) VALUES (1, \u0027Australia\u0027, 18.2, \u00272022-03-15 10:00:00\u0027);", + "sql": "SELECT MIN(temperature) FROM iot_sensors_2 WHERE location \u003d \u0027Australia\u0027 AND timestamp \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 WEEK);", + "sql_explanation": "This query calculates the minimum temperature from the iot_sensors_2 table for the location \u0027Australia\u0027 within the last week." 
+}, { + "id": "2551", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the agricultural_robot_metrics table where battery_level is below 20% and timestamp is older than a month", + "sql_context": "CREATE TABLE agricultural_robot_metrics (robot_id INT, battery_level DECIMAL(3,1), metric_timestamp DATETIME);", + "sql": "DELETE FROM agricultural_robot_metrics WHERE battery_level \u003c 20.0 AND metric_timestamp \u003c\u003d DATEADD(month, -1, GETDATE());", + "sql_explanation": "This query deletes all records from the agricultural_robot_metrics table where the battery_level is below 20% and the metric_timestamp is older than a month. It uses a WHERE clause to filter records based on battery_level and metric_timestamp and then deletes those records." 
+}, { + "id": "2808", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total area of soybean fields in the \u0027field_sizes\u0027 table that are larger than 500 acres and are located in Brazil.", + "sql_context": "CREATE TABLE field_sizes (crop_type VARCHAR(50), location VARCHAR(50), area_acres FLOAT); INSERT INTO field_sizes (crop_type, location, area_acres) VALUES (\u0027Soybean\u0027, \u0027Brazil\u0027, 650); INSERT INTO field_sizes (crop_type, location, area_acres) VALUES (\u0027Soybean\u0027, \u0027Brazil\u0027, 475);", + "sql": "SELECT SUM(area_acres) FROM field_sizes WHERE crop_type \u003d \u0027Soybean\u0027 AND location \u003d \u0027Brazil\u0027 AND area_acres \u003e 500;", + "sql_explanation": "This SQL query calculates the sum of area_acres from the \u0027field_sizes\u0027 table for the rows where the crop_type is \u0027Soybean\u0027, the location is \u0027Brazil\u0027, and the area_acres is greater than 500." 
+}, { + "id": "3475", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new weather record with temperature 25, humidity 50, and rainfall 0.1 for station WS010", + "sql_context": "CREATE TABLE weather_data (station_id INT, temperature INT, humidity INT, rainfall FLOAT);", + "sql": "INSERT INTO weather_data (station_id, temperature, humidity, rainfall) VALUES (10, 25, 50, 0.1);", + "sql_explanation": "This SQL query inserts a new record into the weather_data table. It specifies the column names (station_id, temperature, humidity, and rainfall) and the values for those columns (10, 25, 50, and 0.1) for the new record." +}, { + "id": "3552", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the farmland table for a farmland in Brazil with a precision agriculture value of true", + "sql_context": "CREATE TABLE farmland (farmland_id INT, country VARCHAR(255), precision_agriculture BOOLEAN); INSERT INTO farmland (farmland_id, country, precision_agriculture) VALUES (1, \u0027France\u0027, TRUE), (2, \u0027Germany\u0027, FALSE), (3, \u0027China\u0027, TRUE), (4, \u0027Japan\u0027, TRUE);", + "sql": "INSERT INTO farmland (farmland_id, country, precision_agriculture) VALUES (5, \u0027Brazil\u0027, TRUE);", + "sql_explanation": 
"This SQL query inserts a new record into the \u0027farmland\u0027 table for a farmland in Brazil with a precision agriculture value of true. The \u0027farmland_id\u0027 column is set to 5 for this new record." +}, { + "id": "3751", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new temperature data for sensor 008 on 2023-03-03 with a value of 25°C", + "sql_context": "CREATE TABLE TemperatureData (date DATE, temperature FLOAT, sensor_id INT, FOREIGN KEY (sensor_id) REFERENCES SensorData(sensor_id));", + "sql": "INSERT INTO TemperatureData (date, temperature, sensor_id) VALUES (\u00272023-03-03\u0027, 25, 8);", + "sql_explanation": "The SQL query inserts a new record into the \u0027TemperatureData\u0027 table for sensor 008 with a temperature value of 25°C on March 3, 2023." 
+}, { + "id": "3899", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the soil_moisture values in the soil_conditions table by adding 5 for all records where the location is \u0027Farm A\u0027", + "sql_context": "CREATE TABLE soil_conditions (location VARCHAR(255), soil_moisture INT, last_updated DATETIME);", + "sql": "UPDATE soil_conditions SET soil_moisture \u003d soil_moisture + 5 WHERE location \u003d \u0027Farm A\u0027;", + "sql_explanation": "This query updates the soil_moisture values in the soil_conditions table by adding 5 for all records where the location is \u0027Farm A\u0027." +}, { + "id": "4100", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which fields have experienced pest issues but have not yet received any pesticide treatment?", + "sql_context": "CREATE TABLE Fields (FieldID varchar(5), FieldName varchar(10), PestIssue bool, PesticideTreatment timestamp); INSERT INTO Fields (FieldID, FieldName, PestIssue, PesticideTreatment) VALUES (\u0027A\u0027, \u0027Field A\u0027, true, \u00272022-06-15 11:30:00\u0027), (\u0027B\u0027, \u0027Field B\u0027, false, null), (\u0027C\u0027, \u0027Field C\u0027, true, \u00272022-06-27 14:15:00\u0027);", + "sql": "SELECT FieldName FROM Fields WHERE PestIssue \u003d 
true AND PesticideTreatment IS NULL;", + "sql_explanation": "This query selects the field names from the \u0027Fields\u0027 table where the \u0027PestIssue\u0027 column is true and the \u0027PesticideTreatment\u0027 column is null." +}, { + "id": "4155", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature recorded by soil sensors in vineyards located in France?", + "sql_context": "CREATE TABLE vineyard_soil_sensors (id INT, location VARCHAR(255), temperature DECIMAL(5,2), timestamp TIMESTAMP); INSERT INTO vineyard_soil_sensors (id, location, temperature, timestamp) VALUES (1, \u0027France-Champagne\u0027, 15.3, \u00272022-01-01 10:00:00\u0027), (2, \u0027France-Bordeaux\u0027, 14.8, \u00272022-01-01 10:00:00\u0027);", + "sql": "SELECT AVG(temperature) FROM vineyard_soil_sensors WHERE location LIKE \u0027%France%\u0027;", + "sql_explanation": "The SQL query calculates the average temperature from the vineyard_soil_sensors table by filtering the rows with locations containing the string \u0027France\u0027." 
+}, { + "id": "4378", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify anomalies in soil moisture readings", + "sql_context": "CREATE TABLE soil_moisture (sensor_id TEXT, moisture_level FLOAT, measurement_date DATE); INSERT INTO soil_moisture (sensor_id, moisture_level, measurement_date) VALUES (\u0027Sensor 101\u0027, 45.2, \u00272022-01-01\u0027), (\u0027Sensor 101\u0027, 46.1, \u00272022-01-02\u0027), (\u0027Sensor 101\u0027, 52.0, \u00272022-01-03\u0027);", + "sql": "SELECT sensor_id, moisture_level FROM soil_moisture WHERE moisture_level \u003e 50;", + "sql_explanation": "Detects anomalies in soil moisture readings by filtering records with moisture levels above a specified threshold." 
+}, { + "id": "4474", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which farms have a total area greater than 200 hectares?", + "sql_context": "CREATE TABLE farm_info (id INT, farm_name VARCHAR(50), location VARCHAR(50), total_area FLOAT); INSERT INTO farm_info (id, farm_name, location, total_area) VALUES (1, \u0027Green acres\u0027, \u0027USA, California\u0027, 150.0);", + "sql": "SELECT farm_name, location, total_area FROM farm_info WHERE total_area \u003e 200;", + "sql_explanation": "This query returns the name, location, and total area of each farm with a total area greater than 200 hectares. It filters the farm_info table by total_area greater than 200." +}, { + "id": "4497", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum temperature recorded in \u0027Greenhouse7\u0027 for the month of September?", + "sql_context": "CREATE TABLE Greenhouse7 (date DATE, temperature FLOAT);", + "sql": "SELECT MIN(temperature) FROM Greenhouse7 WHERE EXTRACT(MONTH FROM date) \u003d 9;", + "sql_explanation": "This query calculates the minimum temperature from the \u0027Greenhouse7\u0027 table for the month of September. 
It does this by using the MIN function on the \u0027temperature\u0027 column, and filtering the data for September using the EXTRACT function on the \u0027date\u0027 column." +}, { + "id": "4705", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many IoT devices are connected to vineyards in Italy?", + "sql_context": "CREATE TABLE IoT_Devices (id INT, device_type VARCHAR(255), location VARCHAR(255)); INSERT INTO IoT_Devices (id, device_type, location) VALUES (1, \u0027Soil Moisture Sensor\u0027, \u0027Italy Vineyard\u0027), (2, \u0027Temperature Sensor\u0027, \u0027Italy Vineyard\u0027), (3, \u0027Drone\u0027, \u0027Italy\u0027);", + "sql": "SELECT COUNT(*) FROM IoT_Devices WHERE location LIKE \u0027%Italy Vineyard%\u0027;", + "sql_explanation": "This query counts the number of IoT devices connected to vineyards in Italy. It filters data from the IoT_Devices table based on the location using the LIKE operator." 
+}, { + "id": "4806", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum moisture level recorded for soybean crops in Brazil?", + "sql_context": "CREATE TABLE crop_moisture (id INT, crop VARCHAR(50), moisture_level FLOAT, record_date DATE); INSERT INTO crop_moisture (id, crop, moisture_level, record_date) VALUES (1, \u0027Corn\u0027, 60.5, \u00272022-04-01\u0027), (2, \u0027Soybeans\u0027, 45.2, \u00272022-04-01\u0027), (3, \u0027Cotton\u0027, 70.0, \u00272022-04-01\u0027), (4, \u0027Wheat\u0027, 35.7, \u00272022-04-01\u0027), (5, \u0027Corn\u0027, 62.1, \u00272022-04-02\u0027), (6, \u0027Soybeans\u0027, 47.8, \u00272022-04-02\u0027), (7, \u0027Cotton\u0027, 72.5, \u00272022-04-02\u0027), (8, \u0027Wheat\u0027, 37.3, \u00272022-04-02\u0027), (9, \u0027Corn\u0027, 65.0, \u00272022-04-03\u0027), (10, \u0027Soybeans\u0027, 49.6, \u00272022-04-03\u0027);", + "sql": "SELECT MAX(moisture_level) FROM crop_moisture WHERE crop \u003d \u0027Soybeans\u0027;", + "sql_explanation": "This query calculates the maximum moisture level recorded for soybean crops in Brazil by filtering the crop_moisture table for soybean crops and calculating the maximum moisture_level." 
+}, { + "id": "5617", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum temperature recorded in \u0027Field D\u0027?", + "sql_context": "CREATE TABLE sensors (sensor_id INT, location VARCHAR(50)); INSERT INTO sensors (sensor_id, location) VALUES (004, \u0027Field D\u0027); CREATE TABLE temps (sensor_id INT, temp FLOAT, timestamp TIMESTAMP); INSERT INTO temps (sensor_id, temp, timestamp) VALUES (004, 29.5, \u00272022-01-01 10:00:00\u0027); INSERT INTO temps (sensor_id, temp, timestamp) VALUES (004, 31.6, \u00272022-01-02 11:00:00\u0027);", + "sql": "SELECT MAX(temp) FROM temps WHERE sensor_id \u003d 004;", + "sql_explanation": "Find the maximum temperature (MAX) recorded in the \u0027temps\u0027 table where sensor_id is 004." 
+}, { + "id": "5652", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete any weather records where temperature is below -5 degrees Celsius.", + "sql_context": "CREATE TABLE weather_data (id INT, farm_id INT, date DATE, temperature FLOAT, humidity FLOAT); INSERT INTO weather_data (id, farm_id, date, temperature, humidity) VALUES (1, 1, \u00272018-01-01\u0027, -6.0, 80.0); INSERT INTO weather_data (id, farm_id, date, temperature, humidity) VALUES (2, 1, \u00272018-01-02\u0027, -1.0, 75.0);", + "sql": "DELETE FROM weather_data WHERE temperature \u003c -5;", + "sql_explanation": "Delete records in the weather_data table where the temperature is below -5 degrees Celsius." +}, { + "id": "5725", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the table \"harvest_data\" where the \"farm_id\" is 3", + "sql_context": "CREATE TABLE harvest_data (id INT PRIMARY KEY, farm_id INT, yield FLOAT, date DATE);", + "sql": "DELETE FROM harvest_data WHERE farm_id \u003d 3;", + "sql_explanation": "This query deletes all records from the \"harvest_data\" table where the \"farm_id\" is 3." 
+}, { + "id": "699", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of wheelchair accessible and non-accessible vehicles in the fleet?", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, wheelchair_accessible BOOLEAN); INSERT INTO vehicles VALUES (1, TRUE); INSERT INTO vehicles VALUES (2, FALSE); INSERT INTO vehicles VALUES (3, TRUE); INSERT INTO vehicles VALUES (4, FALSE); INSERT INTO vehicles VALUES (5, TRUE);", + "sql": "SELECT SUM(IF(vehicles.wheelchair_accessible \u003d TRUE, 1, 0)) as wheelchair_accessible_vehicles, SUM(IF(vehicles.wheelchair_accessible \u003d FALSE, 1, 0)) as non_wheelchair_accessible_vehicles FROM vehicles;", + "sql_explanation": "This query calculates the total number of wheelchair accessible vehicles and the total number of non-accessible vehicles in the fleet using the IF function." 
+}, { + "id": "1661", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum fare for any public transportation in Sydney?", + "sql_context": "CREATE TABLE bus_routes (route_id INT, city VARCHAR(50), fare DECIMAL(5,2)); INSERT INTO bus_routes (route_id, city, fare) VALUES (1, \u0027Sydney\u0027, 4.50), (2, \u0027Sydney\u0027, 3.20); CREATE TABLE train_lines (line_id INT, city VARCHAR(50), fare DECIMAL(5,2)); INSERT INTO train_lines (line_id, city, fare) VALUES (1, \u0027Sydney\u0027, 6.00), (2, \u0027Sydney\u0027, 5.00);", + "sql": "SELECT MAX(greatest(bus_routes.fare, train_lines.fare)) FROM bus_routes, train_lines WHERE bus_routes.city \u003d \u0027Sydney\u0027 AND train_lines.city \u003d \u0027Sydney\u0027;", + "sql_explanation": "This query calculates the maximum fare for any public transportation in Sydney by using the greatest function to get the maximum fare between the bus_routes and train_lines tables. It then filters the results to only include rows where the city is \u0027Sydney\u0027 and calculates the maximum fare using the MAX function." 
+}, { + "id": "1983", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all transactions that occurred on the Green Line in the last week.", + "sql_context": "CREATE TABLE transactions (transaction_id INT, route_name VARCHAR(255), transaction_date DATE); INSERT INTO transactions (transaction_id, route_name, transaction_date) VALUES (1, \u0027Green Line\u0027, \u00272022-07-01\u0027), (2, \u0027Red Line\u0027, \u00272022-07-02\u0027), (3, \u0027Green Line\u0027, \u00272022-07-05\u0027);", + "sql": "SELECT * FROM transactions WHERE route_name \u003d \u0027Green Line\u0027 AND transaction_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 WEEK) AND CURDATE();", + "sql_explanation": "This query lists all transactions that occurred on the Green Line in the last week. It does so by selecting all records in the transactions table where the route_name matches the Green Line and the transaction_date falls within the last week (using the BETWEEN operator and the DATE_SUB function)." 
+}, { + "id": "2294", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many accessible taxi trips were taken in Sydney in the last year?", + "sql_context": "CREATE TABLE taxi_trips (trip_id INT, trip_date DATE, trip_type VARCHAR(255), accessible BOOLEAN);", + "sql": "SELECT COUNT(*) FROM taxi_trips WHERE trip_type \u003d \u0027accessible\u0027 AND trip_date \u003e\u003d DATEADD(YEAR, -1, GETDATE()) AND city \u003d \u0027Sydney\u0027;", + "sql_explanation": "This query counts the number of accessible taxi trips taken in Sydney in the last year by using the COUNT() function on all rows in the taxi_trips table, filtering for rows where the trip_type is \u0027accessible\u0027 and the trip_date is within the last year using the GETDATE() and DATEADD() functions, and the city is \u0027Sydney\u0027." 
+}, { + "id": "2328", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027vehicle_maintenance\u0027 table for a bus with vehicle_id \u0027BUS-123\u0027", + "sql_context": "CREATE TABLE vehicle_maintenance (vehicle_id VARCHAR(20), maintenance_date DATE, maintenance_type VARCHAR(30)); INSERT INTO vehicle_maintenance (vehicle_id, maintenance_date, maintenance_type) VALUES (\u0027TRAIN-456\u0027, \u00272022-01-01\u0027, \u0027Oil Change\u0027), (\u0027TRAIN-789\u0027, \u00272022-02-15\u0027, \u0027Tire Rotation\u0027);", + "sql": "INSERT INTO vehicle_maintenance (vehicle_id, maintenance_date, maintenance_type) VALUES (\u0027BUS-123\u0027, \u00272022-03-01\u0027, \u0027Inspection\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027vehicle_maintenance\u0027 table for a bus with vehicle_id \u0027BUS-123\u0027. It uses the INSERT INTO statement to add a new record to the table with the specified column values." 
+}, { + "id": "2455", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the vehicle maintenance records for trams in January 2023.", + "sql_context": "CREATE TABLE tram_maintenance (maintenance_id INT, maintenance_date DATE, maintenance_type VARCHAR(20), vehicle_id INT, vehicle_model VARCHAR(20));", + "sql": "SELECT * FROM tram_maintenance WHERE maintenance_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-01-31\u0027 AND vehicle_model LIKE \u0027%Tram%\u0027;", + "sql_explanation": "Listing all the vehicle maintenance records for trams in January 2023." +}, { + "id": "2726", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of buses and trams in the \u0027south\u0027 region?", + "sql_context": "CREATE TABLE vehicle_counts (region VARCHAR(10), vehicle_type VARCHAR(10), count INT); INSERT INTO vehicle_counts (region, vehicle_type, count) VALUES (\u0027east\u0027, \u0027buses\u0027, 100), (\u0027west\u0027, \u0027buses\u0027, 120), (\u0027north\u0027, \u0027buses\u0027, 150), (\u0027south\u0027, \u0027buses\u0027, 80), (\u0027east\u0027, \u0027trams\u0027, 20), (\u0027west\u0027, \u0027trams\u0027, 30), (\u0027north\u0027, \u0027trams\u0027, 40), 
(\u0027south\u0027, \u0027trams\u0027, 50);", + "sql": "SELECT SUM(count) FROM vehicle_counts WHERE region \u003d \u0027south\u0027 AND (vehicle_type \u003d \u0027buses\u0027 OR vehicle_type \u003d \u0027trams\u0027);", + "sql_explanation": "This query calculates the total number of buses and trams in the \u0027south\u0027 region by summing the counts of buses and trams in that region." +}, { + "id": "3016", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record to the routes table with a route_id of 3, a route_short_name of \u0027B\u0027, a route_long_name of \u0027Broadway\u0027, and a route_type of 0", + "sql_context": "CREATE TABLE routes (route_id INT, route_short_name VARCHAR(10), route_long_name VARCHAR(255), route_type INT);", + "sql": "INSERT INTO routes (route_id, route_short_name, route_long_name, route_type) VALUES (3, \u0027B\u0027, \u0027Broadway\u0027, 0);", + "sql_explanation": "This query adds a new record to the routes table with a route_id of 3, a route_short_name of \u0027B\u0027, a route_long_name of \u0027Broadway\u0027, and a route_type of 0." 
+}, { + "id": "3082", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027accessibility\u0027 with columns \u0027station_id\u0027, \u0027wheelchair_accessible\u0027, \u0027elevator_availability", + "sql_context": "CREATE TABLE accessibility (station_id INT, wheelchair_accessible BOOLEAN, elevator_availability BOOLEAN);", + "sql": "CREATE TABLE accessibility (station_id INT, wheelchair_accessible BOOLEAN, elevator_availability BOOLEAN);", + "sql_explanation": "Creates a new table named \u0027accessibility\u0027 with columns \u0027station_id\u0027, \u0027wheelchair_accessible\u0027, and \u0027elevator_availability\u0027. BOOLEAN data types are used to ensure compatibility with different data types." 
+}, { + "id": "3086", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027stations\u0027 with columns \u0027station_id\u0027, \u0027name\u0027, \u0027latitude\u0027, \u0027longitude\u0027", + "sql_context": "CREATE TABLE stations (station_id INT, name VARCHAR(255), latitude DECIMAL(9,6), longitude DECIMAL(9,6));", + "sql": "CREATE TABLE stations (station_id INT, name VARCHAR(255), latitude DECIMAL(9,6), longitude DECIMAL(9,6));", + "sql_explanation": "Creates a new table named \u0027stations\u0027 with columns \u0027station_id\u0027, \u0027name\u0027, \u0027latitude\u0027, and \u0027longitude\u0027. Integer, VARCHAR, and DECIMAL data types are used to ensure compatibility with different data types." 
+}, { + "id": "3315", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show ferry routes with bike racks for the \u0027South Harbor\u0027", + "sql_context": "CREATE TABLE ferries (id INT PRIMARY KEY, route_id INT, station VARCHAR(20), bike_racks BOOLEAN);", + "sql": "SELECT DISTINCT route_id, station FROM ferries WHERE station \u003d \u0027South Harbor\u0027 AND bike_racks \u003d TRUE;", + "sql_explanation": "The query shows ferry routes with bike racks for the \u0027South Harbor\u0027. It uses the DISTINCT keyword to list unique ferry routes and the WHERE clause to filter the station and bike_racks." +}, { + "id": "3847", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027vehicles\u0027 table for a train with license plate \u0027DEF456\u0027 and a model year of 2018", + "sql_context": "CREATE TABLE vehicles (id INT, license_plate TEXT, model_year INT, type TEXT);", + "sql": "INSERT INTO vehicles (license_plate, model_year, type) VALUES (\u0027DEF456\u0027, 2018, \u0027train\u0027);", + "sql_explanation": "* This SQL query inserts a new record into the \u0027vehicles\u0027 table for a train with license plate \u0027DEF456\u0027 and a model year of 2018." 
+}, { + "id": "3932", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum distance between two subway stations in New York City?", + "sql_context": "CREATE TABLE subway_stations (station_id INT, station_name VARCHAR(255), city VARCHAR(255), distance_to_next_station INT);", + "sql": "SELECT MAX(distance_to_next_station) FROM subway_stations WHERE city \u003d \u0027New York City\u0027;", + "sql_explanation": "This query calculates the maximum distance between two subway stations in New York City by using the MAX() function on the distance_to_next_station column of the subway_stations table, filtering for rows where the city is \u0027New York City\u0027." 
+}, { + "id": "4116", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total fare collected from the Blue Line on February 14th, 2022?", + "sql_context": "CREATE TABLE routes (route_id INT, route_name VARCHAR(255)); INSERT INTO routes (route_id, route_name) VALUES (1, \u0027Red Line\u0027), (2, \u0027Blue Line\u0027); CREATE TABLE fares (fare_id INT, route_id INT, fare_amount DECIMAL(5,2), fare_date DATE); INSERT INTO fares (fare_id, route_id, fare_amount, fare_date) VALUES (1, 1, 3.50, \u00272022-01-03\u0027), (2, 2, 4.25, \u00272022-02-14\u0027);", + "sql": "SELECT SUM(fare_amount) FROM fares WHERE route_id \u003d 2 AND fare_date \u003d \u00272022-02-14\u0027;", + "sql_explanation": "This query calculates the total fare collected from the Blue Line (route_id 2) on February 14th, 2022 by summing the fare_amount where the route_id is 2 and the fare_date is \u00272022-02-14\u0027." 
+}, { + "id": "4346", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the fare for \u0027child\u0027 passengers in the \u0027green\u0027 line.", + "sql_context": "CREATE TABLE fares (line VARCHAR(10), passenger_type VARCHAR(10), fare FLOAT);", + "sql": "UPDATE fares SET fare \u003d 2.00 WHERE line \u003d \u0027green\u0027 AND passenger_type \u003d \u0027child\u0027;", + "sql_explanation": "This query updates the fare for \u0027child\u0027 passengers in the \u0027green\u0027 line to 2.00." +}, { + "id": "4654", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum fare collected for the \u0027Red Line\u0027 during any given month?", + "sql_context": "CREATE TABLE fare_by_month (route_name VARCHAR(50), month_year DATE, fare_amount DECIMAL(10,2)); INSERT INTO fare_by_month (route_name, month_year, fare_amount) VALUES (\u0027Red Line\u0027, \u00272022-01-01\u0027, 100.00), (\u0027Red Line\u0027, \u00272022-02-01\u0027, 110.00), (\u0027Red Line\u0027, \u00272022-03-01\u0027, 95.00);", + "sql": "SELECT MIN(fare_amount) FROM fare_by_month WHERE route_name \u003d \u0027Red Line\u0027;", + "sql_explanation": "This query calculates the minimum fare collected for the \u0027Red Line\u0027 during any 
given month by selecting the minimum value of the \u0027fare_amount\u0027 column for rows with \u0027route_name\u0027 equal to \u0027Red Line\u0027." +}, { + "id": "4714", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many trains in Tokyo have a delay greater than 5 minutes?", + "sql_context": "CREATE TABLE trains (id INT, city VARCHAR(50), delay TIME); INSERT INTO trains (id, city, delay) VALUES (1, \u0027Tokyo\u0027, \u002700:07\u0027), (2, \u0027Tokyo\u0027, \u002700:03\u0027), (3, \u0027Paris\u0027, \u002700:10\u0027), (4, \u0027Paris\u0027, \u002700:02\u0027);", + "sql": "SELECT COUNT(*) FROM trains WHERE city \u003d \u0027Tokyo\u0027 AND delay \u003e \u002700:05:00\u0027;", + "sql_explanation": "The SQL query counts the number of trains in Tokyo with a delay greater than 5 minutes by using the COUNT function, filtering the data where the \u0027city\u0027 column is \u0027Tokyo\u0027 and the \u0027delay\u0027 column is greater than \u002700:05:00\u0027." 
+}, { + "id": "4756", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest service date for the \u0027Body Shop\u0027?", + "sql_context": "CREATE TABLE maintenance (workshop VARCHAR(20), service_date DATE); INSERT INTO maintenance (workshop, service_date) VALUES (\u0027Maintenance\u0027, \u00272021-03-15\u0027), (\u0027Body Shop\u0027, \u00272021-03-17\u0027), (\u0027Maintenance\u0027, \u00272021-03-20\u0027), (\u0027Tires\u0027, \u00272021-03-22\u0027), (\u0027Maintenance\u0027, \u00272021-03-30\u0027), (\u0027Body Shop\u0027, \u00272021-03-31\u0027);", + "sql": "SELECT MAX(service_date) FROM maintenance WHERE workshop \u003d \u0027Body Shop\u0027;", + "sql_explanation": "This SQL query finds the latest service date for the \u0027Body Shop\u0027 by filtering the records based on the \u0027workshop\u0027 column and then finding the maximum value of the \u0027service_date\u0027 column." 
+}, { + "id": "4768", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum fare for train and bus services?", + "sql_context": "CREATE TABLE fares (fare_id INT, mode_id INT, fare_amount DECIMAL(5,2)); INSERT INTO fares VALUES (1, 1, 2.50); INSERT INTO fares VALUES (2, 1, 3.00); INSERT INTO fares VALUES (3, 2, 1.75);", + "sql": "SELECT MAX(fare_amount) as max_fare FROM fares WHERE mode_id IN (1, 2);", + "sql_explanation": "Calculate the maximum fare_amount from the fares table where mode_id is either 1 (Train) or 2 (Bus) to get the maximum fare for train and bus services." +}, { + "id": "4914", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update rider \u0027Liam Johnson\u0027s\u0027 address to \u0027789 Oak St\u0027", + "sql_context": "CREATE TABLE riders (rider_id INT, name VARCHAR(255), address VARCHAR(255)); INSERT INTO riders (rider_id, name, address) VALUES (1, \u0027John Smith\u0027, \u0027456 Elm St\u0027), (2, \u0027Jane Doe\u0027, \u0027742 Pine St\u0027), (3, \u0027Liam Johnson\u0027, \u0027321 Maple St\u0027);", + "sql": "UPDATE riders SET address \u003d \u0027789 Oak St\u0027 WHERE name \u003d \u0027Liam Johnson\u0027;", + "sql_explanation": "This query updates rider \u0027Liam 
Johnson\u0027s\u0027 address to \u0027789 Oak St\u0027. It does this by using an UPDATE statement to change the address for \u0027Liam Johnson\u0027 in the riders table." +}, { + "id": "5028", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected from buses in \u0027south\u0027 and \u0027west\u0027 regions?", + "sql_context": "CREATE TABLE bus_fares (region VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO bus_fares (region, fare) VALUES (\u0027north\u0027, 2.00), (\u0027north\u0027, 2.50), (\u0027south\u0027, 1.50), (\u0027west\u0027, 3.00), (\u0027west\u0027, 2.75);", + "sql": "SELECT SUM(fare) FROM bus_fares WHERE region IN (\u0027south\u0027, \u0027west\u0027);", + "sql_explanation": "This query calculates the total fare collected from buses in the \u0027south\u0027 and \u0027west\u0027 regions by summing up all the fares for those regions." 
+}, { + "id": "5057", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a route transfer from the 73 bus to the Red Line.", + "sql_context": "CREATE TABLE Transfers (route VARCHAR(20), transfer VARCHAR(20)); INSERT INTO Transfers (route, transfer) VALUES (\u00271\u0027, \u0027Red Line\u0027), (\u00279\u0027, \u0027Red Line\u0027);", + "sql": "INSERT INTO Transfers (route, transfer) VALUES (\u002773\u0027, \u0027Red Line\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027Transfers\u0027 table, adding a route transfer from the 73 bus to the Red Line." +}, { + "id": "5157", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum fare for train trips in London?", + "sql_context": "CREATE TABLE if not exists london_train_trips (id INT, trip_id INT, fare DECIMAL(5,2), route_id INT, vehicle_id INT, timestamp TIMESTAMP);", + "sql": "SELECT MIN(fare) FROM london_train_trips WHERE fare IS NOT NULL;", + "sql_explanation": "This query calculates the minimum fare for train trips in London. It does so by selecting the MIN function on the \u0027fare\u0027 column from the \u0027london_train_trips\u0027 table, where the \u0027fare\u0027 is not null." 
+}, { + "id": "5247", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum fare for the commuter rail in Boston?", + "sql_context": "CREATE TABLE fares (fare_id INT, route_id INT, fare DECIMAL(5,2), fare_type VARCHAR(20)); INSERT INTO fares (fare_id, route_id, fare, fare_type) VALUES (1, 1, 8.00, \u0027Commuter Rail\u0027), (2, 2, 6.50, \u0027Subway\u0027), (3, 3, 9.00, \u0027Commuter Rail\u0027);", + "sql": "SELECT MIN(fare) FROM fares WHERE fare_type \u003d \u0027Commuter Rail\u0027;", + "sql_explanation": "This query finds the minimum fare for the commuter rail in Boston. It uses the MIN aggregation function to find the lowest value in the fare column for records in the fares table where the fare_type is \u0027Commuter Rail\u0027." 
+}, { + "id": "5498", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average fare for buses in the \u0027Southside\u0027 region?", + "sql_context": "CREATE TABLE Buses (route_id INT, region VARCHAR(20), fare DECIMAL(5,2)); INSERT INTO Buses (route_id, region, fare) VALUES (1, \u0027Southside\u0027, 1.50), (2, \u0027Northpoint\u0027, 2.00), (3, \u0027Southside\u0027, 2.50);", + "sql": "SELECT AVG(fare) FROM Buses WHERE region \u003d \u0027Southside\u0027;", + "sql_explanation": "The SQL query calculates the average fare for buses in the \u0027Southside\u0027 region by using the AVG function on the \u0027fare\u0027 column, filtering by the \u0027region\u0027 column with the value \u0027Southside\u0027." 
+}, { + "id": "5549", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all routes with accessibility features.", + "sql_context": "CREATE TABLE route (route_id INT, route_name VARCHAR(50), accessible BOOLEAN); INSERT INTO route (route_id, route_name, accessible) VALUES (1, \u0027Red Line\u0027, TRUE), (2, \u0027Green Line\u0027, FALSE), (3, \u0027Blue Line\u0027, TRUE), (4, \u0027Yellow Line\u0027, FALSE);", + "sql": "SELECT route_name FROM route WHERE accessible \u003d TRUE;", + "sql_explanation": "This query lists all routes with accessibility features by selecting the route_name from the route table where accessible equals TRUE." 
+}, { + "id": "5565", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many trams are there in total in the city of Melbourne?", + "sql_context": "CREATE TABLE trams (id INT, city VARCHAR(20), model VARCHAR(20)); INSERT INTO trams (id, city, model) VALUES (1, \u0027Melbourne\u0027, \u0027Citadis\u0027), (2, \u0027Melbourne\u0027, \u0027Flexity\u0027), (3, \u0027Sydney\u0027, \u0027Citadis\u0027);", + "sql": "SELECT COUNT(*) FROM trams WHERE city \u003d \u0027Melbourne\u0027;", + "sql_explanation": "This query counts the number of trams in the city of Melbourne. It filters the results to only include rows where the city is \u0027Melbourne\u0027 and then counts the number of rows using the COUNT function." +}, { + "id": "5614", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the \u0027Blue Line\u0027 route", + "sql_context": "CREATE TABLE routes (route_id INT, route_name VARCHAR(255)); INSERT INTO routes (route_id, route_name) VALUES (1, \u0027Green Line\u0027), (2, \u0027Red Line\u0027), (3, \u0027Blue Line\u0027);", + "sql": "DELETE FROM routes WHERE route_name \u003d \u0027Blue Line\u0027;", + "sql_explanation": "This query deletes the \u0027Blue Line\u0027 route. 
It does this by using a DELETE statement to remove the \u0027Blue Line\u0027 route from the routes table." +}, { + "id": "5659", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum fare for buses in the \u0027city\u0027 schema, excluding fares greater than $3?", + "sql_context": "CREATE SCHEMA city; CREATE TABLE city.buses (id INT, fare DECIMAL); INSERT INTO city.buses (id, fare) VALUES (1, 2.50), (2, 1.75), (3, 3.00);", + "sql": "SELECT MIN(fare) FROM city.buses WHERE fare \u003c 3;", + "sql_explanation": "The SQL query calculates the minimum fare for buses in the \u0027city\u0027 schema, excluding fares greater than $3. It uses the MIN function to find the lowest value of the \u0027fare\u0027 column, and the WHERE clause to filter out records with fares greater than $3." 
+}, { + "id": "5682", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which metro lines in Barcelona have the least number of passengers?", + "sql_context": "CREATE TABLE MetroLines (LineID int, Passengers int); INSERT INTO MetroLines (LineID, Passengers) VALUES (1, 1000), (2, 800), (3, 800);", + "sql": "SELECT LineID, MIN(Passengers) FROM MetroLines;", + "sql_explanation": "This query retrieves the metro lines with the least number of passengers by using the MIN() function to find the minimum number of passengers and selecting the LineID column." +}, { + "id": "5699", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027routes\u0027 table where the \u0027route_type\u0027 is \u0027Tram\u0027", + "sql_context": "CREATE TABLE routes (id INT, route_number INT, route_type TEXT, city TEXT);", + "sql": "DELETE FROM routes WHERE route_type \u003d \u0027Tram\u0027;", + "sql_explanation": "* This SQL query deletes all records from the \u0027routes\u0027 table where the \u0027route_type\u0027 is \u0027Tram\u0027." 
+}, { + "id": "5797", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the fare collected from each vehicle type?", + "sql_context": "CREATE TABLE Fares (id INT, vehicle_type VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO Fares (id, vehicle_type, fare) VALUES (1, \u0027Bus\u0027, 2.50), (2, \u0027Tram\u0027, 3.00), (3, \u0027Train\u0027, 5.00);", + "sql": "SELECT vehicle_type, fare FROM Fares;", + "sql_explanation": "This query selects all vehicle types and their respective fares from the Fares table." +}, { + "id": "5806", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum fare for a trip on the subway?", + "sql_context": "CREATE TABLE subway_fares (id INT, route VARCHAR(10), fare FLOAT); INSERT INTO subway_fares (id, route, fare) VALUES (1, \u00271\u0027, 2.50), (2, \u00272\u0027, 3.25), (3, \u00273\u0027, 4.00);", + "sql": "SELECT MAX(fare) FROM subway_fares;", + "sql_explanation": "This SQL query finds the maximum fare for a trip on the subway by using the MAX() function to find the highest fare in the subway_fares table." 
+}, { + "id": "5811", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique station names from the stations table", + "sql_context": "CREATE TABLE stations (station_id INTEGER, name TEXT, latitude REAL, longitude REAL); INSERT INTO stations (station_id, name, latitude, longitude) VALUES (1, \u0027Downtown\u0027, 40.7128, -74.0060);", + "sql": "SELECT DISTINCT name FROM stations;", + "sql_explanation": "The query selects all unique station names (DISTINCT) from the stations table." +}, { + "id": "5845", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all \u0027Monorail\u0027 routes", + "sql_context": "CREATE TABLE monorail_routes (route_id INT PRIMARY KEY, start_location TEXT, end_location TEXT);", + "sql": "DELETE FROM monorail_routes;", + "sql_explanation": "This query deletes all records from the monorail_routes table." 
+}, { + "id": "1251", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of animals in the \u0027animal_population\u0027 table that are part of a vulnerable or endangered species", + "sql_context": "CREATE TABLE animal_population (species VARCHAR(50), animal_count INT, conservation_status VARCHAR(20));", + "sql": "SELECT 100.0 * SUM(CASE WHEN conservation_status IN (\u0027vulnerable\u0027, \u0027endangered\u0027) THEN animal_count ELSE 0 END) / SUM(animal_count) as percentage FROM animal_population;", + "sql_explanation": "This query calculates the percentage of animals in the animal_population table that are part of a vulnerable or endangered species." +}, { + "id": "2208", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records into the \u0027CommunityEducation\u0027 table.", + "sql_context": "CREATE TABLE CommunityEducation (EventID int, EventName varchar(50), Attendance int);", + "sql": "INSERT INTO CommunityEducation (EventID, EventName, Attendance) VALUES (1, \u0027Wildlife Conservation\u0027, 100), (2, \u0027Bird Watching\u0027, 75);", + "sql_explanation": "This query inserts two new records into the \u0027CommunityEducation\u0027 table. It uses the VALUES keyword to specify the data for each column in the new records." 
+}, { + "id": "2285", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027animal_population\u0027 table for the \u0027elephants\u0027 in the \u0027Amboseli National Park\u0027 with a population of 120", + "sql_context": "CREATE TABLE animal_population (id INT, animal_type VARCHAR(20), habitat_name VARCHAR(30), population INT);", + "sql": "INSERT INTO animal_population (id, animal_type, habitat_name, population) VALUES (1, \u0027elephants\u0027, \u0027Amboseli National Park\u0027, 120);", + "sql_explanation": "1. This query inserts a new record into the \u0027animal_population\u0027 table. 2. The VALUES clause specifies the column values for the new record: id\u003d1, animal_type\u003d\u0027elephants\u0027, habitat_name\u003d\u0027Amboseli National Park\u0027, and population\u003d120." +}, { + "id": "2672", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a table named \u0027community_education\u0027", + "sql_context": "CREATE TABLE community_education (id INT, name VARCHAR(50), city VARCHAR(50), state VARCHAR(2), country VARCHAR(50));", + "sql": "CREATE TABLE community_education (id INT, name VARCHAR(50), city VARCHAR(50), state VARCHAR(2), country VARCHAR(50));", + "sql_explanation": "1. 
A new table \u0027community_education\u0027 is being created with 5 columns: id (integer), name (varchar(50)), city (varchar(50)), state (varchar(2)), country (varchar(50))." +}, { + "id": "3147", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in all habitats, and how many community education programs are there in total?", + "sql_context": "CREATE TABLE animal_population (id INT, type VARCHAR(50), animals INT); INSERT INTO animal_population (id, type, animals) VALUES (1, \u0027Forest\u0027, 500), (2, \u0027Savannah\u0027, 750), (3, \u0027Wetlands\u0027, 450); CREATE TABLE education (id INT, type VARCHAR(50), programs INT); INSERT INTO education (id, type, programs) VALUES (1, \u0027Forest\u0027, 10), (2, \u0027Savannah\u0027, 5), (3, \u0027Wetlands\u0027, 15);", + "sql": "SELECT SUM(animals) as total_animals, SUM(programs) as total_programs FROM animal_population, education;", + "sql_explanation": "This SQL query uses the animal_population and education tables and the SUM function to calculate the total number of animals and community education programs, respectively." 
+}, { + "id": "3318", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of funds donated to the \"AfricanReserve\" for lion conservation?", + "sql_context": "CREATE TABLE Donations (donor_id INT, reserve VARCHAR(50), amount DECIMAL(10, 2), purpose VARCHAR(50)); INSERT INTO Donations (donor_id, reserve, amount, purpose) VALUES (1, \u0027AsianReserve\u0027, 500.00, \u0027TigerConservation\u0027), (2, \u0027AfricanReserve\u0027, 300.00, \u0027LionConservation\u0027), (3, \u0027AsianReserve\u0027, 700.00, \u0027TigerConservation\u0027);", + "sql": "SELECT SUM(amount) FROM Donations WHERE reserve \u003d \u0027AfricanReserve\u0027 AND purpose \u003d \u0027LionConservation\u0027;", + "sql_explanation": "This query calculates the total amount of funds donated to the AfricanReserve for lion conservation. It does this by selecting the sum of the amount column where the reserve is AfricanReserve and the purpose is LionConservation." 
+}, { + "id": "3567", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find animal species with a population greater than 1000 in a specific region", + "sql_context": "CREATE TABLE animal_population (id INT PRIMARY KEY, species VARCHAR(255), population INT, region VARCHAR(255));", + "sql": "SELECT species FROM animal_population WHERE population \u003e 1000 AND region \u003d \u0027African Savannah\u0027;", + "sql_explanation": "This query selects all species from the \u0027animal_population\u0027 table where the population is greater than 1000 and the region is \u0027African Savannah\u0027." +}, { + "id": "4056", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the budget for \u0027project_a\u0027 and \u0027project_b\u0027 combined?", + "sql_context": "CREATE TABLE projects_2 (project_id INT, project_name VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO projects_2 VALUES (1, \u0027project_a\u0027, 50000.00); INSERT INTO projects_2 VALUES (2, \u0027project_b\u0027, 75000.00); INSERT INTO projects_2 VALUES (3, \u0027project_c\u0027, 60000.00);", + "sql": "SELECT SUM(budget) FROM projects_2 WHERE project_name IN (\u0027project_a\u0027, \u0027project_b\u0027);", + "sql_explanation": "This query calculates the total budget for projects a and b by 
summing up the budget values for these projects, using the IN keyword to specify the project names." +}, { + "id": "4151", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of the total population for each species in the African conservation programs?", + "sql_context": "CREATE TABLE african_animals (species VARCHAR(50), animal_count INT); INSERT INTO african_animals (species, animal_count) VALUES (\u0027Lions\u0027, 350), (\u0027Elephants\u0027, 600), (\u0027Giraffes\u0027, 250);", + "sql": "SELECT species, animal_count/SUM(animal_count) as percentage FROM african_animals;", + "sql_explanation": "This query calculates the percentage of the total population for each species in the african_animals table." 
+}, { + "id": "4179", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of all cheetahs in the San Diego Zoo?", + "sql_context": "CREATE TABLE Cheetah (Id INT, Name VARCHAR(50), Age INT, Gender VARCHAR(10)); INSERT INTO Cheetah (Id, Name, Age, Gender) VALUES (1, \u0027Cheetie\u0027, 8, \u0027Female\u0027); INSERT INTO Cheetah (Id, Name, Age, Gender) VALUES (2, \u0027Speedy\u0027, 6, \u0027Male\u0027);", + "sql": "SELECT AVG(Age) FROM Cheetah WHERE Name \u003d \u0027San Diego Zoo\u0027 AND Species \u003d \u0027Cheetah\u0027;", + "sql_explanation": "This SQL query calculates the average age of cheetahs in the San Diego Zoo by using the AVG function on the Age column, filtering the Cheetah table by the specified name and species." 
+}, { + "id": "4246", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all animal species and their populations, sorted by population in ascending order", + "sql_context": "CREATE TABLE animal_population (id INT, animal_species VARCHAR(50), population INT); INSERT INTO animal_population (id, animal_species, population) VALUES (1, \u0027Tiger\u0027, 2000), (2, \u0027Elephant\u0027, 5000), (3, \u0027Giraffe\u0027, 8000), (4, \u0027Tiger\u0027, 3000);", + "sql": "SELECT animal_species, population FROM animal_population ORDER BY population ASC;", + "sql_explanation": "This query lists all animal species and their populations in ascending order by selecting the \u0027animal_species\u0027 and \u0027population\u0027 columns from the \u0027animal_population\u0027 table and sorting the results by \u0027population\u0027 in ascending order." 
+}, { + "id": "4251", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027community_education\u0027 table where the center_name is \u0027Tiger Conservation Society\u0027", + "sql_context": "CREATE TABLE community_education (id INT, center_name VARCHAR(50), location VARCHAR(50), num_participants INT);", + "sql": "DELETE FROM community_education WHERE center_name \u003d \u0027Tiger Conservation Society\u0027;", + "sql_explanation": "1. This query selects all records from the \u0027community_education\u0027 table. 2. The WHERE clause filters the records to only those with a center_name value of \u0027Tiger Conservation Society\u0027. 3. The DELETE keyword removes these records from the table." 
+}, { + "id": "4319", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of unique animal species in the animal population data.", + "sql_context": "CREATE TABLE animal_population (id INT, animal_id INT, animal_species VARCHAR(255)); INSERT INTO animal_population (id, animal_id, animal_species) VALUES (1, 101, \u0027Giraffe\u0027), (2, 102, \u0027Zebra\u0027), (3, 103, \u0027Lion\u0027), (4, 104, \u0027Lion\u0027), (5, 105, \u0027Elephant\u0027);", + "sql": "SELECT COUNT(DISTINCT animal_species) AS unique_species FROM animal_population;", + "sql_explanation": "This query calculates the number of unique animal species in the animal_population table using the COUNT and DISTINCT functions." 
+}, { + "id": "4375", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update animal count in \u0027conservation_program\u0027 for Panda", + "sql_context": "CREATE TABLE conservation_program (id INT PRIMARY KEY, animal_name VARCHAR, num_animals INT); INSERT INTO conservation_program (id, animal_name, num_animals) VALUES (1, \u0027Tiger\u0027, 300), (2, \u0027Panda\u0027, 150), (3, \u0027Rhino\u0027, 70), (4, \u0027Elephant\u0027, 450);", + "sql": "UPDATE conservation_program SET num_animals \u003d 200 WHERE animal_name \u003d \u0027Panda\u0027;", + "sql_explanation": "This query updates the animal count for \u0027Panda\u0027 in the \u0027conservation_program\u0027 table to 200 by updating the \u0027num_animals\u0027 column value." 
+}, { + "id": "4607", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum size of protected habitats in square kilometers?", + "sql_context": "CREATE TABLE habitats (id INT, name TEXT, size_km2 FLOAT); INSERT INTO habitats (id, name, size_km2) VALUES (1, \u0027Forest\u0027, 50.3), (2, \u0027Wetlands\u0027, 32.1), (3, \u0027Grasslands\u0027, 87.6);", + "sql": "SELECT MAX(size_km2) as max_size, MIN(size_km2) as min_size FROM habitats;", + "sql_explanation": "This query calculates the maximum and minimum size of protected habitats in square kilometers by using the MAX and MIN aggregate functions to find the largest and smallest values in the size_km2 column of the habitats table." 
+}, { + "id": "4794", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population size of \u0027Carnivora\u0027 species?", + "sql_context": "CREATE TABLE AnimalData(species VARCHAR(20), population INT, order_class VARCHAR(20)); INSERT INTO AnimalData VALUES (\u0027Lion\u0027, 50, \u0027Carnivora\u0027), (\u0027Tiger\u0027, 30, \u0027Carnivora\u0027), (\u0027Zebra\u0027, 70, \u0027Herbivora\u0027);", + "sql": "SELECT AVG(population) FROM AnimalData WHERE order_class \u003d \u0027Carnivora\u0027;", + "sql_explanation": "The SQL query uses the AVG function to calculate the average population size of \u0027Carnivora\u0027 species in the \u0027AnimalData\u0027 table." 
+}, { + "id": "4803", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the community education programs that have not received any donations.", + "sql_context": "CREATE TABLE if NOT EXISTS community_education (program_id INT, program_name VARCHAR(50), donation_count INT); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (1, \u0027Wildlife Conservation 101\u0027, 500); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (2, \u0027Endangered Species Awareness\u0027, 300); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (3, \u0027Habitat Protection Techniques\u0027, 0);", + "sql": "SELECT program_name FROM community_education WHERE donation_count \u003d 0;", + "sql_explanation": "Select the program_name from the community_education table where the donation_count is 0 to list the community education programs that have not received any donations." 
+}, { + "id": "4819", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for habitat preservation in \u0027Africa\u0027?", + "sql_context": "CREATE TABLE Habitat_Preservation (PreservationID INT, Habitat VARCHAR(20), Budget DECIMAL(10, 2)); INSERT INTO Habitat_Preservation (PreservationID, Habitat, Budget) VALUES (1, \u0027Africa\u0027, 50000.00); INSERT INTO Habitat_Preservation (PreservationID, Habitat, Budget) VALUES (2, \u0027Asia\u0027, 75000.00);", + "sql": "SELECT SUM(Budget) FROM Habitat_Preservation WHERE Habitat \u003d \u0027Africa\u0027;", + "sql_explanation": "This query calculates the total budget allocated for habitat preservation in \u0027Africa\u0027 by using the SUM function on the Budget column." +}, { + "id": "5202", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the population of the \u0027Penguin\u0027 species in the \u0027animals\u0027 table", + "sql_context": "CREATE TABLE animals (id INT PRIMARY KEY, name VARCHAR(50), species VARCHAR(50), population INT);", + "sql": "UPDATE animals SET population \u003d 2000 WHERE species \u003d \u0027Penguin\u0027;", + "sql_explanation": "This SQL query updates the population of the \u0027Penguin\u0027 species in the \u0027animals\u0027 table to 2000." 
+}, { + "id": "5305", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new animal named Koala with a population of 500 into the \u0027animals\u0027 table.", + "sql_context": "CREATE TABLE animals (id INT PRIMARY KEY, name VARCHAR(50), population INT);", + "sql": "INSERT INTO animals (name, population) VALUES (\u0027Koala\u0027, 500);", + "sql_explanation": "The SQL query adds a new record to the \u0027animals\u0027 table with Koala as the name and 500 as the population using the INSERT INTO statement." +}, { + "id": "5309", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of all volunteers who have led community education programs?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, age INT, has_led_program BOOLEAN);", + "sql": "SELECT AVG(age) FROM volunteers WHERE has_led_program \u003d TRUE;", + "sql_explanation": "We\u0027re calculating the average age of volunteers who have led community education programs. We filter the volunteers table to only include those who have led a program (has_led_program \u003d TRUE) and then calculate the average of their ages." 
+}, { + "id": "5447", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals (in the \u0027animals\u0027 table) that have a population size greater than 500?", + "sql_context": "CREATE TABLE animals (id INT, name VARCHAR(50), species VARCHAR(50), population_size INT); INSERT INTO animals (id, name, species, population_size) VALUES (1, \u0027Lion\u0027, \u0027Felidae\u0027, 550);", + "sql": "SELECT COUNT(*) FROM animals WHERE population_size \u003e 500;", + "sql_explanation": "This query counts the total number of animals (in the \u0027animals\u0027 table) that have a population size greater than 500." 
+}, { + "id": "5599", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of endangered species in the habitats listed in the \"HabitatPreservation\" view?", + "sql_context": "CREATE VIEW HabitatPreservation AS SELECT habitat_id, COUNT(DISTINCT animal_id) AS species_count FROM HabitatAnimals WHERE status \u003d \u0027Endangered\u0027 GROUP BY habitat_id; INSERT INTO HabitatAnimals (habitat_id, animal_id, status) VALUES (1, 1, \u0027Vulnerable\u0027), (1, 2, \u0027Endangered\u0027), (2, 3, \u0027Endangered\u0027), (3, 4, \u0027Critically Endangered\u0027), (3, 5, \u0027Endangered\u0027), (4, 6, \u0027Extinct\u0027);", + "sql": "SELECT MAX(species_count) FROM HabitatPreservation;", + "sql_explanation": "This query calculates the maximum number of endangered species in the habitats listed in the HabitatPreservation view. It does this by selecting the maximum of the species_count column from the HabitatPreservation view." 
+}, { + "id": "5610", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the animal \u0027Elephant\u0027 from the database", + "sql_context": "CREATE TABLE habitats (id INT PRIMARY KEY, habitat_type VARCHAR(50)); INSERT INTO habitats (id, habitat_type) VALUES (1, \u0027Forest\u0027); INSERT INTO habitats (id, habitat_type) VALUES (2, \u0027Grassland\u0027); INSERT INTO habitats (id, habitat_type) VALUES (3, \u0027Wetland\u0027); CREATE TABLE animals (id INT PRIMARY KEY, animal_name VARCHAR(50), habitat_id INT, FOREIGN KEY (habitat_id) REFERENCES habitats(id)); INSERT INTO animals (id, animal_name, habitat_id) VALUES (1, \u0027Tiger\u0027, 1); INSERT INTO animals (id, animal_name, habitat_id) VALUES (2, \u0027Elephant\u0027, 2); INSERT INTO animals (id, animal_name, habitat_id) VALUES (3, \u0027Crane\u0027, 3);", + "sql": "DELETE FROM animals WHERE animal_name \u003d \u0027Elephant\u0027;", + "sql_explanation": "This query deletes the \u0027Elephant\u0027 record from the animals table." 
+}, { + "id": "5670", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many animals are there in the \u0027endangered_species\u0027 table?", + "sql_context": "CREATE TABLE endangered_species (species_id INT, animal_name VARCHAR(50), population INT); INSERT INTO endangered_species (species_id, animal_name, population) VALUES (1, \u0027Giant Panda\u0027, 1800), (2, \u0027Black Rhino\u0027, 5000), (3, \u0027Mountain Gorilla\u0027, 1000);", + "sql": "SELECT SUM(population) FROM endangered_species;", + "sql_explanation": "This SQL query calculates the total number of endangered species by summing up the \u0027population\u0027 column in the \u0027endangered_species\u0027 table." +}, { + "id": "5675", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in the \u0027endangered_species\u0027 table?", + "sql_context": "CREATE TABLE endangered_species (id INT PRIMARY KEY, animal_name VARCHAR, population INT);", + "sql": "SELECT SUM(population) FROM endangered_species;", + "sql_explanation": "This query calculates the total population of all animals in the \u0027endangered_species\u0027 table by summing the \u0027population\u0027 column values." 
+}, { + "id": "5689", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population of animals in the \u0027animal_population\u0027 table?", + "sql_context": "CREATE TABLE animal_population (animal_id INT, animal_name VARCHAR(50), population INT); INSERT INTO animal_population (animal_id, animal_name, population) VALUES (1, \u0027Tiger\u0027, 2000), (2, \u0027Elephant\u0027, 5000), (3, \u0027Lion\u0027, 3000);", + "sql": "SELECT AVG(population) FROM animal_population;", + "sql_explanation": "This query calculates the average population of animals in the \u0027animal_population\u0027 table by finding the mean value of the \u0027population\u0027 column." +}, { + "id": "5701", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the programs in the \u0027community_education\u0027 table", + "sql_context": "CREATE TABLE community_education (id INT, program_name VARCHAR(255), target_audience VARCHAR(255), duration_weeks INT);", + "sql": "SELECT program_name FROM community_education;", + "sql_explanation": "1. The SELECT statement is used to retrieve data from a table. 2. \u0027program_name\u0027 is specified in the SELECT clause to only get the program names. 3. 
The table \u0027community_education\u0027 is specified in the FROM clause to get data from that table." +}, { + "id": "5799", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete a record from the \u0027volunteers\u0027 table", + "sql_context": "CREATE TABLE volunteers (id INT PRIMARY KEY, name VARCHAR(255), age INT, program_id INT);", + "sql": "DELETE FROM volunteers WHERE id \u003d 1;", + "sql_explanation": "The record with id 1 is deleted from the \u0027volunteers\u0027 table." +}, { + "id": "5840", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of records in the \u0027animals\u0027 table", + "sql_context": "CREATE TABLE animals (id INT PRIMARY KEY, name VARCHAR(100), species VARCHAR(50), population INT); INSERT INTO animals (id, name, species, population) VALUES (1, \u0027Giraffe\u0027, \u0027Mammal\u0027, 30000), (2, \u0027Elephant\u0027, \u0027Mammal\u0027, 5000);", + "sql": "SELECT COUNT(*) FROM animals;", + "sql_explanation": "This counts the number of records in the \u0027animals\u0027 table. The \u0027*\u0027 is a wildcard that selects all records." 
+}, { + "id": "23", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of \u0027Online Travel Agency A\u0027 compared to \u0027Online Travel Agency B\u0027?", + "sql_context": "CREATE TABLE bookings (booking_id INT, hotel_id INT, agency TEXT, revenue FLOAT);", + "sql": "SELECT (SUM(CASE WHEN agency \u003d \u0027Online Travel Agency A\u0027 THEN revenue ELSE 0 END) / SUM(CASE WHEN agency IN (\u0027Online Travel Agency A\u0027, \u0027Online Travel Agency B\u0027) THEN revenue ELSE 0 END)) * 100 as market_share_A, (SUM(CASE WHEN agency \u003d \u0027Online Travel Agency B\u0027 THEN revenue ELSE 0 END) / SUM(CASE WHEN agency IN (\u0027Online Travel Agency A\u0027, \u0027Online Travel Agency B\u0027) THEN revenue ELSE 0 END)) * 100 as market_share_B FROM bookings;", + "sql_explanation": "This SQL query calculates the market share of Online Travel Agency A and Online Travel Agency B. It does this by using conditional aggregation with the SUM and CASE statement to filter for the revenue of each agency. It then calculates the market share by dividing the revenue of each agency by the total revenue of both agencies." 
+}, { + "id": "1979", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of OTA bookings for hotels in \u0027Mumbai\u0027 in Q3 2022?", + "sql_context": "CREATE TABLE otas (id INT PRIMARY KEY, hotel_id INT, bookings INT, booking_date DATE);", + "sql": "SELECT SUM(bookings) FROM otas WHERE city \u003d \u0027Mumbai\u0027 AND EXTRACT(QUARTER FROM booking_date) \u003d 3 AND EXTRACT(YEAR FROM booking_date) \u003d 2022;", + "sql_explanation": "The query calculates the sum of OTA bookings for hotels located in \u0027Mumbai\u0027 during Q3 2022." +}, { + "id": "2010", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the hotels in the hotels table that offer either a gym or a spa facility, but not both.", + "sql_context": "CREATE TABLE hotels (hotel_id INT, name VARCHAR(50), facility VARCHAR(50)); INSERT INTO hotels (hotel_id, name, facility) VALUES (1, \u0027Hotel X\u0027, \u0027spa,gym\u0027), (2, \u0027Hotel Y\u0027, \u0027gym\u0027), (3, \u0027Hotel Z\u0027, \u0027spa\u0027);", + "sql": "SELECT * FROM hotels WHERE (facility LIKE \u0027%gym%\u0027 AND facility NOT LIKE \u0027%spa%\u0027) OR (facility LIKE \u0027%spa%\u0027 AND facility NOT LIKE \u0027%gym%\u0027);", + 
"sql_explanation": "The SQL query selects all columns from the hotels table where the facility column contains either \u0027gym\u0027 or \u0027spa\u0027, but not both. This is achieved using the OR operator and the LIKE and NOT LIKE operators." +}, { + "id": "2199", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of hotels in Paris that have adopted AI technology?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, ai_adoption BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, city, ai_adoption) VALUES (1, \u0027Hotel X\u0027, \u0027Paris\u0027, TRUE), (2, \u0027Hotel Y\u0027, \u0027London\u0027, FALSE);", + "sql": "SELECT (COUNT(CASE WHEN ai_adoption \u003d TRUE THEN 1 END) * 100.0 / COUNT(ai_adoption)) AS percentage FROM hotels WHERE city \u003d \u0027Paris\u0027;", + "sql_explanation": "This SQL query calculates the percentage of hotels in Paris that have adopted AI technology. It first counts the number of hotels in Paris where the ai_adoption field is TRUE. Then, it divides this count by the total number of hotels in Paris and multiplies the result by 100.0 to get the percentage." 
+}, { + "id": "2268", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of OTA bookings in Mexico and Brazil in Q2 2022?", + "sql_context": "CREATE TABLE ota_bookings_2 (booking_id INT, country VARCHAR(255), booking_date DATE); INSERT INTO ota_bookings_2 (booking_id, country, booking_date) VALUES (1, \u0027Mexico\u0027, \u00272022-04-01\u0027), (2, \u0027Brazil\u0027, \u00272022-05-01\u0027), (3, \u0027Mexico\u0027, \u00272022-06-01\u0027);", + "sql": "SELECT COUNT(*) FROM ota_bookings_2 WHERE country IN (\u0027Mexico\u0027, \u0027Brazil\u0027) AND booking_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027;", + "sql_explanation": "The SQL query counts the number of OTA bookings in Mexico and Brazil in Q2 2022 by filtering the country column to only include Mexico and Brazil and using the BETWEEN operator to filter the booking_date to Q2 2022." 
+}, { + "id": "2430", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hotels in the APAC region that adopted AI chatbots?", + "sql_context": "CREATE TABLE hotel_tech (hotel_id INT, hotel_name TEXT, country TEXT, chatbot BOOLEAN);", + "sql": "SELECT COUNT(*) FROM hotel_tech WHERE country IN (\u0027China\u0027, \u0027Japan\u0027, \u0027South Korea\u0027, \u0027India\u0027, \u0027Australia\u0027) AND chatbot \u003d TRUE;", + "sql_explanation": "This query calculates the total number of hotels in the APAC region that adopted AI chatbots. It uses the COUNT function to count the number of records and filters the data by country and chatbot adoption." 
+}, { + "id": "2523", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by hotel bookings in the APAC region in Q1 2022?", + "sql_context": "CREATE TABLE bookings (booking_id INT, booking_date DATE, region VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO bookings (booking_id, booking_date, region, revenue) VALUES (1, \u00272022-01-01\u0027, \u0027APAC\u0027, 100), (2, \u00272022-02-01\u0027, \u0027APAC\u0027, 200), (3, \u00272022-03-01\u0027, \u0027APAC\u0027, 300);", + "sql": "SELECT SUM(revenue) FROM bookings WHERE region \u003d \u0027APAC\u0027 AND booking_date \u003e\u003d \u00272022-01-01\u0027 AND booking_date \u003c \u00272022-04-01\u0027;", + "sql_explanation": "This query calculates the total revenue generated by hotel bookings in the APAC region in Q1 2022. It does this by selecting the sum of revenue from the bookings table, filtering for rows where the region is \u0027APAC\u0027 and booking_date is within Q1 2022." 
+}, { + "id": "2531", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue from the \u0027Luxury\u0027 hotel type for the \u0027Paris\u0027 region in \u00272021\u0027?", + "sql_context": "CREATE TABLE hotels (hotel_type VARCHAR(20), region VARCHAR(20), revenue DECIMAL(10,2)); INSERT INTO hotels (hotel_type, region, revenue) VALUES (\u0027Economy\u0027, \u0027Paris\u0027, 6000.00), (\u0027Luxury\u0027, \u0027Paris\u0027, 12000.00);", + "sql": "SELECT SUM(revenue) FROM hotels WHERE hotel_type \u003d \u0027Luxury\u0027 AND region \u003d \u0027Paris\u0027 AND EXTRACT(YEAR FROM timestamp) \u003d 2021;", + "sql_explanation": "This query calculates the total revenue for \u0027Luxury\u0027 hotel type in the \u0027Paris\u0027 region for the year 2021 by summing the revenue from the \u0027hotels\u0027 table where the hotel_type is \u0027Luxury\u0027, the region is \u0027Paris\u0027, and the year of the timestamp column is 2021." 
+}, { + "id": "2755", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were engaged in France during the last quarter?", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, country TEXT, engagement_date DATE); INSERT INTO virtual_tours (tour_id, country, engagement_date) VALUES (1, \u0027France\u0027, \u00272022-01-05\u0027), (2, \u0027Germany\u0027, \u00272022-02-10\u0027), (3, \u0027France\u0027, \u00272022-04-15\u0027);", + "sql": "SELECT COUNT(*) FROM virtual_tours WHERE country \u003d \u0027France\u0027 AND engagement_date \u003e\u003d DATEADD(quarter, -1, GETDATE());", + "sql_explanation": "This query counts the number of virtual tour engagements in France during the last quarter. It does this by using the COUNT function on the * wildcard, filtering the data where the country is France and engagement_date is within the last quarter." 
+}, { + "id": "2957", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were engaged in the last month for hotels in the US?", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, hotel_name VARCHAR(255), country VARCHAR(255), engagement_date DATE);", + "sql": "SELECT COUNT(*) FROM virtual_tours WHERE country \u003d \u0027US\u0027 AND engagement_date \u003e\u003d DATEADD(month, -1, GETDATE());", + "sql_explanation": "This query counts the number of virtual tours engaged in the last month for hotels in the US. It filters the data to only include tours in the US and in the last month, and then counts the number of rows that meet these criteria." 
+}, { + "id": "3181", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of hotels in Africa that have adopted AI technology and offer virtual tours?", + "sql_context": "CREATE TABLE ai_hotels (hotel_id INT, hotel_name TEXT, country TEXT, ai_adoption BOOLEAN, virtual_tour BOOLEAN); INSERT INTO ai_hotels (hotel_id, hotel_name, country, ai_adoption, virtual_tour) VALUES (1, \u0027Hotel X\u0027, \u0027Africa\u0027, true, true), (2, \u0027Hotel Y\u0027, \u0027Europe\u0027, true, false), (3, \u0027Hotel Z\u0027, \u0027Africa\u0027, false, true);", + "sql": "SELECT COUNT(*) FROM ai_hotels WHERE country \u003d \u0027Africa\u0027 AND ai_adoption \u003d true AND virtual_tour \u003d true;", + "sql_explanation": "This query counts the number of rows in the ai_hotels table where the country is \u0027Africa\u0027, ai_adoption is true, and virtual_tour is true." 
+}, { + "id": "3387", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were added to the database in Q3 of 2021?", + "sql_context": "CREATE TABLE virtual_tours_history (tour_id INT, name TEXT, region TEXT, engagement INT, added_date DATE); INSERT INTO virtual_tours_history (tour_id, name, region, engagement, added_date) VALUES (1, \u0027Tour A\u0027, \u0027Americas\u0027, 1000, \u00272021-01-01\u0027), (2, \u0027Tour B\u0027, \u0027Europe\u0027, 800, \u00272021-07-01\u0027), (3, \u0027Tour C\u0027, \u0027Asia\u0027, 1200, \u00272021-06-01\u0027);", + "sql": "SELECT COUNT(*) FROM virtual_tours_history WHERE added_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027;", + "sql_explanation": "This query counts the number of virtual tours added to the database in Q3 of 2021 by selecting the count of all virtual tours where the added date is between July 1, 2021 and September 30, 2021." 
+}, { + "id": "3408", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were added to the database in 2021?", + "sql_context": "CREATE TABLE virtual_tours_history (tour_id INT, name TEXT, region TEXT, engagement INT, added_date DATE); INSERT INTO virtual_tours_history (tour_id, name, region, engagement, added_date) VALUES (1, \u0027Tour A\u0027, \u0027Americas\u0027, 1000, \u00272021-01-01\u0027), (2, \u0027Tour B\u0027, \u0027Europe\u0027, 800, \u00272020-01-01\u0027), (3, \u0027Tour C\u0027, \u0027Asia\u0027, 1200, \u00272021-06-01\u0027);", + "sql": "SELECT COUNT(*) FROM virtual_tours_history WHERE added_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This query counts the number of virtual tours added to the database in 2021 by selecting the count of all virtual tours where the added date is between January 1, 2021 and December 31, 2021." 
+}, { + "id": "3460", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hotels were adopted by OTAs in the last quarter in the APAC region?", + "sql_context": "CREATE TABLE ota_adoptions (id INT, quarter TEXT, region TEXT, hotel_adoptions INT); INSERT INTO ota_adoptions (id, quarter, region, hotel_adoptions) VALUES (1, \u0027Q1 2022\u0027, \u0027APAC\u0027, 50), (2, \u0027Q2 2022\u0027, \u0027APAC\u0027, 75), (3, \u0027Q1 2022\u0027, \u0027North America\u0027, 60);", + "sql": "SELECT region, hotel_adoptions FROM ota_adoptions WHERE quarter \u003d \u0027Q2 2022\u0027 AND region \u003d \u0027APAC\u0027;", + "sql_explanation": "The SQL query identifies the number of hotels adopted by OTAs in the last quarter (Q2 2022) in the APAC region by selecting all records with \u0027Q2 2022\u0027 as the quarter and \u0027APAC\u0027 as the region." 
+}, { + "id": "3556", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by \u0027eco-friendly\u0027 hotels in \u0027Asia\u0027 that offer \u0027spa\u0027 facilities?", + "sql_context": "CREATE TABLE hotels(id INT, name TEXT, country TEXT, eco_friendly BOOLEAN, spa BOOLEAN, revenue FLOAT);", + "sql": "SELECT SUM(revenue) FROM hotels WHERE country \u003d \u0027Asia\u0027 AND eco_friendly \u003d TRUE AND spa \u003d TRUE;", + "sql_explanation": "The SQL query calculates the total revenue generated by eco-friendly hotels in Asia that offer spa facilities. It does this by filtering the hotels table for rows where the country is \u0027Asia\u0027, eco_friendly is true, and spa is true. It then calculates the sum of the revenue column for those rows." 
+}, { + "id": "3772", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new virtual tour record for a hotel in the \u0027Asia\u0027 region on 2023-03-15.", + "sql_context": "CREATE TABLE tour_details (tour_id INT, hotel_id INT, region VARCHAR(20), tour_date DATE);", + "sql": "INSERT INTO tour_details (hotel_id, region, tour_date) VALUES (102, \u0027Asia\u0027, \u00272023-03-15\u0027);", + "sql_explanation": "This query inserts a new virtual tour record for a hotel in the \u0027Asia\u0027 region on 2023-03-15. It uses the \u0027tour_details\u0027 table to insert a new record with a specified \u0027hotel_id\u0027, \u0027region\u0027, and \u0027tour_date\u0027." 
+}, { + "id": "3898", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique virtual tour engagement metrics for each hotel in the virtual_tour_data table?", + "sql_context": "CREATE TABLE virtual_tour_data (hotel_id INT, hotel_name TEXT, country TEXT, total_views INT, avg_view_time FLOAT); INSERT INTO virtual_tour_data (hotel_id, hotel_name, country, total_views, avg_view_time) VALUES (1, \u0027Luxury Resort\u0027, \u0027Mexico\u0027, 2000, 120.5), (2, \u0027Boutique Hotel\u0027, \u0027Spain\u0027, 1500, 98.3), (3, \u0027Beachfront Hotel\u0027, \u0027Brazil\u0027, 2500, 105.8);", + "sql": "SELECT DISTINCT hotel_name, country, total_views, avg_view_time FROM virtual_tour_data;", + "sql_explanation": "The query selects the unique combination of hotel name, country, total views, and average view time for each hotel in the virtual_tour_data table by using the DISTINCT keyword." 
+}, { + "id": "4033", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hotels in total have adopted AI technology in Europe?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, rating FLOAT, ai_adoption BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, city, rating, ai_adoption) VALUES (1, \u0027Hotel A\u0027, \u0027London\u0027, 4.5, true), (2, \u0027Hotel B\u0027, \u0027Paris\u0027, 4.2, false);", + "sql": "SELECT COUNT(*) FROM hotels WHERE ai_adoption \u003d true AND city IN (\u0027London\u0027, \u0027Paris\u0027);", + "sql_explanation": "This query counts the number of hotels that have adopted AI technology in Europe. It does so by using the COUNT() function on all rows, filtering the rows by ai_adoption and city." 
+}, { + "id": "4038", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of hotels in Japan that have adopted AI-powered guest communication?", + "sql_context": "CREATE TABLE hotel_features (hotel_id INT, country TEXT, rating FLOAT, ai_guest_comm INT); INSERT INTO hotel_features (hotel_id, country, rating, ai_guest_comm) VALUES (1, \u0027Japan\u0027, 4.5, 1), (2, \u0027Japan\u0027, 4.7, 0), (3, \u0027Canada\u0027, 4.2, 1);", + "sql": "SELECT MIN(rating) FROM hotel_features WHERE country \u003d \u0027Japan\u0027 AND ai_guest_comm \u003d 1;", + "sql_explanation": "This SQL query finds the minimum rating of hotels in Japan that have adopted AI-powered guest communication. It filters the hotel_features table based on the country and AI guest communication and then calculates the minimum rating using the MIN() function." 
+}, { + "id": "4082", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total revenue from hotels in \u0027Africa\u0027 for \u00272023\u0027?", + "sql_context": "CREATE TABLE hotel_revenue (hotel_id INT, hotel_name TEXT, location TEXT, revenue INT, year INT); INSERT INTO hotel_revenue (hotel_id, hotel_name, location, revenue, year) VALUES (1, \u0027Savannah Resort\u0027, \u0027Kenya\u0027, 350000, 2023), (2, \u0027Victoria Safari Lodge\u0027, \u0027Tanzania\u0027, 400000, 2023), (3, \u0027Atlas Mountain Retreat\u0027, \u0027Morocco\u0027, 450000, 2023);", + "sql": "SELECT SUM(revenue) FROM hotel_revenue WHERE location LIKE \u0027Africa\u0027 AND year \u003d 2023;", + "sql_explanation": "The SQL query calculates the total revenue from hotels in Africa for 2023 by using the SUM function on the revenue column, and filtering the rows using a WHERE clause. The WHERE clause considers only the hotels located in Africa and active in the year 2023, obtained using the location and year columns." 
+}, { + "id": "4248", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum rating of hotels in the UK that have more than 50 reviews?", + "sql_context": "CREATE TABLE hotels (id INT, name TEXT, country TEXT, rating FLOAT, reviews INT);", + "sql": "SELECT MIN(rating) FROM hotels WHERE country \u003d \u0027United Kingdom\u0027 AND reviews \u003e 50;", + "sql_explanation": "This query calculates the minimum rating of hotels in the UK that have more than 50 reviews. It uses the MIN function to find the minimum rating and filters the data by country and reviews." +}, { + "id": "4390", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in New York that have adopted AI technology?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, rating FLOAT, ai_adoption BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, city, rating, ai_adoption) VALUES (1, \u0027Hotel A\u0027, \u0027New York\u0027, 4.5, true), (2, \u0027Hotel B\u0027, \u0027Los Angeles\u0027, 4.2, false);", + "sql": "SELECT AVG(rating) FROM hotels WHERE city \u003d \u0027New York\u0027 AND ai_adoption \u003d true;", + "sql_explanation": "This query calculates the 
average rating of hotels in New York that have adopted AI technology. It does so by using the AVG() function on the rating column, filtering the rows by city and ai_adoption." +}, { + "id": "4399", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in Canada that have not adopted AI technology?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, rating FLOAT, ai_adoption BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, city, rating, ai_adoption) VALUES (1, \u0027Hotel A\u0027, \u0027Toronto\u0027, 4.5, false), (2, \u0027Hotel B\u0027, \u0027Montreal\u0027, 4.2, true);", + "sql": "SELECT AVG(rating) FROM hotels WHERE city \u003d \u0027Toronto\u0027 AND ai_adoption \u003d false;", + "sql_explanation": "This query calculates the average rating of hotels in Canada that have not adopted AI technology. It does so by using the AVG() function on the rating column, filtering the rows by city and ai_adoption." 
+}, { + "id": "4421", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hotels in Africa that have adopted virtual tour technology?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, virtual_tour BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, country, virtual_tour) VALUES (1, \u0027The Nile River\u0027, \u0027Egypt\u0027, true), (2, \u0027The Sahara Desert\u0027, \u0027Tunisia\u0027, false), (3, \u0027The African Safari\u0027, \u0027South Africa\u0027, true);", + "sql": "SELECT COUNT(*) FROM hotels WHERE virtual_tour \u003d true AND country \u003d \u0027Africa\u0027;", + "sql_explanation": "This query calculates the total number of hotels in Africa that have adopted virtual tour technology. It does so by filtering the hotels table for rows where the country is \u0027Africa\u0027 and virtual_tour is true, and then calculating the count of rows for those filters." 
+}, { + "id": "4468", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hotels in Canada have adopted cloud-based PMS technology?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, pms_adoption BOOLEAN);", + "sql": "SELECT COUNT(*) FROM hotels WHERE country \u003d \u0027Canada\u0027 AND pms_adoption \u003d TRUE;", + "sql_explanation": "The SQL query counts the number of hotels in the hotels table that have adopted cloud-based PMS technology and are located in Canada." +}, { + "id": "4503", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in the US that have implemented AI-powered services?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, ai_services TEXT, rating FLOAT); INSERT INTO hotels (hotel_id, hotel_name, country, ai_services, rating) VALUES (1, \u0027The Smart Hotel\u0027, \u0027USA\u0027, \u0027yes\u0027, 4.5), (2, \u0027The Traditional Inn\u0027, \u0027USA\u0027, \u0027no\u0027, 4.2), (3, \u0027The AI Resort\u0027, \u0027USA\u0027, \u0027yes\u0027, 4.8);", + "sql": "SELECT AVG(rating) FROM hotels WHERE country \u003d \u0027USA\u0027 AND ai_services \u003d \u0027yes\u0027", 
+ "sql_explanation": "This query calculates the average rating of hotels in the USA that have implemented AI-powered services by filtering the hotels table based on the country and ai_services columns and then computing the average of the rating column." +}, { + "id": "4520", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total revenue for hotels in Paris that have adopted AI technology.", + "sql_context": "CREATE TABLE hotels (hotel_id INT, city TEXT, ai_adoption BOOLEAN, revenue FLOAT); INSERT INTO hotels (hotel_id, city, ai_adoption, revenue) VALUES (1, \u0027Paris\u0027, TRUE, 200.0), (2, \u0027London\u0027, FALSE, 150.0), (3, \u0027Paris\u0027, TRUE, 250.0);", + "sql": "SELECT SUM(revenue) FROM hotels WHERE city \u003d \u0027Paris\u0027 AND ai_adoption \u003d TRUE;", + "sql_explanation": "This SQL query calculates the total revenue for hotels in Paris that have adopted AI technology. It first filters the hotels table to only include rows where the city field is \u0027Paris\u0027 and the ai_adoption field is TRUE, then calculates the sum of the revenue field for the matching rows." 
+}, { + "id": "4548", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by \u0027OTAs\u0027 in \u00272022\u0027?", + "sql_context": "CREATE TABLE otas (id INT, ota_name TEXT, revenue INT); INSERT INTO otas (id, ota_name, revenue) VALUES (1, \u0027Expedia\u0027, 500000), (2, \u0027Booking.com\u0027, 600000), (3, \u0027Priceline\u0027, 400000);", + "sql": "SELECT SUM(revenue) FROM otas WHERE EXTRACT(YEAR FROM CURRENT_DATE) \u003d 2022;", + "sql_explanation": "The SQL query calculates the total revenue generated by OTAs in 2022 by using the SUM function on the revenue column. The WHERE clause filters the rows to only consider OTAs that were active in the year 2022, which is obtained using the EXTRACT function to extract the year from the current date." 
+}, { + "id": "4576", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in Paris, France?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name VARCHAR(255), city VARCHAR(255), country VARCHAR(255), rating FLOAT); INSERT INTO hotels (hotel_id, hotel_name, city, country, rating) VALUES (1, \u0027Hotel Ritz\u0027, \u0027Paris\u0027, \u0027France\u0027, 4.8), (2, \u0027Hotel de Crillon\u0027, \u0027Paris\u0027, \u0027France\u0027, 4.7);", + "sql": "SELECT AVG(rating) FROM hotels WHERE city \u003d \u0027Paris\u0027 AND country \u003d \u0027France\u0027;", + "sql_explanation": "The SQL query calculates the average rating of hotels located in Paris, France by using the AVG() function on the rating column. It filters the data using the WHERE clause to only consider hotels in Paris, France." 
+}, { + "id": "4889", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many 5-star hotels in the \u0027Asia\u0027 region have adopted AI-powered services?", + "sql_context": "CREATE TABLE asiahotels (id INT, name VARCHAR(255), star_rating INT, has_ai BOOLEAN); INSERT INTO asiahotels (id, name, star_rating, has_ai) VALUES (1, \u0027AI Smart Hotel\u0027, 5, 1); INSERT INTO asiahotels (id, name, star_rating, has_ai) VALUES (2, \u0027Traditional Hotel\u0027, 5, 0);", + "sql": "SELECT COUNT(*) FROM asiahotels WHERE star_rating \u003d 5 AND has_ai \u003d 1;", + "sql_explanation": "The SQL query determines the number of 5-star hotels in the \u0027Asia\u0027 region that have adopted AI-powered services by using the COUNT() function to count all records where the \u0027star_rating\u0027 field is equal to 5 and the \u0027has_ai\u0027 field is set to 1." 
+}, { + "id": "5425", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in the \u0027Americas\u0027 region?", + "sql_context": "CREATE TABLE hotels (id INT, name TEXT, region TEXT, rating FLOAT); INSERT INTO hotels (id, name, region, rating) VALUES (1, \u0027Hotel X\u0027, \u0027Americas\u0027, 4.2), (2, \u0027Hotel Y\u0027, \u0027Americas\u0027, 3.9), (3, \u0027Hotel Z\u0027, \u0027Europe\u0027, 4.5);", + "sql": "SELECT AVG(rating) FROM hotels WHERE region \u003d \u0027Americas\u0027;", + "sql_explanation": "The SQL query calculates the average rating of hotels in the \u0027Americas\u0027 region by using the AVG function on the \u0027rating\u0027 column, filtering the rows with the WHERE clause to only include hotels from the \u0027Americas\u0027 region." 
+}, { + "id": "5588", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of hotels in \u0027Paris\u0027?", + "sql_context": "CREATE TABLE hotels (id INT, name TEXT, city TEXT, rating FLOAT); INSERT INTO hotels (id, name, city, rating) VALUES (1, \u0027Hotel Ritz\u0027, \u0027Paris\u0027, 4.8), (2, \u0027Hotel George V\u0027, \u0027Paris\u0027, 4.9);", + "sql": "SELECT AVG(rating) FROM hotels WHERE city \u003d \u0027Paris\u0027;", + "sql_explanation": "The SQL query calculates the average rating of hotels in Paris by using the AVG function on the rating column, and filtering the rows with a WHERE clause to only consider hotels located in Paris." 
+}, { + "id": "5708", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of AI-powered features in hotels across the globe?", + "sql_context": "CREATE TABLE hotel_features (id INT, hotel_name TEXT, location TEXT, ai_features INT); INSERT INTO hotel_features (id, hotel_name, location, ai_features) VALUES (1, \u0027Hotel A\u0027, \u0027Asia\u0027, 5), (2, \u0027Hotel B\u0027, \u0027Europe\u0027, 7), (3, \u0027Hotel C\u0027, \u0027Americas\u0027, 3), (4, \u0027Hotel D\u0027, \u0027Africa\u0027, 6), (5, \u0027Hotel E\u0027, \u0027Australia\u0027, 4);", + "sql": "SELECT AVG(ai_features) FROM hotel_features;", + "sql_explanation": "This query calculates the average number of AI-powered features in hotels across the globe. It does this by selecting the average (AVG) of the ai_features column in the hotel_features table." 
+}, { + "id": "2477", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total number of algorithmic fairness research papers published in even-numbered years", + "sql_context": "CREATE TABLE research_papers (id INT, publication_year INT, topic VARCHAR(255)); INSERT INTO research_papers (id, publication_year, topic) VALUES (1, 2012, \u0027AI Safety\u0027), (2, 2013, \u0027Explainable AI\u0027), (3, 2018, \u0027Algorithmic Fairness\u0027), (4, 2019, \u0027Creative AI\u0027), (5, 2020, \u0027AI Safety\u0027), (6, 2021, \u0027AI Safety\u0027);", + "sql": "SELECT SUM(CASE WHEN publication_year % 2 \u003d 0 THEN 1 ELSE 0 END) FROM research_papers WHERE topic \u003d \u0027Algorithmic Fairness\u0027;", + "sql_explanation": "1. Filter research_papers rows with the topic \u0027Algorithmic Fairness\u0027. 2. Use a CASE statement to count records with even publication_year values. 3. Sum the resulting values to get the total count." 
+}, { + "id": "2643", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update records in the \u0027creative_applications\u0027 table where the \u0027application_name\u0027 is \u0027AI Poet\u0027 and the \u0027user_rating\u0027 is less than 4", + "sql_context": "CREATE TABLE creative_applications (id INT PRIMARY KEY, application_name VARCHAR(50), art_form VARCHAR(20), num_users INT, user_rating INT);", + "sql": "UPDATE creative_applications SET user_rating \u003d user_rating + 2 WHERE application_name \u003d \u0027AI Poet\u0027 AND user_rating \u003c 4;", + "sql_explanation": "This SQL query updates records in the \u0027creative_applications\u0027 table where the \u0027application_name\u0027 is \u0027AI Poet\u0027 and the \u0027user_rating\u0027 is less than 4. It increments the user rating by 2 for those records that meet the specified conditions." 
+}, { + "id": "2782", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which AI safety conferences had less than 50 papers in 2019 or 2020?", + "sql_context": "CREATE TABLE AI_Safety_Conferences (id INT, conference TEXT, year INT, papers INT); INSERT INTO AI_Safety_Conferences (id, conference, year, papers) VALUES (1, \u0027Conference1\u0027, 2019, 45), (2, \u0027Conference2\u0027, 2020, 60), (3, \u0027Conference3\u0027, 2019, 75), (4, \u0027Conference4\u0027, 2020, 30);", + "sql": "SELECT conference FROM AI_Safety_Conferences WHERE (year \u003d 2019 AND papers \u003c 50) OR (year \u003d 2020 AND papers \u003c 50);", + "sql_explanation": "This query selects the \u0027conference\u0027 column from the \u0027AI_Safety_Conferences\u0027 table, filtering rows where the \u0027year\u0027 column value is either 2019 or 2020 and the \u0027papers\u0027 column value is less than 50." 
+}, { + "id": "2805", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the algorithmic_fairness table for the \u0027Lorenz Curve\u0027 algorithm with a fairness score of 0.85.", + "sql_context": "CREATE TABLE algorithmic_fairness (algorithm_id INT, algorithm_name TEXT, fairness_score REAL);", + "sql": "INSERT INTO algorithmic_fairness (algorithm_id, algorithm_name, fairness_score) VALUES (1, \u0027Lorenz Curve\u0027, 0.85);", + "sql_explanation": "This SQL query inserts a new record into the \u0027algorithmic_fairness\u0027 table for the \u0027Lorenz Curve\u0027 algorithm with a fairness score of 0.85. It uses the INSERT INTO clause to insert a new record into the table and the VALUES clause to specify the column values for the new record." 
+}, { + "id": "2834", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which AI safety incidents were reported by the Indigenous community in 2022?", + "sql_context": "CREATE TABLE CommunityIncidents (community VARCHAR(255), incident_year INT, incident_type VARCHAR(255)); INSERT INTO CommunityIncidents (community, incident_year, incident_type) VALUES (\u0027Indigenous\u0027, 2022, \u0027Algorithmic bias\u0027), (\u0027LGBTQ+\u0027, 2021, \u0027Data privacy\u0027), (\u0027Women in Tech\u0027, 2022, \u0027Model explainability\u0027);", + "sql": "SELECT community, incident_type FROM CommunityIncidents WHERE community \u003d \u0027Indigenous\u0027 AND incident_year \u003d 2022;", + "sql_explanation": "This query retrieves all AI safety incidents reported by the Indigenous community in 2022 by filtering the records based on the community and incident_year columns." 
+}, { + "id": "3034", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027calibration_data3\u0027 table with \u0027algorithm\u0027 \u003d \u0027Support Vector Machine\u0027, \u0027precision\u0027 \u003d 0.88, \u0027recall\u0027 \u003d 0.86", + "sql_context": "CREATE TABLE calibration_data3 (id INT, algorithm VARCHAR(20), precision DECIMAL(3,2), recall DECIMAL(3,2)); INSERT INTO calibration_data3 (id, algorithm, precision, recall) VALUES (1, \u0027Support Vector Machine\u0027, 0.88, 0.86);", + "sql": "INSERT INTO calibration_data3 (algorithm, precision, recall) VALUES (\u0027Support Vector Machine\u0027, 0.88, 0.86);", + "sql_explanation": "This query inserts a new record into the calibration_data3 table with the specified values for the \u0027algorithm\u0027, \u0027precision\u0027, and \u0027recall\u0027 columns. The \u0027id\u0027 column is auto-incremented by the database." 
+}, { + "id": "3533", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027calibration_data\u0027 table with \u0027algorithm\u0027 \u003d \u0027Deep Learning\u0027, \u0027precision\u0027 \u003d 0.8, \u0027recall\u0027 \u003d 0.7", + "sql_context": "CREATE TABLE calibration_data (id INT, algorithm VARCHAR(20), precision DECIMAL(3,2), recall DECIMAL(3,2)); INSERT INTO calibration_data (id, algorithm, precision, recall) VALUES (1, \u0027Deep Learning\u0027, 0.8, 0.7);", + "sql": "INSERT INTO calibration_data (algorithm, precision, recall) VALUES (\u0027Deep Learning\u0027, 0.8, 0.7);", + "sql_explanation": "This query inserts a new record into the calibration_data table with the specified values for the \u0027algorithm\u0027, \u0027precision\u0027, and \u0027recall\u0027 columns. The \u0027id\u0027 column is auto-incremented by the database." 
+}, { + "id": "3547", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the AI safety scores for models used in the education sector in Asia.", + "sql_context": "CREATE TABLE ai_safety_scores (id INT, model_name VARCHAR(50), sector VARCHAR(50), region VARCHAR(50), score FLOAT); INSERT INTO ai_safety_scores VALUES (1, \u0027EduModel1\u0027, \u0027Education\u0027, \u0027Asia\u0027, 0.88), (2, \u0027EduModel2\u0027, \u0027Education\u0027, \u0027Europe\u0027, 0.92), (3, \u0027EduModel3\u0027, \u0027Education\u0027, \u0027Asia\u0027, 0.82);", + "sql": "SELECT model_name, score FROM ai_safety_scores WHERE sector \u003d \u0027Education\u0027 AND region \u003d \u0027Asia\u0027;", + "sql_explanation": "This query retrieves the AI safety scores for models used in the education sector in Asia." 
+}, { + "id": "3680", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the explainable AI techniques used in the UK and Canada?", + "sql_context": "CREATE TABLE Explainable_AI (id INT, technique TEXT, location TEXT); INSERT INTO Explainable_AI (id, technique, location) VALUES (1, \u0027SHAP\u0027, \u0027UK\u0027), (2, \u0027LIME\u0027, \u0027Canada\u0027), (3, \u0027anchors\u0027, \u0027UK\u0027), (4, \u0027TreeExplainer\u0027, \u0027Canada\u0027);", + "sql": "SELECT DISTINCT location, technique FROM Explainable_AI WHERE location IN (\u0027UK\u0027, \u0027Canada\u0027);", + "sql_explanation": "This query selects unique combinations of the \u0027location\u0027 and \u0027technique\u0027 columns from the \u0027Explainable_AI\u0027 table, filtering rows where the \u0027location\u0027 column value is either \u0027UK\u0027 or \u0027Canada\u0027." 
+}, { + "id": "3687", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many algorithms in the \u0027explainable_ai\u0027 category have a fairness score greater than 0.8?", + "sql_context": "CREATE TABLE algorithms (algorithm_id INT, name TEXT, category TEXT, fairness_score DECIMAL);", + "sql": "SELECT COUNT(*) FROM algorithms WHERE category \u003d \u0027explainable_ai\u0027 AND fairness_score \u003e 0.8;", + "sql_explanation": "This query counts the number of rows in the \u0027algorithms\u0027 table where the category is \u0027explainable_ai\u0027 and the fairness_score is greater than 0.8. This is done by using the AND operator to filter the rows based on both the category and fairness_score columns." 
+}, { + "id": "3822", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the top 2 AI risks based on probability", + "sql_context": "CREATE TABLE ai_risks (id INT PRIMARY KEY, risk VARCHAR(100), impact DECIMAL(5,2), probability DECIMAL(5,2)); INSERT INTO ai_risks (id, risk, impact, probability) VALUES (1, \u0027Bias\u0027, 0.75, 0.20); INSERT INTO ai_risks (id, risk, impact, probability) VALUES (2, \u0027Privacy\u0027, 0.80, 0.15); INSERT INTO ai_risks (id, risk, impact, probability) VALUES (3, \u0027Security\u0027, 0.65, 0.35); INSERT INTO ai_risks (id, risk, impact, probability) VALUES (4, \u0027Explainability\u0027, 0.50, 0.40);", + "sql": "SELECT risk, probability FROM ai_risks ORDER BY probability DESC FETCH FIRST 2 ROWS ONLY;", + "sql_explanation": "This SQL query selects the risk and probability columns from the ai_risks table and orders the results in descending order based on the probability column. The FETCH FIRST 2 ROWS ONLY clause limits the results to the top 2 rows with the highest probability." 
+}, { + "id": "3841", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average complexity score of explainable_ai_algorithms in the \u0027creative\u0027 category?", + "sql_context": "CREATE TABLE explainable_ai_algorithms (algorithm_id INTEGER, algorithm_name TEXT, complexity_score FLOAT, category TEXT);", + "sql": "SELECT AVG(complexity_score) FROM explainable_ai_algorithms WHERE category \u003d \u0027creative\u0027;", + "sql_explanation": "This query calculates the average complexity score for all explainable_ai_algorithms in the \u0027creative\u0027 category by summing up the complexity_score column values for rows with the category \u0027creative\u0027 and dividing by the count of algorithm_id entries in this subset." 
+}, { + "id": "3862", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average satisfaction score for explainable AI models developed in the last 3 years?", + "sql_context": "CREATE TABLE explainable_ai (model_name TEXT, satisfaction_score INTEGER, date DATE); INSERT INTO explainable_ai (model_name, satisfaction_score, date) VALUES (\u0027Model1\u0027, 80, \u00272020-01-01\u0027), (\u0027Model2\u0027, 85, \u00272019-04-03\u0027), (\u0027Model3\u0027, 90, \u00272021-05-22\u0027);", + "sql": "SELECT AVG(satisfaction_score) FROM explainable_ai WHERE date \u003e\u003d DATE(\u0027now\u0027, \u0027-3 year\u0027);", + "sql_explanation": "Calculate the average satisfaction score for explainable AI models from the past 3 years." 
+}, { + "id": "3901", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Total number of AI safety and AI fairness projects.", + "sql_context": "CREATE TABLE safety_fairness_projects (id INT PRIMARY KEY, project_name VARCHAR(50), category VARCHAR(50)); INSERT INTO safety_fairness_projects (id, project_name, category) VALUES (1, \u0027Secure AI\u0027, \u0027Safety\u0027), (2, \u0027Robust AI\u0027, \u0027Safety\u0027), (3, \u0027Privacy-Preserving AI\u0027, \u0027Safety\u0027), (4, \u0027Verification \u0026 Validation\u0027, \u0027Safety\u0027), (5, \u0027Ethical AI\u0027, \u0027Safety\u0027), (6, \u0027Assessing Data Bias\u0027, \u0027Fairness\u0027), (7, \u0027Reducing Bias in Models\u0027, \u0027Fairness\u0027), (8, \u0027Fair Feature Selection\u0027, \u0027Fairness\u0027), (9, \u0027Bias Correction Algorithms\u0027, \u0027Fairness\u0027), (10, \u0027Fairness-aware Clustering\u0027, \u0027Fairness\u0027);", + "sql": "SELECT COUNT(*) FROM safety_fairness_projects WHERE category IN (\u0027Safety\u0027, \u0027Fairness\u0027);", + "sql_explanation": "This SQL query calculates the total number of AI safety and AI fairness projects in the table \u0027safety_fairness_projects\u0027. The query uses the WHERE clause to filter the data by selecting rows with a \u0027category\u0027 value of either \u0027Safety\u0027 or \u0027Fairness\u0027. The COUNT(*) function returns the number of rows in the filtered data." 
+}, { + "id": "3992", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many explainable AI research papers were published in 2021?", + "sql_context": "CREATE TABLE Research_Papers (title TEXT, year INT, domain TEXT); INSERT INTO Research_Papers VALUES (\u0027Explainable AI\u0027, 2021, \u0027Safety\u0027), (\u0027Explainable AI\u0027, 2021, \u0027Fairness\u0027);", + "sql": "SELECT COUNT(*) FROM Research_Papers WHERE domain \u003d \u0027Explainable AI\u0027 AND year \u003d 2021;", + "sql_explanation": "This SQL query uses the COUNT(*) aggregate function to count the rows from the Research_Papers table, filtering rows by the domain set to \u0027Explainable AI\u0027 and year equal to 2021, to get how many explainable AI research papers were published in 2021." 
+}, { + "id": "4107", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the algorithm names and their corresponding risk_level in the ai_safety table where the risk_level is \u0027medium\u0027 or \u0027high\u0027", + "sql_context": "CREATE TABLE ai_safety (algorithm TEXT, risk_level TEXT, dataset TEXT, last_updated TIMESTAMP);", + "sql": "SELECT algorithm, risk_level FROM ai_safety WHERE risk_level IN (\u0027medium\u0027, \u0027high\u0027);", + "sql_explanation": "This query finds the algorithm names and their corresponding risk_level in the ai_safety table where the risk_level is \u0027medium\u0027 or \u0027high\u0027." +}, { + "id": "4227", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of papers published by each researcher, ordered by the number of papers in descending order.", + "sql_context": "CREATE TABLE ai_researchers (id INT, name VARCHAR(100), published_papers INT); INSERT INTO ai_researchers (id, name, published_papers) VALUES (1, \u0027Alice\u0027, 3), (2, \u0027Bob\u0027, 0), (3, \u0027Charlotte\u0027, 2), (4, \u0027David\u0027, 1), (5, \u0027Eva\u0027, 0);", + "sql": "SELECT name, published_papers FROM ai_researchers ORDER BY published_papers DESC;", + "sql_explanation": "The SQL query uses the COUNT 
aggregate function to count the number of papers published by each researcher and orders them in descending order based on the number of papers. The LIMIT clause restricts the result set to the top 2 researchers with the most papers." +}, { + "id": "4245", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many creative AI models have a safety score above 0.95 and were developed in South Asia?", + "sql_context": "CREATE TABLE safety_scores (model_id INT, region VARCHAR(50), safety FLOAT); INSERT INTO safety_scores (model_id, region, safety) VALUES (1, \u0027South Asia\u0027, 0.97), (2, \u0027Europe\u0027, 0.78), (3, \u0027South Asia\u0027, 0.92), (4, \u0027North America\u0027, 0.65), (5, \u0027South America\u0027, 0.98);", + "sql": "SELECT COUNT(*) FROM safety_scores WHERE region \u003d \u0027South Asia\u0027 AND safety \u003e 0.95;", + "sql_explanation": "This query counts the number of creative AI models with a safety score above 0.95 that were developed in South Asia by filtering models based on the region and safety score, and then summing the count of models from South Asia with a safety score above 0.95." 
+}, { + "id": "4269", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique AI safety research topics addressed before 2018.", + "sql_context": "CREATE TABLE AI_Safety_Topics (id INT, topic TEXT, published_date DATE); INSERT INTO AI_Safety_Topics (id, topic, published_date) VALUES (1, \u0027Topic1\u0027, \u00272017-01-01\u0027), (2, \u0027Topic2\u0027, \u00272018-05-15\u0027), (3, \u0027Topic3\u0027, \u00272016-03-20\u0027), (4, \u0027Topic4\u0027, \u00272018-12-31\u0027);", + "sql": "SELECT DISTINCT topic FROM AI_Safety_Topics WHERE published_date \u003c \u00272018-01-01\u0027;", + "sql_explanation": "This query selects unique \u0027topic\u0027 column values from the \u0027AI_Safety_Topics\u0027 table, filtering rows where the \u0027published_date\u0027 column value is earlier than \u00272018-01-01\u0027." 
+}, { + "id": "4376", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete AI safety incidents for algorithm \u0027ALG2\u0027 in 2023", + "sql_context": "CREATE TABLE ai_safety_incidents (algorithm_name VARCHAR(255), incidents INT, year INT); INSERT INTO ai_safety_incidents (algorithm_name, incidents, year) VALUES (\u0027ALG1\u0027, 10, 2023), (\u0027ALG2\u0027, 15, 2023), (\u0027ALG3\u0027, 5, 2023), (\u0027ALG4\u0027, 20, 2023), (\u0027ALG5\u0027, 12, 2023), (\u0027ALG6\u0027, 18, 2023), (\u0027ALG2\u0027, 25, 2023), (\u0027ALG8\u0027, 11, 2023);", + "sql": "DELETE FROM ai_safety_incidents WHERE algorithm_name \u003d \u0027ALG2\u0027 AND year \u003d 2023;", + "sql_explanation": "This query deletes records of AI safety incidents for algorithm \u0027ALG2\u0027 in 2023 from the ai_safety_incidents table." 
+}, { + "id": "4560", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of users who have not interacted with the explainable AI system in the past month", + "sql_context": "CREATE TABLE interactions (id INT, user_id INT, interaction_date DATE); INSERT INTO interactions (id, user_id, interaction_date) VALUES (1, 1001, \u00272022-02-01\u0027), (2, 1002, \u00272022-02-15\u0027), (3, 1003, \u00272022-02-20\u0027), (4, 1001, \u00272022-02-25\u0027), (5, 1004, \u00272022-03-01\u0027), (6, 1003, \u00272022-02-03\u0027);", + "sql": "DELETE FROM interactions WHERE interaction_date \u003c NOW() - INTERVAL 1 MONTH;", + "sql_explanation": "1. Filter interactions rows with interaction_date older than 1 month from the current date and time. 2. Delete those records from the interactions table." 
+}, { + "id": "4753", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete unsafe AI algorithms with incidents greater than 200 in 2022", + "sql_context": "CREATE TABLE unsafe_ai_algorithms (algorithm_name VARCHAR(255), incidents INT, year INT); INSERT INTO unsafe_ai_algorithms (algorithm_name, incidents, year) VALUES (\u0027ALG1\u0027, 120, 2022), (\u0027ALG2\u0027, 150, 2022), (\u0027ALG3\u0027, 80, 2022), (\u0027ALG4\u0027, 200, 2022), (\u0027ALG5\u0027, 70, 2022), (\u0027ALG6\u0027, 190, 2022), (\u0027ALG7\u0027, 130, 2022), (\u0027ALG8\u0027, 100, 2022);", + "sql": "DELETE FROM unsafe_ai_algorithms WHERE incidents \u003e 200 AND year \u003d 2022;", + "sql_explanation": "This query deletes records of unsafe AI algorithms with incidents greater than 200 in 2022 from the unsafe_ai_algorithms table." 
+}, { + "id": "4762", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average explainability score for models developed by \u0027Team A\u0027?", + "sql_context": "CREATE TABLE model_data (model_id INT, team VARCHAR(255), explainability_score FLOAT); INSERT INTO model_data (model_id, team, explainability_score) VALUES (1, \u0027Team A\u0027, 0.75), (2, \u0027Team B\u0027, 0.82), (3, \u0027Team A\u0027, 0.87);", + "sql": "SELECT AVG(explainability_score) FROM model_data WHERE team \u003d \u0027Team A\u0027;", + "sql_explanation": "This query calculates the average explainability score for models developed by \u0027Team A\u0027. It does this by selecting the average value of the \u0027explainability_score\u0027 column where the \u0027team\u0027 column is equal to \u0027Team A\u0027." 
+}, { + "id": "4876", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have developed models with a fairness score greater than 0.9?", + "sql_context": "CREATE TABLE organizations_fairness (org_id INT, fairness_score FLOAT); INSERT INTO organizations_fairness (org_id, fairness_score) VALUES (1, 0.85), (2, 0.92), (3, 0.88), (4, 0.7), (5, 0.95);", + "sql": "SELECT org_id FROM organizations_fairness WHERE fairness_score \u003e 0.9;", + "sql_explanation": "This query retrieves the organizations that have developed models with a fairness score greater than 0.9 by filtering the records based on the fairness_score." 
+}, { + "id": "4945", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the \u0027algorithm\u0027 and \u0027recall\u0027 values for records with \u0027precision\u0027 \u003e 0.9 in the \u0027evaluation_data\u0027 table", + "sql_context": "CREATE TABLE evaluation_data (id INT, algorithm VARCHAR(20), precision DECIMAL(3,2), recall DECIMAL(3,2)); INSERT INTO evaluation_data (id, algorithm, precision, recall) VALUES (1, \u0027Random Forest\u0027, 0.92, 0.85), (2, \u0027XGBoost\u0027, 0.95, 0.87), (3, \u0027Naive Bayes\u0027, 0.88, 0.83);", + "sql": "SELECT algorithm, recall FROM evaluation_data WHERE precision \u003e 0.9;", + "sql_explanation": "This query selects the \u0027algorithm\u0027 and \u0027recall\u0027 values for records with \u0027precision\u0027 \u003e 0.9 in the evaluation_data table. It filters records based on the condition provided in the WHERE clause." 
+}, { + "id": "5062", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum training time for models using the \u0027adaboost\u0027 algorithm across all datasets?", + "sql_context": "CREATE TABLE training_times (id INT, dataset VARCHAR(255), algorithm VARCHAR(255), time FLOAT); INSERT INTO training_times (id, dataset, algorithm, time) VALUES (1, \u0027MNIST\u0027, \u0027adaboost\u0027, 2.4), (2, \u0027CIFAR-10\u0027, \u0027adaboost\u0027, 3.1), (3, \u0027ImageNet\u0027, \u0027svm\u0027, 4.5);", + "sql": "SELECT MAX(time) FROM training_times WHERE algorithm \u003d \u0027adaboost\u0027;", + "sql_explanation": "Determine the maximum training time for models using the \u0027adaboost\u0027 algorithm by querying the training_times table and filtering on the algorithm field with \u0027adaboost\u0027 and computing the maximum time." 
+}, { + "id": "5139", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average satisfaction score for creative AI applications in the USA?", + "sql_context": "CREATE TABLE creative_ai (id INT, country VARCHAR(50), application VARCHAR(50), satisfaction FLOAT); INSERT INTO creative_ai (id, country, application, satisfaction) VALUES (1, \u0027USA\u0027, \u0027Text Generation\u0027, 4.3), (2, \u0027Canada\u0027, \u0027Image Recognition\u0027, 4.5);", + "sql": "SELECT AVG(satisfaction) FROM creative_ai WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This query calculates the average satisfaction score for creative AI applications in the USA by selecting the satisfaction column values where the country is \u0027USA\u0027, and then computing the average of those values." 
+}, { + "id": "5195", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the explainability rating for the AI system named \u0027AI Judge\u0027?", + "sql_context": "CREATE TABLE explainable_ai (ai_system TEXT, rating FLOAT); INSERT INTO explainable_ai (ai_system, rating) VALUES (\u0027AI Judge\u0027, 0.75), (\u0027AI Translator\u0027, 0.90), (\u0027AI Artist\u0027, 0.60);", + "sql": "SELECT rating FROM explainable_ai WHERE ai_system \u003d \u0027AI Judge\u0027;", + "sql_explanation": "This query retrieves the explainability rating for the AI system named \u0027AI Judge\u0027 by selecting the rating column from the explainable_ai table, filtered by the ai_system column with the value \u0027AI Judge\u0027." +}, { + "id": "5321", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the safety score for ModelA to 0.90.", + "sql_context": "CREATE TABLE models (id INT, name TEXT, safety_score FLOAT); INSERT INTO models (id, name, safety_score) VALUES (1, \u0027ModelA\u0027, 0.85), (2, \u0027ModelB\u0027, 0.92);", + "sql": "UPDATE models SET safety_score \u003d 0.90 WHERE name \u003d \u0027ModelA\u0027;", + "sql_explanation": "This query updates the safety score for ModelA to 0.90. 
It uses the UPDATE statement to modify the \u0027safety_score\u0027 column in the \u0027models\u0027 table where the \u0027name\u0027 is \u0027ModelA\u0027. The new safety score is set to 0.90." +}, { + "id": "5374", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the \u0027id\u0027 of all \u0027explainable\u0027 models.", + "sql_context": "CREATE TABLE models (id INT, name TEXT, explainability TEXT); INSERT INTO models (id, name, explainability) VALUES (1, \u0027modelA\u0027, \u0027explainable\u0027), (2, \u0027modelB\u0027, \u0027non_explainable\u0027);", + "sql": "SELECT id FROM models WHERE explainability \u003d \u0027explainable\u0027;", + "sql_explanation": "*This query selects the \u0027id\u0027 column from the \u0027models\u0027 table where the \u0027explainability\u0027 column is equal to \u0027explainable\u0027.*" +}, { + "id": "5392", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average explainability score for models trained on the \u0027european_healthcare\u0027 dataset?", + "sql_context": "CREATE TABLE european_healthcare (model_name TEXT, explainability_score FLOAT); INSERT INTO european_healthcare (model_name, explainability_score) VALUES (\u0027model1\u0027, 0.85), (\u0027model2\u0027, 0.90), 
(\u0027model3\u0027, 0.88);", + "sql": "SELECT AVG(explainability_score) FROM european_healthcare;", + "sql_explanation": "This SQL query calculates the average explainability score for models trained on the \u0027european_healthcare\u0027 dataset. It does this by using the AVG function, which returns the average value of a numeric column. In this case, it\u0027s calculating the average explainability score for all models in the \u0027european_healthcare\u0027 table." +}, { + "id": "5589", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI safety principles are associated with each AI model?", + "sql_context": "CREATE TABLE ai_models (model_name TEXT, safety_principles INT); INSERT INTO ai_models (model_name, safety_principles) VALUES (\u0027ModelA\u0027, 3), (\u0027ModelB\u0027, 4), (\u0027ModelC\u0027, 2), (\u0027ModelD\u0027, 3);", + "sql": "SELECT model_name, safety_principles FROM ai_models;", + "sql_explanation": "This query retrieves the number of AI safety principles associated with each AI model. It simply selects the model_name and safety_principles columns from the ai_models table." 
+}, { + "id": "5656", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all papers related to the AI Safety category.", + "sql_context": "CREATE TABLE papers (paper_id INT, title VARCHAR(100), author_id INT, published_date DATE, category VARCHAR(50)); INSERT INTO papers (paper_id, title, author_id, published_date, category) VALUES (1, \u0027Fairness in AI\u0027, 1, \u00272021-06-01\u0027, \u0027Algorithmic Fairness\u0027); INSERT INTO papers (paper_id, title, author_id, published_date, category) VALUES (2, \u0027AI Safety Challenges\u0027, 2, \u00272021-07-15\u0027, \u0027AI Safety\u0027); INSERT INTO papers (paper_id, title, author_id, published_date, category) VALUES (3, \u0027Interpretable AI Models\u0027, 1, \u00272020-12-20\u0027, \u0027Explainable AI\u0027);", + "sql": "DELETE FROM papers WHERE category \u003d \u0027AI Safety\u0027;", + "sql_explanation": "This query deletes all rows from the papers table where the category is \u0027AI Safety\u0027. It uses the DELETE statement to remove the matching rows based on the specified condition in the WHERE clause." 
+}, { + "id": "5657", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average number of papers in all AI safety conferences.", + "sql_context": "CREATE TABLE aisafety_conferences (conference VARCHAR(255), num_papers INT); INSERT INTO aisafety_conferences (conference, num_papers) VALUES (\u0027AAAI Safety\u0027, 15), (\u0027NeurIPS Ethics\u0027, 20), (\u0027IJCAI Safety\u0027, 12);", + "sql": "SELECT AVG(num_papers) FROM aisafety_conferences", + "sql_explanation": "This SQL query calculates the average value of the \u0027num_papers\u0027 column from the \u0027aisafety_conferences\u0027 table." +}, { + "id": "5718", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete models developed in South America.", + "sql_context": "CREATE TABLE models (id INT, name TEXT, country TEXT); INSERT INTO models (id, name, country) VALUES (1, \u0027ModelA\u0027, \u0027US\u0027), (2, \u0027ModelB\u0027, \u0027Canada\u0027), (3, \u0027ModelC\u0027, \u0027Brazil\u0027);", + "sql": "DELETE FROM models WHERE country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query deletes models developed in South America. 
It uses the DELETE statement to remove records from the \u0027models\u0027 table where the \u0027country\u0027 is \u0027Brazil\u0027, which represents a country in South America." +}, { + "id": "5745", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique countries of origin for all AI researchers?", + "sql_context": "CREATE TABLE Researchers (researcher_id INT, researcher_name VARCHAR(50), country VARCHAR(50)); INSERT INTO Researchers (researcher_id, researcher_name, country) VALUES (101, \u0027Alice\u0027, \u0027USA\u0027), (102, \u0027Bob\u0027, \u0027Canada\u0027), (103, \u0027Charlie\u0027, \u0027India\u0027), (104, \u0027Diana\u0027, \u0027USA\u0027), (105, \u0027Eve\u0027, \u0027Brazil\u0027), (106, \u0027Frank\u0027, \u0027USA\u0027), (107, \u0027Grace\u0027, \u0027Germany\u0027);", + "sql": "SELECT DISTINCT country FROM Researchers;", + "sql_explanation": "This SQL query retrieves the country column from the Researchers table, listing all unique countries of origin for AI researchers by using the DISTINCT keyword." 
+}, { + "id": "5776", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What \u0027domains\u0027 are represented in the \u0027model_data\u0027 table?", + "sql_context": "CREATE TABLE model_data (id INT, model_name TEXT, domain TEXT); INSERT INTO model_data (id, model_name, domain) VALUES (1, \u0027modelA\u0027, \u0027healthcare\u0027), (2, \u0027modelB\u0027, \u0027finance\u0027), (3, \u0027modelC\u0027, \u0027creative\u0027);", + "sql": "SELECT DISTINCT domain FROM model_data;", + "sql_explanation": "*This query selects distinct \u0027domain\u0027 values from the \u0027model_data\u0027 table, providing unique domains represented in the table.*" +}, { + "id": "5821", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of creative AI applications?", + "sql_context": "CREATE TABLE creative_ai (application_name TEXT, application_type TEXT); INSERT INTO creative_ai (application_name, application_type) VALUES (\u0027App4\u0027, \u0027Video Generation\u0027), (\u0027App5\u0027, \u0027Data Visualization\u0027), (\u0027App6\u0027, \u0027Motion Detection\u0027);", + "sql": "SELECT COUNT(*) FROM creative_ai;", + "sql_explanation": "Count all the applications in the \u0027creative_ai\u0027 table." 
+}, { + "id": "2675", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many social impact investments were made in the Asia-Pacific region in the last 12 months?", + "sql_context": "CREATE TABLE investments (id INT, region VARCHAR(255), date DATE); INSERT INTO investments (id, region, date) VALUES (1, \u0027Asia-Pacific\u0027, \u00272021-02-15\u0027), (2, \u0027Europe\u0027, \u00272020-12-21\u0027), (3, \u0027Asia-Pacific\u0027, \u00272021-01-03\u0027);", + "sql": "SELECT COUNT(*) FROM investments WHERE region \u003d \u0027Asia-Pacific\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH);", + "sql_explanation": "This query counts the number of social impact investments made in the Asia-Pacific region in the last 12 months. It does this by using the COUNT function on all rows and filtering the data using the WHERE clause to only include rows with the region \u0027Asia-Pacific\u0027 and date greater than or equal to the current date minus 12 months." 
+}, { + "id": "2992", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the sum of investments for projects with a climate action focus in the Asia-Pacific region.", + "sql_context": "CREATE TABLE projects_investments (id INT, name TEXT, focus TEXT, region TEXT, investment FLOAT); INSERT INTO projects_investments (id, name, focus, region, investment) VALUES (1, \u0027Clean Energy Project\u0027, \u0027Climate Action\u0027, \u0027Asia-Pacific\u0027, 100000.0), (2, \u0027Sustainable Agriculture Program\u0027, \u0027Biodiversity\u0027, \u0027Asia-Pacific\u0027, 150000.0);", + "sql": "SELECT SUM(investment) FROM projects_investments WHERE focus \u003d \u0027Climate Action\u0027 AND region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "This SQL query calculates the total investment for projects with a climate action focus in the Asia-Pacific region by using the SUM function on the investment column and filtering the rows with the WHERE clause for the climate action focus and the Asia-Pacific region." 
+}, { + "id": "3865", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of ESG rated investments in South America?", + "sql_context": "CREATE TABLE if not exists investments(investment_id INT, investment_type VARCHAR(20), region VARCHAR(20)); INSERT INTO investments (investment_id, investment_type, region) VALUES (1, \u0027ESG\u0027, \u0027South America\u0027), (2, \u0027Non-ESG\u0027, \u0027North America\u0027), (3, \u0027ESG\u0027, \u0027Europe\u0027);", + "sql": "SELECT COUNT(*) FROM investments WHERE investment_type \u003d \u0027ESG\u0027 AND region LIKE \u0027South%\u0027;", + "sql_explanation": "This query counts the number of records in the \u0027investments\u0027 table where the \u0027investment_type\u0027 is \u0027ESG\u0027 and the \u0027region\u0027 starts with \u0027South\u0027." 
+}, { + "id": "4025", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum investment amount for the \u0027Latin America\u0027 region?", + "sql_context": "CREATE TABLE impact_investments (id INT, region VARCHAR(20), investment_year INT, investment_amount FLOAT); INSERT INTO impact_investments (id, region, investment_year, investment_amount) VALUES (1, \u0027Latin America\u0027, 2020, 100000), (2, \u0027Africa\u0027, 2019, 120000), (3, \u0027Latin America\u0027, 2021, 150000);", + "sql": "SELECT MIN(investment_amount) FROM impact_investments WHERE region \u003d \u0027Latin America\u0027;", + "sql_explanation": "This SQL query calculates the minimum investment amount for the \u0027Latin America\u0027 region. It does this by using the MIN function on the investment_amount column, filtering the rows with a WHERE clause to only consider those in the \u0027Latin America\u0027 region." 
+}, { + "id": "4035", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all investment strategies with a risk level above 30 and their associated sectors.", + "sql_context": "CREATE TABLE investment_strategies (id INT, strategy VARCHAR(50), risk_level INT, sector VARCHAR(20)); INSERT INTO investment_strategies (id, strategy, risk_level, sector) VALUES (1, \u0027Impact Bonds\u0027, 30, \u0027social impact\u0027), (2, \u0027Green Equity Funds\u0027, 20, \u0027environment\u0027), (3, \u0027Sustainable Real Estate\u0027, 40, \u0027real estate\u0027);", + "sql": "SELECT strategy, risk_level, sector FROM investment_strategies WHERE risk_level \u003e 30;", + "sql_explanation": "The SQL query lists all investment strategies with a risk level above 30 and their associated sectors by selecting the strategy, risk_level, and sector columns from the investment_strategies table, filtered by the risk_level column greater than 30." 
+}, { + "id": "4150", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum investment in water conservation initiatives for organizations with over 5000 employees?", + "sql_context": "CREATE TABLE water_conservation_investments (id INT, organization_name VARCHAR(50), employees INT, investment DECIMAL(10,2)); INSERT INTO water_conservation_investments (id, organization_name, employees, investment) VALUES (1, \u0027Corp X\u0027, 10000, 15000.00), (2, \u0027Corp Y\u0027, 3000, 5000.00), (3, \u0027Corp Z\u0027, 5000, 8000.00);", + "sql": "SELECT MAX(investment) FROM water_conservation_investments WHERE employees \u003e 5000;", + "sql_explanation": "This SQL query finds the maximum investment in water conservation initiatives for organizations with over 5000 employees. It does this by using the MAX function on the investment column, filtering the data with a WHERE clause for organizations with more than 5000 employees." 
+}, { + "id": "4206", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of investments in companies in the \u0027Real Estate\u0027 sector in the year 2021?", + "sql_context": "CREATE TABLE investments (id INT, company_id INT, sector VARCHAR(255), year INT, amount FLOAT); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (1, 1, \u0027Real Estate\u0027, 2021, 200000.0); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (2, 2, \u0027Real Estate\u0027, 2021, 300000.0); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (3, 3, \u0027Real Estate\u0027, 2021, 400000.0);", + "sql": "SELECT SUM(amount) FROM investments WHERE sector \u003d \u0027Real Estate\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the sum of investments in companies in the \u0027Real Estate\u0027 sector in the year 2021 by using the SUM function on the amount column and filtering rows with the WHERE clause based on the sector and year columns." 
+}, { + "id": "4265", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum investment in a single company in the \u0027Healthcare\u0027 sector in the year 2019?", + "sql_context": "CREATE TABLE investments (id INT, company_id INT, sector VARCHAR(255), year INT, amount FLOAT); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (1, 1, \u0027Healthcare\u0027, 2019, 400000.0); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (2, 2, \u0027Healthcare\u0027, 2019, 500000.0); INSERT INTO investments (id, company_id, sector, year, amount) VALUES (3, 3, \u0027Healthcare\u0027, 2019, 600000.0);", + "sql": "SELECT MIN(amount) FROM investments WHERE sector \u003d \u0027Healthcare\u0027 AND year \u003d 2019;", + "sql_explanation": "This query calculates the minimum investment in a single company in the \u0027Healthcare\u0027 sector in the year 2019 by using the MIN function on the amount column and filtering rows with the WHERE clause based on the sector and year columns." 
+}, { + "id": "4602", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Find the top 5 highest risk investments by ESG rating", + "sql_context": "CREATE TABLE investments (id INT, company_id INT, ESG_rating FLOAT, risk_level INT)", + "sql": "SELECT * FROM investments ORDER BY ESG_rating DESC, risk_level ASC LIMIT 5", + "sql_explanation": "This query retrieves the top 5 highest risk investments by ESG rating by sorting the investments table based on ESG_rating in descending order and risk_level in ascending order and limiting the results to 5." +}, { + "id": "4633", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum \u0027amount\u0027 invested by \u0027SustainableFund\u0027?", + "sql_context": "CREATE TABLE InvestmentsMax (id INT, investor VARCHAR(255), sector VARCHAR(255), amount DECIMAL(10,2));", + "sql": "SELECT MAX(amount) FROM InvestmentsMax WHERE investor \u003d \u0027SustainableFund\u0027;", + "sql_explanation": "The SQL query calculates the maximum \u0027amount\u0027 for rows where the \u0027investor\u0027 is \u0027SustainableFund\u0027 to find the maximum investment amount." 
+}, { + "id": "4682", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum impact score achieved by a company in the technology sector?", + "sql_context": "CREATE TABLE company_impact (id INT, name VARCHAR(50), sector VARCHAR(20), impact_score FLOAT); INSERT INTO company_impact (id, name, sector, impact_score) VALUES (1, \u0027Company X\u0027, \u0027Technology\u0027, 85.0), (2, \u0027Company Y\u0027, \u0027Finance\u0027, 80.0), (3, \u0027Company Z\u0027, \u0027Technology\u0027, 87.5);", + "sql": "SELECT MIN(impact_score) FROM company_impact WHERE sector \u003d \u0027Technology\u0027;", + "sql_explanation": "This query calculates the minimum impact score achieved by a company in the technology sector. It does so by using the MIN function on the impact_score column, while filtering the data for rows with a sector value of \u0027Technology\u0027." 
+}, { + "id": "4719", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records related to water conservation strategies from the investment_strategies table.", + "sql_context": "CREATE TABLE investment_strategies (id INT, strategy VARCHAR(50)); INSERT INTO investment_strategies (id, strategy) VALUES (1, \u0027Green energy\u0027); INSERT INTO investment_strategies (id, strategy) VALUES (2, \u0027Water conservation\u0027); INSERT INTO investment_strategies (id, strategy) VALUES (3, \u0027Sustainable agriculture\u0027);", + "sql": "DELETE FROM investment_strategies WHERE strategy \u003d \u0027Water conservation\u0027;", + "sql_explanation": "The SQL query deletes all records related to water conservation strategies by selecting all records with the strategy \u0027Water conservation\u0027 and removing them from the investment_strategies table." 
+}, { + "id": "4757", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ESG score for companies in the renewable energy sector?", + "sql_context": "CREATE TABLE companies (id INT, name VARCHAR(255), sector VARCHAR(255), ESG_score FLOAT); INSERT INTO companies (id, name, sector, ESG_score) VALUES (1, \u0027Vestas Wind Systems\u0027, \u0027Renewable Energy\u0027, 92.1), (2, \u0027Siemens Gamesa Renewable Energy\u0027, \u0027Renewable Energy\u0027, 89.5), (3, \u0027Tesla\u0027, \u0027Automotive\u0027, 78.3);", + "sql": "SELECT MAX(ESG_score) FROM companies WHERE sector \u003d \u0027Renewable Energy\u0027;", + "sql_explanation": "This SQL query calculates the maximum ESG score for companies in the renewable energy sector. It first filters the companies table to only include rows where the sector is \u0027Renewable Energy\u0027. Then, it calculates the maximum ESG_score for these rows using the MAX() function." 
+}, { + "id": "4808", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total number of green investments by US-based funds?", + "sql_context": "CREATE TABLE fund_investments(fund_id INT, investment_id INT, investment_type VARCHAR(20));", + "sql": "SELECT COUNT(*) FROM fund_investments WHERE investment_type \u003d \u0027green\u0027;", + "sql_explanation": "Counts the number of records in the fund_investments table where the investment_type is \u0027green\u0027." +}, { + "id": "4906", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average impact score for investments in the Education sector?", + "sql_context": "CREATE TABLE investments (id INT, sector VARCHAR(255), impact_score FLOAT); INSERT INTO investments (id, sector, impact_score) VALUES (1, \u0027Education\u0027, 8.5), (2, \u0027Healthcare\u0027, 7.8), (3, \u0027Education\u0027, 9.2);", + "sql": "SELECT AVG(impact_score) FROM investments WHERE sector \u003d \u0027Education\u0027;", + "sql_explanation": "This SQL query calculates the average impact score for investments in the Education sector by using the AVG 
function on the impact_score column, while filtering the data for the Education sector." +}, { + "id": "5093", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of employees in the healthcare sector?", + "sql_context": "CREATE TABLE companies (id INT, sector VARCHAR(255), employees INT); INSERT INTO companies (id, sector, employees) VALUES (1, \u0027healthcare\u0027, 4500), (2, \u0027technology\u0027, 5500), (3, \u0027healthcare\u0027, 6000);", + "sql": "SELECT AVG(employees) FROM companies WHERE sector \u003d \u0027healthcare\u0027;", + "sql_explanation": "This query calculates the average number of employees in the healthcare sector. It does this by using the AVG function on the employees column and filtering the data using the WHERE clause to only include rows with the sector \u0027healthcare\u0027." 
+}, { + "id": "5106", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ESG score of companies in the technology sector?", + "sql_context": "CREATE TABLE companies (id INT, sector VARCHAR(255), ESG_score FLOAT); INSERT INTO companies (id, sector, ESG_score) VALUES (1, \u0027technology\u0027, 72.5);", + "sql": "SELECT AVG(ESG_score) FROM companies WHERE sector \u003d \u0027technology\u0027;", + "sql_explanation": "This query calculates the average ESG score for companies in the technology sector. It uses the AVG() function to find the mean value of ESG_score column for rows with a sector value of \u0027technology\u0027." 
+}, { + "id": "5194", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ESG rating for companies in the \u0027finance\u0027 sector?", + "sql_context": "CREATE TABLE companies (id INT, sector VARCHAR(20), ESG_rating FLOAT); INSERT INTO companies (id, sector, ESG_rating) VALUES (1, \u0027technology\u0027, 7.5), (2, \u0027finance\u0027, 8.2), (3, \u0027technology\u0027, 7.8);", + "sql": "SELECT MAX(ESG_rating) FROM companies WHERE sector \u003d \u0027finance\u0027;", + "sql_explanation": "Calculate the maximum ESG_rating for companies in the \u0027finance\u0027 sector by using the MAX() function. Filter the companies table for records with the sector \u0027finance\u0027." 
+}, { + "id": "5256", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ESG score for companies in the \u0027Finance\u0027 sector?", + "sql_context": "CREATE TABLE companies (id INT, sector VARCHAR(20), ESG_score FLOAT); INSERT INTO companies (id, sector, ESG_score) VALUES (1, \u0027Technology\u0027, 85.0), (2, \u0027Finance\u0027, 82.0), (3, \u0027Healthcare\u0027, 88.0), (4, \u0027Finance\u0027, 70.0);", + "sql": "SELECT MAX(ESG_score) FROM companies WHERE sector \u003d \u0027Finance\u0027;", + "sql_explanation": "This query finds the maximum ESG score for companies in the \u0027Finance\u0027 sector. It does so by using the MAX function to find the maximum value of the ESG_score column, while filtering the rows to only include those where the sector is \u0027Finance\u0027." 
+}, { + "id": "5329", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the lowest risk investment strategy and its associated risk score?", + "sql_context": "CREATE TABLE investment_strategies (id INT, strategy TEXT, risk_score FLOAT); INSERT INTO investment_strategies (id, strategy, risk_score) VALUES (1, \u0027Equity Investment\u0027, 6.5), (2, \u0027Real Estate Investment\u0027, 4.8), (3, \u0027Bond Investment\u0027, 3.2);", + "sql": "SELECT strategy, MIN(risk_score) FROM investment_strategies;", + "sql_explanation": "This SQL query retrieves the lowest risk investment strategy by finding the minimum risk score using the MIN() function and selecting the corresponding strategy name." 
+}, { + "id": "5426", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all investment strategies with ESG scores above 80.", + "sql_context": "CREATE TABLE investment_strategies (strategy_id INT, sector VARCHAR(20), ESG_score FLOAT); INSERT INTO investment_strategies (strategy_id, sector, ESG_score) VALUES (101, \u0027renewable_energy\u0027, 82.5), (102, \u0027sustainable_agriculture\u0027, 78.3), (103, \u0027green_transportation\u0027, 85.1);", + "sql": "SELECT * FROM investment_strategies WHERE ESG_score \u003e 80;", + "sql_explanation": "List all investment strategies with ESG scores above 80 by filtering on the ESG_score column." 
+}, { + "id": "5505", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all impact investments in the \u0027Africa\u0027 region.", + "sql_context": "CREATE TABLE impact_investments (id INT, region VARCHAR(20), investment_year INT, investment_amount FLOAT); INSERT INTO impact_investments (id, region, investment_year, investment_amount) VALUES (1, \u0027Asia\u0027, 2020, 150000), (2, \u0027Africa\u0027, 2019, 120000), (3, \u0027Asia\u0027, 2020, 180000);", + "sql": "DELETE FROM impact_investments WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "This SQL query deletes all impact investments in the \u0027Africa\u0027 region. It does this by using the DELETE command on the impact_investments table, filtering the rows with a WHERE clause to only consider those in the \u0027Africa\u0027 region." 
+}, { + "id": "5611", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Find the risk assessment details for the company with ID 4.", + "sql_context": "CREATE TABLE risk_assessment (company_id INT, risk_level VARCHAR(10), mitigation_strategy TEXT); INSERT INTO risk_assessment (company_id, risk_level, mitigation_strategy) VALUES (4, \u0027medium\u0027, \u0027Regular audits and employee trainings.\u0027), (5, \u0027low\u0027, \u0027Minor improvements in supply chain management.\u0027), (6, \u0027high\u0027, \u0027Immediate actions to reduce environmental impact.\u0027);", + "sql": "SELECT * FROM risk_assessment WHERE company_id \u003d 4;", + "sql_explanation": "Retrieve risk assessment details for the company with ID 4 by filtering on the company_id column." 
+}, { + "id": "1558", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new investment round into the \"investment_rounds\" table for \u0027Charlie Startup\u0027 with $10M raised on 2022-01-01", + "sql_context": "CREATE TABLE investment_rounds (id INT, company_name VARCHAR(100), round_type VARCHAR(50), raised_amount FLOAT, round_date DATE);", + "sql": "INSERT INTO investment_rounds (id, company_name, round_type, raised_amount, round_date) VALUES (3, \u0027Charlie Startup\u0027, \u0027Series B\u0027, 10000000, \u00272022-01-01\u0027);", + "sql_explanation": "The SQL query inserts a new record into the \"investment_rounds\" table for \u0027Charlie Startup\u0027 with $10M raised on 2022-01-01." 
+}, { + "id": "1663", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \"investment_rounds\" table with the following data: \"Series A\", 2021, 5000000, \"Venture Capital\"", + "sql_context": "CREATE TABLE investment_rounds (round_name VARCHAR(50), investment_year INT, investment_amount INT, investment_type VARCHAR(50));", + "sql": "INSERT INTO investment_rounds (round_name, investment_year, investment_amount, investment_type) VALUES (\u0027Series A\u0027, 2021, 5000000, \u0027Venture Capital\u0027);", + "sql_explanation": "This query inserts a new record into the investment_rounds table with the following data: \"Series A\", 2021, 5000000, \"Venture Capital\". It uses the INSERT INTO statement followed by the VALUES clause, specifying the column values for the new record. The VALUES clause contains a tuple with the values for the round_name, investment_year, investment_amount, and investment_type columns." 
+}, { + "id": "2249", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of companies founded by veterans in the technology industry?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founding_date DATE, industry TEXT, headquarters TEXT, veteran_founder BOOLEAN);", + "sql": "SELECT (COUNT(*) FILTER (WHERE industry \u003d \u0027technology\u0027 AND veteran_founder \u003d TRUE)) * 100.0 / COUNT(*) AS percentage FROM company;", + "sql_explanation": "This query calculates the percentage of companies founded by veterans in the technology industry. It starts by selecting the count of records where industry is \u0027technology\u0027 and veteran_founder is TRUE, and multiplying the result by 100.0. Then, the query divides the result by the total count of records from the company table." 
+}, { + "id": "3217", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum total funding for startups with a female founder that have had a successful exit strategy?", + "sql_context": "CREATE TABLE IF NOT EXISTS startups(id INT, name TEXT, founder_gender TEXT, total_funding FLOAT, exit_strategy TEXT); INSERT INTO startups (id, name, founder_gender, total_funding, exit_strategy) VALUES (1, \u0027Rent the Runway\u0027, \u0027Female\u0027, 16000000, \u0027IPO\u0027); INSERT INTO startups (id, name, founder_gender, total_funding, exit_strategy) VALUES (2, \u0027Glossier\u0027, \u0027Female\u0027, 12000000, \u0027Acquisition\u0027); INSERT INTO startups (id, name, founder_gender, total_funding, exit_strategy) VALUES (3, \u0027The Wing\u0027, \u0027Female\u0027, 9000000, \u0027Acquisition\u0027);", + "sql": "SELECT MIN(total_funding) FROM startups WHERE founder_gender \u003d \u0027Female\u0027 AND exit_strategy IS NOT NULL;", + "sql_explanation": "This SQL query calculates the minimum total funding for startups with a female founder that have had a successful exit strategy by selecting the MIN of the total_funding column where founder_gender is \u0027Female\u0027 and exit_strategy is not null." 
+}, { + "id": "3227", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new company \u0027Ada Ventures\u0027 into the \u0027companies\u0027 table", + "sql_context": "CREATE TABLE companies (id INT PRIMARY KEY, name VARCHAR(255), industry VARCHAR(255), founding_year INT);", + "sql": "INSERT INTO companies (id, name, industry, founding_year) VALUES (1, \u0027Ada Ventures\u0027, \u0027VC Fund\u0027, 2019);", + "sql_explanation": "This query inserts a new row into the \u0027companies\u0027 table with the specified values for the \u0027name\u0027, \u0027industry\u0027, and \u0027founding_year\u0027 columns. The \u0027id\u0027 is set to 1." 
+}, { + "id": "3321", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for female-founded startups in the healthcare sector?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founding_date DATE, founder_gender TEXT); INSERT INTO company (id, name, industry, founding_date, founder_gender) VALUES (1, \u0027HealCo\u0027, \u0027Healthcare\u0027, \u00272019-01-01\u0027, \u0027Female\u0027);", + "sql": "SELECT AVG(funding_amount) FROM company WHERE industry \u003d \u0027Healthcare\u0027 AND founder_gender \u003d \u0027Female\u0027;", + "sql_explanation": "This SQL query calculates the average funding amount for female-founded startups in the healthcare sector by filtering the rows based on the industry and founder_gender and then computing the average of the funding_amount column." 
+}, { + "id": "3648", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of startups founded by indigenous people in the healthcare industry?", + "sql_context": "CREATE TABLE startups(id INT, name TEXT, industry TEXT, founder_gender TEXT, founder_race TEXT); INSERT INTO startups VALUES (1, \u0027StartupA\u0027, \u0027Healthcare\u0027, \u0027Female\u0027, \u0027Asian\u0027); INSERT INTO startups VALUES (2, \u0027StartupB\u0027, \u0027Healthcare\u0027, \u0027Female\u0027, \u0027Black\u0027); INSERT INTO startups VALUES (3, \u0027StartupC\u0027, \u0027Tech\u0027, \u0027Male\u0027, \u0027White\u0027); INSERT INTO startups VALUES (4, \u0027StartupD\u0027, \u0027Healthcare\u0027, \u0027Male\u0027, \u0027Indigenous\u0027); INSERT INTO startups VALUES (5, \u0027StartupE\u0027, \u0027Sustainability\u0027, \u0027Non-binary\u0027, \u0027Hispanic\u0027);", + "sql": "SELECT COUNT(*) FROM startups WHERE founder_race \u003d \u0027Indigenous\u0027 AND industry \u003d \u0027Healthcare\u0027;", + "sql_explanation": "This SQL query determines the number of startups founded by indigenous people in the healthcare industry. It does so by using the COUNT function to count the number of rows in the startups table that match the filter criteria of founder_race being \u0027Indigenous\u0027 and industry being \u0027Healthcare\u0027. The results will be the number of startups founded by indigenous people in the healthcare industry." 
+}, { + "id": "4449", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding raised by startups with a social impact focus?", + "sql_context": "CREATE TABLE startup (id INT, name TEXT, industry TEXT, funding_round_size INT); INSERT INTO startup (id, name, industry, funding_round_size) VALUES (1, \u0027SocialImpactOne\u0027, \u0027Social Impact\u0027, 2000000); INSERT INTO startup (id, name, industry, funding_round_size) VALUES (2, \u0027TechStart\u0027, \u0027Tech\u0027, 10000000);", + "sql": "SELECT SUM(funding_round_size) FROM startup WHERE industry \u003d \u0027Social Impact\u0027;", + "sql_explanation": "The SQL query calculates the total funding raised by startups with a social impact focus by using the SUM function on the funding_round_size column, where the industry is \u0027Social Impact\u0027." 
+}, { + "id": "5252", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total number of startups founded by people with disabilities", + "sql_context": "CREATE TABLE startups(id INT, name TEXT, founding_year INT, founder_disability BOOLEAN); INSERT INTO startups (id, name, founding_year, founder_disability) VALUES (1, \u0027Acme Inc\u0027, 2010, true); INSERT INTO startups (id, name, founding_year, founder_disability) VALUES (2, \u0027Beta Corp\u0027, 2015, false); INSERT INTO startups (id, name, founding_year, founder_disability) VALUES (3, \u0027Gamma LLC\u0027, 2020, true); INSERT INTO startups (id, name, founding_year, founder_disability) VALUES (4, \u0027Delta Inc\u0027, 2018, false);", + "sql": "SELECT COUNT(*) FROM startups WHERE founder_disability \u003d true;", + "sql_explanation": "This query counts the number of records where the founder_disability is true in the startups table." 
+}, { + "id": "2180", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many solar power projects were completed in California since 2015 with a budget over $5 million?", + "sql_context": "CREATE TABLE projects (id INT, state VARCHAR(20), year_completed INT, budget FLOAT, project_type VARCHAR(20)); INSERT INTO projects (id, state, year_completed, budget, project_type) VALUES (1, \u0027California\u0027, 2014, 4000000, \u0027Wind\u0027), (2, \u0027California\u0027, 2016, 6000000, \u0027Solar\u0027), (3, \u0027California\u0027, 2017, 3000000, \u0027Solar\u0027), (4, \u0027California\u0027, 2018, 7000000, \u0027Wind\u0027);", + "sql": "SELECT COUNT(*) FROM projects WHERE state \u003d \u0027California\u0027 AND year_completed \u003e\u003d 2015 AND project_type \u003d \u0027Solar\u0027 AND budget \u003e 5000000;", + "sql_explanation": "Count the number of solar power projects in California since 2015 with a budget over $5 million." 
+}, { + "id": "2264", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all smart city technology adoptions in the South American cities with a population greater than 2 million.", + "sql_context": "CREATE TABLE smart_city_tech (tech_id INT, tech_name VARCHAR(30), city VARCHAR(20), population INT); INSERT INTO smart_city_tech (tech_id, tech_name, city, population) VALUES (1, \u0027Smart Grids\u0027, \u0027Buenos Aires\u0027, 3000000), (2, \u0027Smart Lighting\u0027, \u0027Rio de Janeiro\u0027, 6500000), (3, \u0027Smart Traffic Management\u0027, \u0027Santiago\u0027, 2500000);", + "sql": "SELECT tech_name, city FROM smart_city_tech WHERE population \u003e 2000000 AND city IN (\u0027Buenos Aires\u0027, \u0027Rio de Janeiro\u0027, \u0027Santiago\u0027);", + "sql_explanation": "This query lists all smart city technology adoptions in the South American cities with a population greater than 2 million by selecting the \u0027tech_name\u0027 and \u0027city\u0027 columns where the \u0027population\u0027 is greater than 2,000,000 and the \u0027city\u0027 is in the list of (\u0027Buenos Aires\u0027, \u0027Rio de Janeiro\u0027, \u0027Santiago\u0027)." 
+}, { + "id": "2695", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon offset of projects in the Latin American region?", + "sql_context": "CREATE TABLE CarbonOffset (id INT, country VARCHAR(50), project_offset INT); INSERT INTO CarbonOffset (id, country, project_offset) VALUES (1, \u0027Brazil\u0027, 12000), (2, \u0027Argentina\u0027, 8000), (3, \u0027Mexico\u0027, 15000), (4, \u0027Colombia\u0027, 10000);", + "sql": "SELECT SUM(project_offset) FROM CarbonOffset WHERE country IN (\u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Mexico\u0027, \u0027Colombia\u0027, \u0027Peru\u0027);", + "sql_explanation": "This SQL query calculates the total carbon offset of projects in the Latin American region by summing up the project_offset column values of countries located in Latin America, including Brazil, Argentina, Mexico, Colombia, and Peru." 
+}, { + "id": "2706", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of green buildings in each city?", + "sql_context": "CREATE TABLE GreenBuildingsByCity (City VARCHAR(50), GreenBuildingCount INT, TotalBuildingCount INT); INSERT INTO GreenBuildingsByCity (City, GreenBuildingCount, TotalBuildingCount) VALUES (\u0027New York\u0027, 5, 10), (\u0027London\u0027, 3, 10);", + "sql": "SELECT City, (GreenBuildingCount * 100.0 / TotalBuildingCount) AS GreenBuildingPercentage FROM GreenBuildingsByCity;", + "sql_explanation": "This SQL query calculates the percentage of green buildings in each city by dividing the green building count by the total building count and multiplying by 100.0." 
+}, { + "id": "2757", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new Green building project \u0027ProjectE\u0027 in France with renewable energy source \u0027Wind\u0027.", + "sql_context": "CREATE TABLE green_buildings (project_name VARCHAR(50), country VARCHAR(50), renewable_energy_source VARCHAR(50));", + "sql": "INSERT INTO green_buildings (project_name, country, renewable_energy_source) VALUES (\u0027ProjectE\u0027, \u0027France\u0027, \u0027Wind\u0027);", + "sql_explanation": "This query inserts a new Green building project \u0027ProjectE\u0027 in France with renewable energy source \u0027Wind\u0027 by adding a new record to the green_buildings table with the specified values." 
+}, { + "id": "2858", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of smart city projects in the country of Japan, and what is their total budget?", + "sql_context": "CREATE TABLE smart_city_projects (id INT, name VARCHAR(255), country VARCHAR(255), budget FLOAT);", + "sql": "SELECT COUNT(*) AS total_projects, SUM(budget) AS total_budget FROM smart_city_projects WHERE country \u003d \u0027Japan\u0027;", + "sql_explanation": "The SQL query calculates the total number of smart city projects in Japan and their total budget. It uses the COUNT(*) function to count the number of rows and the SUM() function to calculate the total budget." 
+}, { + "id": "3160", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record into the \u0027green_buildings\u0027 table for the building named \u0027CN Tower\u0027 in \u0027Toronto\u0027 with a \u0027Gold\u0027 certification.", + "sql_context": "CREATE TABLE green_buildings (id INT, building_name VARCHAR(50), city VARCHAR(50), certification VARCHAR(50));", + "sql": "INSERT INTO green_buildings (building_name, city, certification) VALUES (\u0027CN Tower\u0027, \u0027Toronto\u0027, \u0027Gold\u0027);", + "sql_explanation": "The INSERT INTO statement adds a new record to the \u0027green_buildings\u0027 table for the building named \u0027CN Tower\u0027 in \u0027Toronto\u0027 with a \u0027Gold\u0027 certification." 
+}, { + "id": "3219", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption of smart city initiatives in Tokyo, Japan, for the year 2020?", + "sql_context": "CREATE TABLE energy (id INT, city VARCHAR(20), country VARCHAR(20), year INT, energy_consumption FLOAT); INSERT INTO energy (id, city, country, year, energy_consumption) VALUES (1, \u0027Tokyo\u0027, \u0027Japan\u0027, 2019, 12000), (2, \u0027Tokyo\u0027, \u0027Japan\u0027, 2020, 15000), (3, \u0027Tokyo\u0027, \u0027Japan\u0027, 2021, 16000);", + "sql": "SELECT SUM(energy_consumption) FROM energy WHERE city \u003d \u0027Tokyo\u0027 AND country \u003d \u0027Japan\u0027 AND year \u003d 2020;", + "sql_explanation": "Calculate the total energy consumption of smart city initiatives in Tokyo, Japan, for the year 2020." 
+}, { + "id": "3421", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy savings of green building projects in New York?", + "sql_context": "CREATE TABLE green_building_projects (id INT, project_name VARCHAR(50), city VARCHAR(50), state VARCHAR(50), country VARCHAR(50), energy_savings FLOAT); INSERT INTO green_building_projects (id, project_name, city, state, country, energy_savings) VALUES (1, \u0027New York Green Building\u0027, \u0027New York\u0027, \u0027NY\u0027, \u0027USA\u0027, 20.5);", + "sql": "SELECT AVG(energy_savings) FROM green_building_projects WHERE city \u003d \u0027New York\u0027 AND state \u003d \u0027NY\u0027;", + "sql_explanation": "This query calculates the average energy savings of green building projects in New York by finding the average of the energy_savings values in the green_building_projects table where the city is \u0027New York\u0027 and the state is \u0027NY\u0027." 
+}, { + "id": "3520", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects are there in Sydney, Australia?", + "sql_context": "CREATE TABLE renewable_energy_projects (id INT, project_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), project_type VARCHAR(50)); INSERT INTO renewable_energy_projects (id, project_name, city, country, project_type) VALUES (1, \u0027Sydney Wind Farm\u0027, \u0027Sydney\u0027, \u0027Australia\u0027, \u0027Wind\u0027);", + "sql": "SELECT COUNT(*) FROM renewable_energy_projects WHERE city \u003d \u0027Sydney\u0027 AND country \u003d \u0027Australia\u0027;", + "sql_explanation": "This query counts the number of renewable energy projects in Sydney, Australia by finding the number of rows in the renewable_energy_projects table where the city is \u0027Sydney\u0027 and the country is \u0027Australia\u0027." 
+}, { + "id": "3614", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity (in MW) of wind power projects in the European Union?", + "sql_context": "CREATE TABLE wind_projects_3 (project_id INT, project_name TEXT, country TEXT, capacity_mw FLOAT); INSERT INTO wind_projects_3 (project_id, project_name, country, capacity_mw) VALUES (1, \u0027Wind Farm A\u0027, \u0027Germany\u0027, 100.5), (2, \u0027Wind Farm B\u0027, \u0027France\u0027, 200.3);", + "sql": "SELECT SUM(capacity_mw) FROM wind_projects_3 WHERE country IN (\u0027Germany\u0027, \u0027France\u0027, \u0027Spain\u0027);", + "sql_explanation": "The SQL query calculates the total installed capacity (in MW) of wind power projects in the European Union by grouping the records based on the \u0027country\u0027 column, filtering the records to only include those from Germany, France, and Spain, and summing up the \u0027capacity_mw\u0027 values for the filtered records using the aggregate function SUM." 
+}, { + "id": "3633", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum installed capacity (in MW) of renewable energy projects in the \u0027renewable_projects\u0027 table?", + "sql_context": "CREATE TABLE if not exists renewable_projects (project_id INT, project_name VARCHAR(255), location VARCHAR(255), installed_capacity FLOAT);", + "sql": "SELECT MAX(installed_capacity) FROM renewable_projects WHERE installed_capacity IS NOT NULL;", + "sql_explanation": "This query calculates the maximum installed capacity of renewable energy projects by finding the highest \u0027installed_capacity\u0027 value in the \u0027renewable_projects\u0027 table." 
+}, { + "id": "3809", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of hydroelectric power plants in the province of Quebec?", + "sql_context": "CREATE TABLE hydroelectric_power_plants (id INT, plant_name VARCHAR(50), province VARCHAR(50), installed_capacity FLOAT); INSERT INTO hydroelectric_power_plants (id, plant_name, province, installed_capacity) VALUES (1, \u0027Quebec Hydroelectric Power Plant\u0027, \u0027Quebec\u0027, 5000);", + "sql": "SELECT SUM(installed_capacity) FROM hydroelectric_power_plants WHERE province \u003d \u0027Quebec\u0027;", + "sql_explanation": "This SQL query calculates the total installed capacity of hydroelectric power plants in the province of Quebec by summing up the installed_capacity values from the hydroelectric_power_plants table where the province is \u0027Quebec\u0027." 
+}, { + "id": "3953", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the green building names and certification ratings in Japan?", + "sql_context": "CREATE TABLE GreenBuildings (id INT, name VARCHAR(50), city VARCHAR(50), state VARCHAR(50), country VARCHAR(50), certification VARCHAR(50), certification_rating INT); INSERT INTO GreenBuildings (id, name, city, state, country, certification, certification_rating) VALUES (4, \u0027SakuraEco\u0027, \u0027Tokyo\u0027, \u0027Tokyo\u0027, \u0027Japan\u0027, \u0027CASBEE Gold\u0027, 80);", + "sql": "SELECT g.name, g.certification_rating FROM GreenBuildings g WHERE g.country \u003d \u0027Japan\u0027;", + "sql_explanation": "Selecting green building names and certification ratings in Japan from the GreenBuildings table." 
+}, { + "id": "3981", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of green buildings in Australia and their average energy savings?", + "sql_context": "CREATE TABLE green_buildings (id INT, building_type VARCHAR(50), energy_savings FLOAT, country VARCHAR(50)); INSERT INTO green_buildings (id, building_type, energy_savings, country) VALUES (1, \u0027Residential\u0027, 12.5, \u0027Australia\u0027), (2, \u0027Commercial\u0027, 20.7, \u0027Canada\u0027), (3, \u0027Industrial\u0027, 30.4, \u0027Japan\u0027), (4, \u0027Public\u0027, 16.3, \u0027Australia\u0027);", + "sql": "SELECT COUNT(*), AVG(energy_savings) FROM green_buildings WHERE country \u003d \u0027Australia\u0027;", + "sql_explanation": "The SQL query calculates the total number of green buildings in Australia and their average energy savings by filtering the records based on the country and then applying the COUNT and AVG functions to the result set." 
+}, { + "id": "4096", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new wind turbine with id 5 and capacity 2000 in the \u0027renewable_energy\u0027 table", + "sql_context": "CREATE TABLE renewable_energy (id INT, type VARCHAR(50), capacity INT);", + "sql": "INSERT INTO renewable_energy (id, type, capacity) VALUES (5, \u0027wind turbine\u0027, 2000);", + "sql_explanation": "1. Target the \u0027renewable_energy\u0027 table. 2. Insert a new row with \u0027id\u0027 5, \u0027type\u0027 \u0027wind turbine\u0027, and \u0027capacity\u0027 2000." +}, { + "id": "4170", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new smart city project called \u0027EcoCity\u0027 into the \u0027smart_cities\u0027 table", + "sql_context": "CREATE TABLE smart_cities (id INT PRIMARY KEY, city_name VARCHAR(100), initiative VARCHAR(50));", + "sql": "INSERT INTO smart_cities (city_name, initiative) VALUES (\u0027EcoCity\u0027, \u0027smart city\u0027);", + "sql_explanation": "This query inserts a new record into the \u0027smart_cities\u0027 table with a \u0027city_name\u0027 of \u0027EcoCity\u0027 and an \u0027initiative\u0027 of \u0027smart city\u0027." 
+}, { + "id": "4201", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of solar energy projects in the \u0027renewable_energy\u0027 table?", + "sql_context": "CREATE TABLE renewable_energy (project_id INT, project_name VARCHAR(100), location VARCHAR(100), energy_type VARCHAR(50), installed_capacity FLOAT); INSERT INTO renewable_energy (project_id, project_name, location, energy_type, installed_capacity) VALUES (1, \u0027Solar Farm 1\u0027, \u0027Australia\u0027, \u0027Solar\u0027, 30.0), (2, \u0027Wind Farm 1\u0027, \u0027Sweden\u0027, \u0027Wind\u0027, 65.3);", + "sql": "SELECT SUM(installed_capacity) FROM renewable_energy WHERE energy_type \u003d \u0027Solar\u0027;", + "sql_explanation": "The SQL query calculates the total installed capacity of solar energy projects by summing the \u0027installed_capacity\u0027 values from the \u0027renewable_energy\u0027 table where the \u0027energy_type\u0027 is \u0027Solar\u0027." 
+}, { + "id": "4261", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in renewable energy projects in Germany?", + "sql_context": "CREATE TABLE renewable_energy_projects (id INT, project_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), investment FLOAT); INSERT INTO renewable_energy_projects (id, project_name, city, country, investment) VALUES (1, \u0027Germany Solar Farm\u0027, \u0027Berlin\u0027, \u0027Germany\u0027, 50000000);", + "sql": "SELECT SUM(investment) FROM renewable_energy_projects WHERE country \u003d \u0027Germany\u0027;", + "sql_explanation": "This query calculates the total investment in renewable energy projects in Germany by summing up the investment values in the renewable_energy_projects table where the country is \u0027Germany\u0027." 
+}, { + "id": "4266", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which Green building certifications were issued in the European Union?", + "sql_context": "CREATE TABLE green_buildings (building_id INT, building_name TEXT, country TEXT, certification TEXT); INSERT INTO green_buildings (building_id, building_name, country, certification) VALUES (1, \u0027Green Building 1\u0027, \u0027Germany\u0027, \u0027LEED\u0027), (2, \u0027Green Building 2\u0027, \u0027France\u0027, \u0027BREEAM\u0027);", + "sql": "SELECT country, certification FROM green_buildings WHERE country LIKE \u0027Europe%\u0027;", + "sql_explanation": "The SQL query selects the \u0027country\u0027 and \u0027certification\u0027 columns from the \u0027green_buildings\u0027 table and filters the records based on the country column to only include those from the European Union using the \u0027LIKE\u0027 keyword and the wildcard \u0027%\u0027 character." 
+}, { + "id": "4281", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wind farms are there in Spain with a capacity less than or equal to 5 MW?", + "sql_context": "CREATE TABLE WindFarmCapacity (id INT, country VARCHAR(20), capacity FLOAT); INSERT INTO WindFarmCapacity (id, country, capacity) VALUES (1, \u0027Spain\u0027, 5.5), (2, \u0027Spain\u0027, 4.5), (3, \u0027Germany\u0027, 6.2);", + "sql": "SELECT COUNT(*) FROM WindFarmCapacity WHERE country \u003d \u0027Spain\u0027 AND capacity \u003c\u003d 5;", + "sql_explanation": "This SQL query counts the number of wind farms in Spain with a capacity less than or equal to 5 MW by using the COUNT function with a wildcard (*), and filtering the data with the WHERE clause to only include rows with the country of \u0027Spain\u0027 and a capacity less than or equal to 5." 
+}, { + "id": "4385", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all smart city initiatives in Canada with their respective start dates.", + "sql_context": "CREATE TABLE smart_cities (initiative_id INT, initiative_name VARCHAR(255), country VARCHAR(255), start_date DATE);", + "sql": "SELECT initiative_name, start_date FROM smart_cities WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "This SQL query retrieves all smart city initiatives in Canada along with their start dates by selecting the initiative_name and start_date columns from the smart_cities table where the country is \u0027Canada\u0027." 
+}, { + "id": "4400", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average carbon offset per project in the \u0027Europe\u0027 region", + "sql_context": "CREATE TABLE carbon_offset_projects (id INT, project_name VARCHAR(100), region VARCHAR(50), carbon_offset FLOAT);", + "sql": "SELECT AVG(carbon_offset) FROM carbon_offset_projects WHERE region \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the average carbon offset per project in the \u0027Europe\u0027 region by averaging the carbon_offset column values where the region is \u0027Europe\u0027." 
+}, { + "id": "4423", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emissions reduction for green building projects in Canada?", + "sql_context": "CREATE TABLE GreenBuildings (building_id INT, building_name VARCHAR(255), country VARCHAR(255), emissions_reduction FLOAT);", + "sql": "SELECT AVG(emissions_reduction) FROM GreenBuildings WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query calculates the average CO2 emissions reduction for green building projects in Canada by averaging the emissions_reduction values in the GreenBuildings table where the country is \u0027Canada\u0027." 
+}, { + "id": "4574", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many green buildings in the \u0027green_buildings\u0027 table have a certification of \u0027LEED Platinum\u0027?", + "sql_context": "CREATE TABLE if not exists green_buildings (building_id int, name varchar(255), city varchar(255), certification varchar(50)); INSERT INTO green_buildings (building_id, name, city, certification) VALUES (1, \u0027Green Building 1\u0027, \u0027City A\u0027, \u0027LEED Gold\u0027), (2, \u0027Green Building 2\u0027, \u0027City B\u0027, \u0027BREEAM Excellent\u0027), (3, \u0027Green Building 3\u0027, \u0027City C\u0027, \u0027LEED Platinum\u0027);", + "sql": "SELECT COUNT(*) FROM green_buildings WHERE certification \u003d \u0027LEED Platinum\u0027;", + "sql_explanation": "The SQL query counts the number of rows in the \u0027green_buildings\u0027 table where the \u0027certification\u0027 column is equal to \u0027LEED Platinum\u0027." 
+}, { + "id": "4579", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon offset for green building certifications in the city of Toronto?", + "sql_context": "CREATE TABLE green_buildings (building_id INT, building_name VARCHAR(255), city VARCHAR(255), certification_level VARCHAR(255), carbon_offset_tons INT);", + "sql": "SELECT AVG(carbon_offset_tons) FROM green_buildings WHERE city \u003d \u0027Toronto\u0027;", + "sql_explanation": "This SQL query calculates the average carbon offset for green building certifications in the city of Toronto by averaging the carbon_offset_tons values in the green_buildings table where the city is \u0027Toronto\u0027." 
+}, { + "id": "4591", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List renewable energy projects in \u0027North America\u0027 and their capacities", + "sql_context": "CREATE TABLE renewable_energy (id INT, name VARCHAR(100), location VARCHAR(50), capacity FLOAT, region VARCHAR(10)); INSERT INTO renewable_energy (id, name, location, capacity, region) VALUES (1, \u0027Solar Farm A\u0027, \u0027Los Angeles\u0027, 100.5, \u0027North America\u0027); INSERT INTO renewable_energy (id, name, location, capacity, region) VALUES (2, \u0027Wind Farm C\u0027, \u0027New York\u0027, 150.6, \u0027North America\u0027);", + "sql": "SELECT name, capacity FROM renewable_energy WHERE region \u003d \u0027North America\u0027;", + "sql_explanation": "This query retrieves the names and capacities of renewable energy projects in \u0027North America\u0027 by selecting the \u0027name\u0027 and \u0027capacity\u0027 columns from the \u0027renewable_energy\u0027 table where the \u0027region\u0027 is \u0027North America\u0027." 
+}, { + "id": "4610", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy consumption of green buildings in the USA?", + "sql_context": "CREATE TABLE green_buildings (id INT, country VARCHAR(255), city VARCHAR(255), energy_consumption FLOAT); INSERT INTO green_buildings (id, country, city, energy_consumption) VALUES (1, \u0027USA\u0027, \u0027New York\u0027, 5000), (2, \u0027USA\u0027, \u0027Los Angeles\u0027, 4500), (3, \u0027Canada\u0027, \u0027Toronto\u0027, 3000);", + "sql": "SELECT AVG(energy_consumption) FROM green_buildings WHERE country \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the average energy consumption of green buildings in the USA. It uses the AVG function to find the mean value of the energy_consumption column, and filters the data to only include rows where the country is \u0027USA\u0027." 
+}, { + "id": "4687", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all green building initiatives in Latin America and the Caribbean?", + "sql_context": "CREATE TABLE initiatives (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO initiatives (id, name, region) VALUES (1, \u0027Green Building 1\u0027, \u0027Latin America\u0027), (2, \u0027Smart City Initiative\u0027, \u0027North America\u0027), (3, \u0027Renewable Energy Project\u0027, \u0027Asia\u0027), (4, \u0027Green Building 2\u0027, \u0027Caribbean\u0027);", + "sql": "SELECT * FROM initiatives WHERE region IN (\u0027Latin America\u0027, \u0027Caribbean\u0027);", + "sql_explanation": "This SQL query lists all green building initiatives in Latin America and the Caribbean by selecting all records from the \u0027initiatives\u0027 table where the \u0027region\u0027 column matches either \u0027Latin America\u0027 or \u0027Caribbean\u0027." 
+}, { + "id": "4825", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects have been implemented in the Asia Pacific region?", + "sql_context": "CREATE TABLE renewable_energy (id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO renewable_energy (id, project_name, location) VALUES (1, \u0027SolarFarm AP\u0027, \u0027Asia Pacific\u0027), (2, \u0027WindFarm EU\u0027, \u0027Europe\u0027), (3, \u0027HydroAP\u0027, \u0027Asia Pacific\u0027), (4, \u0027GeoThermal NA\u0027, \u0027North America\u0027);", + "sql": "SELECT COUNT(*) FROM renewable_energy WHERE location \u003d \u0027Asia Pacific\u0027;", + "sql_explanation": "This query counts the number of projects in the renewable_energy table located in the Asia Pacific region." 
+}, { + "id": "4849", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public bike sharing programs are available in a given city?", + "sql_context": "CREATE TABLE City (city_id INT, city_name VARCHAR(50)); CREATE TABLE Program (program_id INT, program_name VARCHAR(50), city_id INT);", + "sql": "SELECT COUNT(*) as num_programs FROM Program WHERE city_id \u003d \u0027CityId\u0027;", + "sql_explanation": "This query filters the results to only include a given city_id. It then calculates the count of programs, providing the number of public bike sharing programs available in a given city." 
+}, { + "id": "4852", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average investment in renewable energy projects in the \u0027renewable_energy\u0027 schema located in \u0027TX\u0027.", + "sql_context": "CREATE TABLE renewable_energy (id INT, project_name VARCHAR(50), location VARCHAR(50), investment FLOAT); INSERT INTO renewable_energy (id, project_name, location, investment) VALUES (1, \u0027Solar Farm\u0027, \u0027Arizona\u0027, 12000000), (2, \u0027Wind Turbines\u0027, \u0027Texas\u0027, 8000000);", + "sql": "SELECT AVG(investment) FROM renewable_energy WHERE location \u003d \u0027Texas\u0027;", + "sql_explanation": "This query calculates the average value of the \u0027investment\u0027 column in the \u0027renewable_energy\u0027 table for projects located in \u0027Texas\u0027." 
+}, { + "id": "4981", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of capacities for smart city projects in \u0027Quebec\u0027?", + "sql_context": "CREATE TABLE SmartCityProjects (id INT, project_name VARCHAR(100), project_type VARCHAR(50), city VARCHAR(50), state VARCHAR(50), capacity INT);", + "sql": "SELECT SUM(capacity) FROM SmartCityProjects WHERE state \u003d \u0027Quebec\u0027;", + "sql_explanation": "This query calculates the sum of \u0027capacity\u0027 values for smart city projects in \u0027Quebec\u0027." +}, { + "id": "5133", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum installed capacity of wind energy projects in Spain?", + "sql_context": "CREATE TABLE wind_projects (id INT, country VARCHAR(50), capacity FLOAT); INSERT INTO wind_projects (id, country, capacity) VALUES (1, \u0027Spain\u0027, 234.56), (2, \u0027Italy\u0027, 123.45), (3, \u0027Spain\u0027, 678.90);", + "sql": "SELECT MAX(capacity) FROM wind_projects WHERE country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query calculates the maximum installed capacity of wind energy projects in Spain by selecting the maximum capacity value from the 
wind_projects table where the country is \u0027Spain\u0027." +}, { + "id": "5271", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of wind energy projects in the \u0027renewables\u0027 schema?", + "sql_context": "CREATE SCHEMA if not exists renewables; CREATE TABLE if not exists renewables.wind_projects (project_id int, name varchar(255), location varchar(255), installed_capacity float); INSERT INTO renewables.wind_projects (project_id, name, location, installed_capacity) VALUES (1, \u0027Wind Farm 1\u0027, \u0027Country A\u0027, 100.0), (2, \u0027Wind Farm 2\u0027, \u0027Country B\u0027, 150.0);", + "sql": "SELECT SUM(installed_capacity) FROM renewables.wind_projects;", + "sql_explanation": "The SQL query sums the \u0027installed_capacity\u0027 column from the \u0027wind_projects\u0027 table in the \u0027renewables\u0027 schema." 
+}, { + "id": "5348", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 offset for each carbon offset initiative?", + "sql_context": "CREATE TABLE CarbonOffsetInitiatives (id INT, name TEXT, co2_offset_amount INT); INSERT INTO CarbonOffsetInitiatives (id, name, co2_offset_amount) VALUES (1, \u0027TreePlanting1\u0027, 500), (2, \u0027EnergyEfficiency1\u0027, 800);", + "sql": "SELECT name, co2_offset_amount FROM CarbonOffsetInitiatives;", + "sql_explanation": "This query displays the total CO2 offset for each carbon offset initiative by selecting the \u0027name\u0027 and \u0027co2_offset_amount\u0027 columns from the \u0027CarbonOffsetInitiatives\u0027 table." 
+}, { + "id": "5361", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total number of green building certifications awarded by agencies in the green_building_agency table.", + "sql_context": "CREATE SCHEMA IF NOT EXISTS green_buildings; CREATE TABLE IF NOT EXISTS green_buildings.green_building_agency ( agency_id INT NOT NULL, name VARCHAR(255) NOT NULL, certification VARCHAR(255) NOT NULL, PRIMARY KEY (agency_id));", + "sql": "SELECT COUNT(*) FROM green_buildings.green_building_agency;", + "sql_explanation": "The SQL query counts all records from the green_building_agency table in the green_buildings schema." 
+}, { + "id": "5442", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records in the \"CarbonOffset\" table where the \"project_type\" is \"forestry\"", + "sql_context": "CREATE TABLE CarbonOffset (id INT, project_name VARCHAR(20), project_type VARCHAR(20), amount INT); INSERT INTO CarbonOffset (id, project_name, project_type, amount) VALUES (1, \u0027ForestryProject1\u0027, \u0027forestry\u0027, 5000), (2, \u0027RenewableEnergyProject1\u0027, \u0027renewable\u0027, 6000), (3, \u0027EnergyEfficiencyProject1\u0027, \u0027energy_efficiency\u0027, 4000);", + "sql": "DELETE FROM CarbonOffset WHERE project_type \u003d \u0027forestry\u0027;", + "sql_explanation": "This query deletes all records in the CarbonOffset table where the project_type is forestry. It does not specify which columns to delete, so it will delete the entire row." 
+}, { + "id": "5570", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records with a \u0027country\u0027 value of \u0027China\u0027 from the \u0027green_buildings\u0027 table", + "sql_context": "CREATE TABLE green_buildings (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50));", + "sql": "DELETE FROM green_buildings WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "This query deletes all records in the \u0027green_buildings\u0027 table where the \u0027country\u0027 is \u0027China\u0027." +}, { + "id": "5791", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "basic SQL", + "sql_complexity_description": "basic SQL with a simple select statement", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of green buildings in the database.", + "sql_context": "CREATE TABLE green_buildings (id INT, name VARCHAR(255), location VARCHAR(255), certification_level VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM green_buildings;", + "sql_explanation": "This SQL query counts the number of rows in the green_buildings table, which represents the total number of green buildings in the database." 
+}, { + "id": "100", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of videos with captions from indigenous creators in North America and Oceania, in the last year?", + "sql_context": "CREATE TABLE videos (id INT, title VARCHAR(50), has_captions BOOLEAN, creator_name VARCHAR(50), creator_region VARCHAR(50)); INSERT INTO videos (id, title, has_captions, creator_name, creator_region) VALUES (1, \u0027Video1\u0027, true, \u0027Maria Garcia\u0027, \u0027North America\u0027), (2, \u0027Video2\u0027, false, \u0027David Kim\u0027, \u0027Oceania\u0027), (3, \u0027Video3\u0027, true, \u0027Emily Chen\u0027, \u0027North America\u0027), (4, \u0027Video4\u0027, true, \u0027James Thompson\u0027, \u0027Oceania\u0027);", + "sql": "SELECT creator_region, 100.0 * COUNT(CASE WHEN has_captions \u003d true THEN 1 END) / COUNT(*) as pct FROM videos WHERE creator_region IN (\u0027North America\u0027, \u0027Oceania\u0027) AND post_date \u003e\u003d NOW() - INTERVAL 365 DAY AND creator_name IN (SELECT name FROM creators WHERE is_indigenous \u003d true) GROUP BY creator_region;", + "sql_explanation": "This query calculates the percentage of videos with captions from indigenous creators in North America and Oceania, in the last year. It does this by filtering the rows in the videos table to only include rows where the creator_region is either \u0027North America\u0027 or \u0027Oceania\u0027 and the post date is within the last 365 days. It then filters the rows to only include videos from indigenous creators using a subquery. 
It then uses a case statement to only include videos with captions in the numerator of the calculation. It uses a group by clause to group the results by creator_region. Finally, it calculates the percentage of videos with captions from indigenous creators in each region by dividing the count of such videos by the total count of videos in each region." +}, { + "id": "1610", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many pieces of \u0027Documentary\u0027 content are available in the \u0027Hindi\u0027 language?", + "sql_context": "CREATE TABLE Languages (LanguageID int, LanguageName varchar(50)); INSERT INTO Languages (LanguageID, LanguageName) VALUES (1, \u0027English\u0027), (2, \u0027Spanish\u0027), (3, \u0027French\u0027), (4, \u0027Hindi\u0027), (5, \u0027Chinese\u0027); CREATE TABLE Content (ContentID int, ContentType varchar(50), LanguageID int); INSERT INTO Content (ContentID, ContentType, LanguageID) VALUES (1, \u0027Movie\u0027, 1), (2, \u0027Podcast\u0027, 2), (3, \u0027Blog\u0027, 3), (4, \u0027News\u0027, 1), (5, \u0027Documentary\u0027, 2), (6, \u0027Blog\u0027, 4), (7, \u0027Music\u0027, 4), (8, \u0027Documentary\u0027, 5), (9, \u0027Documentary\u0027, 4);", + "sql": "SELECT COUNT(ContentID) FROM Content WHERE ContentType \u003d \u0027Documentary\u0027 AND LanguageID \u003d (SELECT LanguageID FROM Languages WHERE LanguageName \u003d \u0027Hindi\u0027);", + "sql_explanation": "This query calculates the number of \u0027Documentary\u0027 content items in the \u0027Hindi\u0027 language by using the SELECT statement with the COUNT function, filtering rows with a ContentType of 
\u0027Documentary\u0027 and a LanguageID corresponding to \u0027Hindi\u0027 using a subquery." +}, { + "id": "1864", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new content item with type \u0027Music\u0027 and language \u0027Arabic\u0027", + "sql_context": "CREATE TABLE Content (ContentID int, ContentType varchar(50), LanguageID int); CREATE TABLE Languages (LanguageID int, LanguageName varchar(50)); INSERT INTO Languages (LanguageID, LanguageName) VALUES (1, \u0027English\u0027), (2, \u0027Spanish\u0027), (3, \u0027French\u0027), (4, \u0027German\u0027), (6, \u0027Arabic\u0027);", + "sql": "INSERT INTO Content (ContentID, ContentType, LanguageID) VALUES (7, \u0027Music\u0027, (SELECT LanguageID FROM Languages WHERE LanguageName \u003d \u0027Arabic\u0027));", + "sql_explanation": "The query inserts a new content item with a ContentType of \u0027Music\u0027 and a LanguageID corresponding to the language \u0027Arabic\u0027 by using the INSERT INTO statement and the VALUES clause. The LanguageID is selected from the Languages table with a subquery that matches the LanguageName to \u0027Arabic\u0027." 
+}, { + "id": "2283", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of media contents produced by Asian producers?", + "sql_context": "CREATE TABLE MediaProducers (ProducerID INT PRIMARY KEY, ProducerName VARCHAR(50), Ethnicity VARCHAR(30), YearsInMedia INT); INSERT INTO MediaProducers (ProducerID, ProducerName, Ethnicity, YearsInMedia) VALUES (1, \u0027Producer 1\u0027, \u0027Hispanic\u0027, 12), (2, \u0027Producer 2\u0027, \u0027Asian\u0027, 15);", + "sql": "SELECT SUM(Views) FROM MediaRepresentation WHERE ProducerID IN (SELECT ProducerID FROM MediaProducers WHERE Ethnicity \u003d \u0027Asian\u0027);", + "sql_explanation": "Select the sum of Views from the MediaRepresentation table where the ProducerID is in the MediaProducers table and Ethnicity is Asian." 
+}, { + "id": "1138", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 most streamed K-pop songs in the United States?", + "sql_context": "CREATE TABLE music_streaming (id INT, user_id INT, artist VARCHAR(50), song VARCHAR(50), genre VARCHAR(20), streamed_on DATE, streams INT); CREATE VIEW song_streams AS SELECT song, streamed_on, SUM(streams) AS total_streams FROM music_streaming GROUP BY song, streamed_on;", + "sql": "SELECT song, total_streams FROM song_streams WHERE genre \u003d \u0027K-pop\u0027 AND user_id IN (SELECT id FROM users WHERE country \u003d \u0027United States\u0027) ORDER BY total_streams DESC LIMIT 5;", + "sql_explanation": "The query lists the top 5 most streamed K-pop songs in the United States. It creates a CTE named song_streams that calculates the total streams for each song and date in the music_streaming table. Then, it selects the song and total_streams columns from the song_streams CTE where the genre is \"K-pop\" and user_id is in the list of IDs from the users table where the country is \"United States\", ordered by total_streams in descending order, and limited to the top 5 rows." 
+}, { + "id": "771", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total mass of all satellites in low Earth orbit (LEO) and geostationary orbit (GEO), and show the percentage of mass in each orbit type.", + "sql_context": "CREATE TABLE Satellite_Orbits ( id INT, satellite_name VARCHAR(255), orbit_type VARCHAR(255), mass FLOAT );", + "sql": "SELECT orbit_type, SUM(mass) as total_mass, (SUM(mass) / (SELECT SUM(mass) FROM Satellite_Orbits) * 100) as percentage FROM Satellite_Orbits WHERE orbit_type IN (\u0027LEO\u0027, \u0027GEO\u0027) GROUP BY orbit_type;", + "sql_explanation": "The SQL query calculates the total mass of all satellites in low Earth orbit (LEO) and geostationary orbit (GEO), and shows the percentage of mass in each orbit type. It applies the SUM function to the \u0027mass\u0027 column, filters the results using the WHERE clause to only include rows with an orbit type of \u0027LEO\u0027 or \u0027GEO\u0027, and groups the data by the \u0027orbit_type\u0027 column. It then calculates the percentage of mass for each orbit type by dividing the total mass of each orbit type by the total mass of all satellites and multiplying by 100." 
+}, { + "id": "2934", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which satellite was launched on the same day as \u0027Cassini\u0027?", + "sql_context": "CREATE TABLE spacecrafts (spacecraft_id INT, name VARCHAR(100), launch_date DATE); INSERT INTO spacecrafts (spacecraft_id, name, launch_date) VALUES (1, \u0027Voyager 1\u0027, \u00271977-09-05\u0027); INSERT INTO spacecrafts (spacecraft_id, name, launch_date) VALUES (2, \u0027Voyager 2\u0027, \u00271977-08-20\u0027); INSERT INTO spacecrafts (spacecraft_id, name, launch_date) VALUES (3, \u0027Pioneer 10\u0027, \u00271972-03-03\u0027); INSERT INTO spacecrafts (spacecraft_id, name, launch_date) VALUES (4, \u0027Pioneer 11\u0027, \u00271973-04-06\u0027); INSERT INTO spacecrafts (spacecraft_id, name, launch_date) VALUES (5, \u0027Cassini\u0027, \u00271997-10-15\u0027);", + "sql": "SELECT name FROM spacecrafts WHERE launch_date \u003d (SELECT launch_date FROM spacecrafts WHERE name \u003d \u0027Cassini\u0027);", + "sql_explanation": "The SQL query uses a subquery to get the launch_date of \u0027Cassini\u0027. Then, it selects the name of the spacecrafts with the same launch_date as \u0027Cassini\u0027." 
+}, { + "id": "2971", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of space debris in orbit around Earth?", + "sql_context": "CREATE TABLE SpaceDebris (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), mass FLOAT, orbit_date DATE);", + "sql": "SELECT SUM(mass) as total_mass FROM SpaceDebris WHERE orbit_date \u003d (SELECT MAX(orbit_date) FROM SpaceDebris);", + "sql_explanation": "The SQL query calculates the total mass of space debris in orbit around Earth by summing the mass column for the records with the latest orbit_date. It uses a subquery to determine the latest orbit_date value." 
+}, { + "id": "1240", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of energy from renewable sources, per state, compared to the national average in 2020?", + "sql_context": "CREATE TABLE energy_sources (id INT, state VARCHAR(50), year INT, renewable_energy FLOAT); INSERT INTO energy_sources (id, state, year, renewable_energy) VALUES (1, \u0027California\u0027, 2020, 33.0), (2, \u0027Texas\u0027, 2020, 20.0), (3, \u0027United States\u0027, 2020, 12.0);", + "sql": "SELECT state, (renewable_energy / (SELECT AVG(renewable_energy) FROM energy_sources WHERE year \u003d 2020) - 1) * 100.0 AS percentage FROM energy_sources WHERE year \u003d 2020;", + "sql_explanation": "The SQL query calculates the percentage of energy from renewable sources (percentage) per state (state) compared to the national average ((SELECT AVG(renewable_energy) FROM energy_sources WHERE year \u003d 2020)) in 2020 (WHERE year \u003d 2020) by dividing the renewable energy (renewable_energy) by the average renewable energy ((SELECT AVG(renewable_energy) FROM energy_sources WHERE year \u003d 2020)) in 2020 (WHERE year \u003d 2020), subtracting 1, and multiplying it by 100.0 ((renewable_energy / (SELECT AVG(renewable_energy) FROM energy_sources WHERE year \u003d 2020) - 1) * 100.0). The query does not group the records, returning all records for 2020." 
+}, { + "id": "1655", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete players who haven\u0027t played \u0027PuzzleParadise\u0027", + "sql_context": "player (player_id, name, email, age, gender, country, total_games_played); game (game_id, name, genre, release_year); player_game (player_id, game_id, last_played)", + "sql": "DELETE FROM player WHERE player_id NOT IN (SELECT player_id FROM player_game WHERE game_id \u003d (SELECT game_id FROM game WHERE name \u003d \u0027PuzzleParadise\u0027))", + "sql_explanation": "This query deletes records from the player table where the player_id is not present in the subquery. The subquery fetches player_ids that have played \u0027PuzzleParadise\u0027." +}, { + "id": "4345", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List games from \u0027game_design\u0027 table with ratings higher than the average rating", + "sql_context": "CREATE TABLE game_design (game_id INT, game_name VARCHAR(50), genre VARCHAR(50), rating FLOAT);", + "sql": "SELECT * FROM game_design WHERE rating \u003e (SELECT AVG(rating) FROM game_design);", + "sql_explanation": "This query lists games from the \u0027game_design\u0027 table with ratings higher than the average rating." 
+}, { + "id": "860", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Provide labor productivity metrics for the past 3 years for mines in Zambia.", + "sql_context": "CREATE TABLE labor_productivity (id INT, mine_id INT, year INT, productivity INT);CREATE TABLE mine (id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO mine (id, name, location) VALUES (1, \u0027Zambian Copper\u0027, \u0027Zambia\u0027); INSERT INTO labor_productivity (id, mine_id, year, productivity) VALUES (1, 1, 2020, 150);", + "sql": "SELECT year, productivity FROM labor_productivity JOIN mine ON labor_productivity.mine_id \u003d mine.id WHERE mine.location \u003d \u0027Zambia\u0027 AND year \u003e\u003d (SELECT MAX(year) - 3 FROM labor_productivity);", + "sql_explanation": "This SQL query joins the labor_productivity and mine tables on the mine_id column. It then filters for mines located in Zambia and returns the productivity for the past 3 years." 
+}, { + "id": "1522", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production volume of lithium in Argentina for the year 2019?", + "sql_context": "CREATE TABLE production (id INT, mine_id INT, year INT, product TEXT, production_volume INT); INSERT INTO production (id, mine_id, year, product, production_volume) VALUES (1, 1, 2019, \u0027Lithium\u0027, 3000);", + "sql": "SELECT SUM(production_volume) FROM production WHERE year \u003d 2019 AND product \u003d \u0027Lithium\u0027 AND mine_id IN (SELECT id FROM mines WHERE location \u003d \u0027Argentina\u0027);", + "sql_explanation": "This query calculates the total production volume of lithium in Argentina for the year 2019 by filtering the \u0027production\u0027 table for rows with \u0027Argentina\u0027 as the location, \u0027Lithium\u0027 as the product, and the year 2019, then calculating the sum of the production_volume." 
+}, { + "id": "1575", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the names and production metrics of mines located in a state that starts with the letter \u0027W\u0027 and have a productivity score below 3.8.", + "sql_context": "CREATE TABLE mines (id INT, name VARCHAR(50), location VARCHAR(50), production_metric FLOAT, PRIMARY KEY(id)); INSERT INTO mines (id, name, location, production_metric) VALUES (9, \u0027Westfield Mine\u0027, \u0027Wyoming\u0027, 28000); INSERT INTO mines (id, name, location, production_metric) VALUES (10, \u0027Windy Ridge\u0027, \u0027Washington\u0027, 31000);", + "sql": "SELECT name, production_metric FROM mines WHERE location LIKE \u0027W%\u0027 AND id NOT IN (SELECT mine_id FROM labor_productivity WHERE productivity_score \u003e 3.8);", + "sql_explanation": "This query selects the \u0027name\u0027 and \u0027production_metric\u0027 columns from the \u0027mines\u0027 table where the \u0027location\u0027 starts with \u0027W\u0027 and the \u0027id\u0027 is not in the subquery that filters for \u0027productivity_score\u0027 greater than 3.8 in the \u0027labor_productivity\u0027 table." 
+}, { + "id": "1665", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production volume of copper in Chile for the year 2020?", + "sql_context": "CREATE TABLE production (id INT, mine_id INT, year INT, product TEXT, production_volume INT); INSERT INTO production (id, mine_id, year, product, production_volume) VALUES (1, 1, 2020, \u0027Copper\u0027, 25000);", + "sql": "SELECT SUM(production_volume) FROM production WHERE year \u003d 2020 AND product \u003d \u0027Copper\u0027 AND mine_id IN (SELECT id FROM mines WHERE location \u003d \u0027Chile\u0027);", + "sql_explanation": "This query calculates the total production volume of copper in Chile for the year 2020 by filtering the \u0027production\u0027 table for rows with \u0027Chile\u0027 as the location, \u0027Copper\u0027 as the product, and the year 2020, then calculating the sum of the production_volume." 
+}, { + "id": "1739", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of mines in each location with extraction rates above the overall average.", + "sql_context": "CREATE TABLE mining_sites (id INT, name VARCHAR(50), location VARCHAR(50), extraction_rate DECIMAL(5,2)); INSERT INTO mining_sites (id, name, location, extraction_rate) VALUES (1, \u0027Gold Mine\u0027, \u0027Colorado\u0027, 12.5), (2, \u0027Silver Mine\u0027, \u0027Nevada\u0027, 15.2), (3, \u0027Copper Mine\u0027, \u0027Arizona\u0027, 18.9), (4, \u0027Iron Mine\u0027, \u0027Minnesota\u0027, 21.1);", + "sql": "SELECT location, COUNT(*) as mine_count FROM mining_sites WHERE extraction_rate \u003e (SELECT AVG(extraction_rate) FROM mining_sites) GROUP BY location;", + "sql_explanation": "The SQL query calculates the average extraction rate for all rows in the mining_sites table, then filters for rows where the extraction_rate column is greater than this average value. It then groups the results by the location column and calculates the count of mines for each location." 
+}, { + "id": "4222", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all expeditions that have a maximum depth less than the average depth of all expeditions?", + "sql_context": "CREATE TABLE Expeditions(ExpeditionID INT, AvgDepth DECIMAL(5,2), MaxDepth INT); INSERT INTO Expeditions(ExpeditionID, AvgDepth, MaxDepth) VALUES (1, 3500.50, 6500), (2, 4200.30, 4200), (3, 2100.75, 2100), (4, 5100.90, 5100), (5, 2900.40, 7000);", + "sql": "DELETE FROM Expeditions WHERE MaxDepth \u003c (SELECT AVG(AvgDepth) FROM Expeditions);", + "sql_explanation": "We delete any records where the MaxDepth field is less than the average depth of all expeditions by using a subquery in the DELETE statement to calculate the average depth, and then deleting any records where the MaxDepth field is less than this value." 
+}, { + "id": "56", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of days it took to resolve the top 3 longest-lasting ethical labor violation cases.", + "sql_context": "CREATE TABLE Violation_Resolutions (violation_id INT, resolution_date DATE); INSERT INTO Violation_Resolutions (violation_id, resolution_date) VALUES (1, \u00272022-01-15\u0027), (2, \u00272022-02-20\u0027), (3, \u00272022-03-01\u0027), (4, \u00272022-04-10\u0027), (5, \u00272022-05-22\u0027); CREATE TABLE Ethical_Violations (violation_id INT, violation_date DATE); INSERT INTO Ethical_Violations (violation_id, violation_date) VALUES (1, \u00272021-12-01\u0027), (2, \u00272022-01-05\u0027), (3, \u00272022-01-10\u0027), (4, \u00272022-02-01\u0027), (5, \u00272022-03-01\u0027);", + "sql": "SELECT DATEDIFF(day, violation_date, resolution_date) as days_to_resolve FROM (SELECT violation_id, MIN(resolution_date) as resolution_date FROM Violation_Resolutions GROUP BY violation_id ORDER BY resolution_date DESC LIMIT 3) as top_3_violations INNER JOIN Ethical_Violations ON top_3_violations.violation_id \u003d Ethical_Violations.violation_id;", + "sql_explanation": "This query joins the Violation_Resolutions and Ethical_Violations tables on the violation_id column and filters for the top 3 longest-lasting ethical labor violation cases. It then calculates the number of days it took to resolve each case by subtracting the violation_date from the resolution_date." 
+}, { + "id": "86", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily sales trend for the top 5 retailers in the last week?", + "sql_context": "CREATE TABLE Retailer (id INT, name VARCHAR(255)); CREATE TABLE Sales (id INT, retailer_id INT, sale_date DATE, revenue FLOAT);", + "sql": "SELECT r.name, sale_date, SUM(revenue) as daily_sales FROM Sales s JOIN Retailer r ON s.retailer_id \u003d r.id WHERE retailer_id IN (SELECT id FROM Retailer ORDER BY SUM(revenue) DESC LIMIT 5) AND sale_date \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 week\u0027) GROUP BY ROLLUP(r.name, sale_date) ORDER BY r.name, sale_date DESC;", + "sql_explanation": "The SQL query calculates the daily sales trend for the top 5 retailers in the last week by first filtering the records based on the retailer id and sale date. It then joins the Sales and Retailer tables and calculates the daily sales for each retailer using the SUM function. The query uses the ROLLUP function to calculate the total sales for each day and the overall total sales. It then orders the results by retailer name and sale date in descending order." 
+}, { + "id": "704", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products in the \u0027Eco-friendly\u0027 category were sold by vendors in the Americas in the past year?", + "sql_context": "CREATE TABLE sales (sale_id INT, sale_date DATE, vendor_id INT, product_category VARCHAR(50)); INSERT INTO sales (sale_id, sale_date, vendor_id, product_category) VALUES (1, \u00272022-01-01\u0027, 1, \u0027Eco-friendly\u0027), (2, \u00272022-02-01\u0027, 2, \u0027Conventional\u0027), (3, \u00272022-03-01\u0027, 3, \u0027Eco-friendly\u0027);", + "sql": "SELECT COUNT(*) FROM sales WHERE product_category \u003d \u0027Eco-friendly\u0027 AND vendor_id IN (SELECT vendor_id FROM vendors WHERE vendor_location LIKE \u0027Americas%\u0027) AND sale_date \u003e\u003d DATEADD(year, -1, GETDATE());", + "sql_explanation": "This SQL query calculates the number of products in the \u0027Eco-friendly\u0027 category sold by vendors in the Americas in the past year by using the COUNT function to count the number of rows that meet the specified criteria. The WHERE clause filters for sales with the \u0027Eco-friendly\u0027 product category and vendors located in the Americas, and the AND clause filters for sales that occurred in the past year using the GETDATE() and DATEADD() functions." 
+}, { + "id": "1182", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records for products that have not been sold for the last 6 months in stores located in \u0027Washington\u0027 and \u0027Oregon\u0027", + "sql_context": "CREATE TABLE Stores (store_id INT, store_name VARCHAR(50), state VARCHAR(50)); INSERT INTO Stores (store_id, store_name, state) VALUES (1, \u0027Eco-Market\u0027, \u0027Washington\u0027), (2, \u0027Green Vista\u0027, \u0027Oregon\u0027); CREATE TABLE Inventory (inventory_id INT, product_id INT, product_name VARCHAR(50), store_id INT, last_sale_date DATE); INSERT INTO Inventory (inventory_id, product_id, product_name, store_id, last_sale_date) VALUES (1, 1, \u0027Almond Milk\u0027, 1, \u00272022-03-15\u0027), (2, 2, \u0027Quinoa\u0027, 2, \u00272022-05-01\u0027);", + "sql": "DELETE FROM Inventory WHERE last_sale_date \u003c DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND store_id IN (SELECT store_id FROM Stores WHERE state IN (\u0027Washington\u0027, \u0027Oregon\u0027));", + "sql_explanation": "This query first identifies products in stores from Washington and Oregon that haven\u0027t been sold in the last 6 months using a subquery. Then, it deletes the corresponding records from the Inventory table by checking the last_sale_date." 
+}, { + "id": "1316", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of Fair Trade products in Mexico?", + "sql_context": "CREATE TABLE products (product_id int, product_name varchar(255), is_fair_trade boolean, country varchar(50)); INSERT INTO products (product_id, product_name, is_fair_trade, country) VALUES (1, \u0027Organic Coffee\u0027, true, \u0027Mexico\u0027);", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM products WHERE country \u003d \u0027Mexico\u0027) AS pct_fair_trade FROM products WHERE country \u003d \u0027Mexico\u0027 AND is_fair_trade \u003d true;", + "sql_explanation": "Computes the percentage of Fair Trade products in Mexico by dividing the count of Fair Trade products by the total number of products in Mexico and multiplying by 100." 
+}, { + "id": "2658", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delivery time for products sourced from Fair Trade suppliers?", + "sql_context": "CREATE TABLE Suppliers (SupplierID int, SupplierName varchar(255), IsFairTrade boolean); INSERT INTO Suppliers (SupplierID, SupplierName, IsFairTrade) VALUES (1, \u0027Supplier A\u0027, true), (2, \u0027Supplier B\u0027, false); CREATE TABLE Orders (OrderID int, SupplierID int, DeliveryTime int);", + "sql": "SELECT AVG(DeliveryTime) FROM Orders WHERE SupplierID IN (SELECT SupplierID FROM Suppliers WHERE IsFairTrade \u003d true);", + "sql_explanation": "This query calculates the average delivery time for orders from Fair Trade suppliers. It first selects the DeliveryTime from the Orders table, but only for orders with a SupplierID that matches a supplier marked as Fair Trade in the Suppliers table. It then calculates the average of these delivery times." 
+}, { + "id": "1", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which team has the highest number of wins in the \u0027basketball_games\u0027 table?", + "sql_context": "CREATE TABLE basketball_teams (team_id INT, name VARCHAR(50)); CREATE TABLE basketball_games (game_id INT, home_team INT, away_team INT, home_team_score INT, away_team_score INT); INSERT INTO basketball_teams (team_id, name) VALUES (1, \u0027Boston Celtics\u0027), (2, \u0027Los Angeles Lakers\u0027), (3, \u0027Chicago Bulls\u0027); INSERT INTO basketball_games (game_id, home_team, away_team, home_team_score, away_team_score) VALUES (1, 1, 2, 85, 80), (2, 2, 3, 95, 90), (3, 3, 1, 75, 85);", + "sql": "SELECT name AS team, MAX(home_team_wins + away_team_wins) AS highest_wins FROM (SELECT name, CASE WHEN home_team \u003d team_id AND home_team_score \u003e away_team_score THEN 1 ELSE 0 END + CASE WHEN away_team \u003d team_id AND away_team_score \u003e home_team_score THEN 1 ELSE 0 END AS home_team_wins, CASE WHEN home_team \u003d team_id AND home_team_score \u003c away_team_score THEN 1 ELSE 0 END + CASE WHEN away_team \u003d team_id AND away_team_score \u003c home_team_score THEN 1 ELSE 0 END AS away_team_wins FROM basketball_teams JOIN basketball_games ON basketball_teams.team_id \u003d basketball_games.home_team OR basketball_teams.team_id \u003d basketball_games.away_team) AS subquery GROUP BY name;", + "sql_explanation": "The SQL query calculates the number of wins for each team in the \u0027basketball_games\u0027 table by using a subquery to join the \u0027basketball_teams\u0027 table with the 
\u0027basketball_games\u0027 table on the \u0027team_id\u0027 column. The subquery calculates the number of wins at home and away for each team by using the CASE statement. The outer query then uses the MAX function to find the team with the highest number of wins. The query then uses the GROUP BY clause to group the results by the \u0027name\u0027 column. The query then returns the name of each team and the highest number of wins." +}, { + "id": "67", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who has the highest number of three-point field goals for the Celtics?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50)); INSERT INTO teams (team_id, team_name) VALUES (1, \u0027Celtics\u0027); CREATE TABLE games (game_id INT, home_team_id INT, away_team_id INT, home_team_score INT, away_team_score INT, home_team_three_point INT, away_team_three_point INT); INSERT INTO games (game_id, home_team_id, away_team_id, home_team_score, away_team_score, home_team_three_point, away_team_three_point) VALUES (1, 1, 2, 100, 90, 12, 10), (2, 2, 1, 80, 85, 15, 11), (3, 1, 3, 110, 105, 13, 14), (4, 4, 1, 70, 75, 9, 12);", + "sql": "SELECT home_team_three_point, away_team_three_point, (home_team_three_point + away_team_three_point) as total_three_point FROM games WHERE home_team_id \u003d (SELECT team_id FROM teams WHERE team_name \u003d \u0027Celtics\u0027) OR away_team_id \u003d (SELECT team_id FROM teams WHERE team_name \u003d \u0027Celtics\u0027) ORDER BY total_three_point DESC LIMIT 1;", + "sql_explanation": "The SQL query finds the total number of three-point 
field goals for the Celtics in each game and orders them in descending order. The query then returns the player with the highest number of three-point field goals." +}, { + "id": "97", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of basketball games won by the Toronto Raptors", + "sql_context": "CREATE TABLE teams (id INT, name VARCHAR(50), sport VARCHAR(20)); INSERT INTO teams (id, name, sport) VALUES (1, \u0027Toronto Raptors\u0027, \u0027Basketball\u0027); CREATE TABLE games (id INT, home_team_id INT, away_team_id INT, home_team_score INT, away_team_score INT);", + "sql": "SELECT COUNT(*) FROM games WHERE (home_team_id \u003d (SELECT id FROM teams WHERE name \u003d \u0027Toronto Raptors\u0027 AND sport \u003d \u0027Basketball\u0027) AND home_team_score \u003e away_team_score) OR (away_team_id \u003d (SELECT id FROM teams WHERE name \u003d \u0027Toronto Raptors\u0027 AND sport \u003d \u0027Basketball\u0027) AND away_team_score \u003e home_team_score);", + "sql_explanation": "This query finds the number of games the Toronto Raptors won by checking if the home_team_score is greater than the away_team_score when the Raptors are the home team, or if the away_team_score is greater than the home_team_score when the Raptors are the away team." 
+}, { + "id": "186", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the team with the highest winning percentage in the current MLB season.", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50), season_id INT); CREATE TABLE games (game_id INT, team_id INT, won INT, season_id INT);", + "sql": "SELECT team_id, AVG(won) * 100.0 / (SELECT COUNT(*) FROM games WHERE team_id \u003d teams.team_id) AS win_percentage FROM games JOIN teams ON games.team_id \u003d teams.team_id WHERE season_id \u003d (SELECT MAX(season_id) FROM teams) GROUP BY team_id ORDER BY win_percentage DESC LIMIT 1;", + "sql_explanation": "The SQL query calculates the winning percentage for each MLB team in the current season and returns the team with the highest winning percentage." 
+}, { + "id": "285", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of wins for the New York Yankees?", + "sql_context": "CREATE TABLE teams (id INT, name TEXT, city TEXT, league TEXT); INSERT INTO teams (id, name, city, league) VALUES (5, \u0027New York Yankees\u0027, \u0027New York\u0027, \u0027American League\u0027); CREATE TABLE games (id INT, home_team_id INT, away_team_id INT, home_team_wins INT, away_team_wins INT);", + "sql": "SELECT SUM(home_team_wins) + SUM(away_team_wins) FROM games WHERE home_team_id \u003d (SELECT id FROM teams WHERE name \u003d \u0027New York Yankees\u0027 AND city \u003d \u0027New York\u0027) OR away_team_id \u003d (SELECT id FROM teams WHERE name \u003d \u0027New York Yankees\u0027 AND city \u003d \u0027New York\u0027);", + "sql_explanation": "This query calculates the total number of wins for the New York Yankees. It uses the SUM() function to sum the number of wins by the Yankees as the home team and the number of wins by the Yankees as the away team, and then filters the games using the OR operator." 
+}, { + "id": "300", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of home runs hit by players from the same team in a single game in the MLB?", + "sql_context": "CREATE TABLE games (game_id INT, date DATE, team1 TEXT, team2 TEXT, home_runs INT);", + "sql": "SELECT AVG(home_runs) FROM games WHERE team1 \u003d (SELECT team1 FROM games WHERE game_id \u003d (SELECT MAX(game_id) FROM games WHERE home_runs \u003e 0)) OR team2 \u003d (SELECT team1 FROM games WHERE game_id \u003d (SELECT MAX(game_id) FROM games WHERE home_runs \u003e 0));", + "sql_explanation": "This query calculates the average number of home runs in a single game by selecting the home_runs column from the games table, filtering the teams to only include the team that hit a home run in the most recent game, and taking the average of the results." 
+}, { + "id": "944", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of games won by each MLB team.", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50)); CREATE TABLE games (game_id INT, team_id INT, won INT);", + "sql": "SELECT team_id, AVG(won) * 100.0 / (SELECT COUNT(*) FROM games WHERE team_id \u003d teams.team_id) AS win_percentage FROM games JOIN teams ON games.team_id \u003d teams.team_id GROUP BY team_id;", + "sql_explanation": "The SQL query calculates the percentage of games won by each MLB team." +}, { + "id": "968", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which teams have the highest average points per game in the last 30 days?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255), avg_points DECIMAL(5,2)); INSERT INTO teams (team_id, team_name, avg_points) VALUES (1, \u0027TeamA\u0027, 85.5), (2, \u0027TeamB\u0027, 78.9), (3, \u0027TeamC\u0027, 92.2);", + "sql": "SELECT team_name FROM teams WHERE avg_points \u003d (SELECT MAX(avg_points) FROM teams WHERE avg_points \u003e\u003d (SELECT AVG(avg_points) FROM teams WHERE date \u003e\u003d DATEADD(day, -30, GETDATE())));", + "sql_explanation": 
"This query first calculates the average points of the past 30 days, then finds the teams with the highest average points by using subqueries and comparing the avg_points with the maximum average points of the last 30 days." +}, { + "id": "1499", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which basketball players have the highest scoring average in the Eastern Conference?", + "sql_context": "CREATE TABLE players (player_id INT, name TEXT, team TEXT, position TEXT, points_per_game FLOAT); INSERT INTO players (player_id, name, team, position, points_per_game) VALUES (1, \u0027John Doe\u0027, \u0027Boston Celtics\u0027, \u0027Guard\u0027, 23.4), (2, \u0027Jane Smith\u0027, \u0027Philadelphia 76ers\u0027, \u0027Forward\u0027, 21.2);", + "sql": "SELECT p.name, p.points_per_game FROM players p WHERE p.team IN (SELECT t.team FROM teams t WHERE t.conference \u003d \u0027Eastern\u0027) ORDER BY p.points_per_game DESC;", + "sql_explanation": "This query first selects the name and points per game of all players. It then filters the teams to only those in the Eastern Conference. Finally, it orders the results by points per game in descending order." 
+}, { + "id": "1737", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of spectators at the World Cup matches per team?", + "sql_context": "CREATE TABLE world_cup (match_date DATE, home_team VARCHAR(100), away_team VARCHAR(100), spectators INT); INSERT INTO world_cup VALUES (\u00272018-06-14\u0027, \u0027Russia\u0027, \u0027Saudi Arabia\u0027, 78011), (\u00272018-06-15\u0027, \u0027Egypt\u0027, \u0027Uruguay\u0027, 45000), (\u00272018-06-16\u0027, \u0027Morocco\u0027, \u0027Iran\u0027, 39579), (\u00272018-06-17\u0027, \u0027Portugal\u0027, \u0027Spain\u0027, 62716);", + "sql": "SELECT AVG(spectators) FROM world_cup WHERE home_team \u003d ANY (SELECT team FROM (VALUES (\u0027Russia\u0027), (\u0027Egypt\u0027), (\u0027Morocco\u0027), (\u0027Portugal\u0027)) AS t(team));", + "sql_explanation": "This query calculates the average number of spectators at the World Cup matches per team by using the AVG() function to find the average spectators for all matches in the world_cup table where the home_team is any of the teams in the specified list." 
+}, { + "id": "1876", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which player scored the most points in a single NBA game in the 2020-2021 season?", + "sql_context": "CREATE TABLE nba_scores (game_id INT, player_name VARCHAR(50), team VARCHAR(50), points INT);", + "sql": "SELECT player_name, points FROM nba_scores WHERE points \u003d (SELECT MAX(points) FROM nba_scores WHERE season_year \u003d 2021) AND season_year \u003d 2021;", + "sql_explanation": "This query finds the player who scored the most points in a single NBA game in the 2020-2021 season by finding the maximum points in the nba_scores table for that season and then selecting the player name and points for that game." 
+}, { + "id": "2126", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 players with the most assists in descending order.", + "sql_context": "CREATE TABLE players (player_id INT, name VARCHAR(50), age INT, position VARCHAR(50), team VARCHAR(50), assists INT);", + "sql": "SELECT name, assists FROM (SELECT name, SUM(assists) AS assists FROM players GROUP BY name) AS subquery ORDER BY assists DESC LIMIT 3;", + "sql_explanation": "The subquery calculates the total assists by each player by grouping the players table by the name column and summing the assists column. The outer query then orders the results in descending order based on the total assists and limits the results to the top 3 players." 
+}, { + "id": "2352", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of goals scored by players from the La Liga in soccer games, excluding players with less than 15 games played?", + "sql_context": "CREATE TABLE La_Liga_Teams (Team VARCHAR(50), Goals INT); INSERT INTO La_Liga_Teams (Team, Goals) VALUES (\u0027Real Madrid\u0027, 83), (\u0027Barcelona\u0027, 75), (\u0027Atletico Madrid\u0027, 67);", + "sql": "SELECT AVG(Goals) FROM La_Liga_Teams WHERE Goals \u003e (SELECT AVG(Goals) FROM La_Liga_Teams) GROUP BY Goals HAVING COUNT(*) \u003e\u003d 15;", + "sql_explanation": "1. Filters the La_Liga_Teams table for goals greater than the average goals for the league. 2. Groups the filtered results by goals. 3. Filters the grouped results to only include those with more than 15 records. 4. Calculates the average goals for the qualifying teams." 
+}, { + "id": "2548", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average goals conceded per game for the top 5 teams in the English Premier League", + "sql_context": "CREATE TABLE teams (id INT PRIMARY KEY, name TEXT, league TEXT, goals_conceded INT, goals_scored INT, games_played INT); INSERT INTO teams (id, name, league, goals_conceded, goals_scored, games_played) VALUES (1, \u0027Manchester City\u0027, \u0027English Premier League\u0027, 14, 51, 23), (2, \u0027Liverpool\u0027, \u0027English Premier League\u0027, 14, 50, 22), (3, \u0027Chelsea\u0027, \u0027English Premier League\u0027, 17, 45, 23), (4, \u0027Arsenal\u0027, \u0027English Premier League\u0027, 21, 38, 23), (5, \u0027Manchester United\u0027, \u0027English Premier League\u0027, 21, 43, 23), (6, \u0027Tottenham Hotspur\u0027, \u0027English Premier League\u0027, 22, 41, 23);", + "sql": "SELECT AVG(goals_conceded/games_played) FROM (SELECT * FROM teams ORDER BY goals_conceded ASC LIMIT 5) AS top_five_teams;", + "sql_explanation": "This query first selects the top 5 teams with the least number of goals conceded and then calculates the average number of goals conceded per game for those teams." 
+}, { + "id": "2615", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which football teams have the highest number of wins in their respective leagues?", + "sql_context": "CREATE TABLE football_teams (team_name TEXT, league TEXT, wins INT); INSERT INTO football_teams (team_name, league, wins) VALUES (\u0027Arsenal\u0027, \u0027Premier League\u0027, 21), (\u0027Barcelona\u0027, \u0027La Liga\u0027, 25), (\u0027Juventus\u0027, \u0027Serie A\u0027, 27);", + "sql": "SELECT team_name, league, wins FROM football_teams WHERE wins \u003d (SELECT MAX(wins) FROM football_teams GROUP BY league);", + "sql_explanation": "This SQL query finds the teams with the highest number of wins in their respective leagues. It uses a subquery to find the maximum number of wins in each league and then selects the teams that have this number of wins." 
+}, { + "id": "773", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of peacekeeping operations that a single country has been involved in?", + "sql_context": "CREATE TABLE Peacekeeping_Operations (id INT, country VARCHAR(50), year INT); INSERT INTO Peacekeeping_Operations (id, country, year) VALUES (1, \u0027United States\u0027, 2018), (2, \u0027United States\u0027, 2019), (3, \u0027United States\u0027, 2020), (4, \u0027United Kingdom\u0027, 2018), (5, \u0027Canada\u0027, 2019), (6, \u0027Australia\u0027, 2020), (7, \u0027France\u0027, 2018), (8, \u0027Germany\u0027, 2019), (9, \u0027Italy\u0027, 2020);", + "sql": "SELECT country, MAX(year) FROM Peacekeeping_Operations GROUP BY country HAVING COUNT(year) \u003d (SELECT MAX(pe.cnt) FROM (SELECT COUNT(*) AS cnt FROM Peacekeeping_Operations GROUP BY country) AS pe);", + "sql_explanation": "This query calculates the maximum number of peacekeeping operations that a single country has been involved in. It does this by using the MAX function on the \u0027year\u0027 column, which finds the maximum year value for each unique \u0027country\u0027 value. The HAVING clause filters the records based on the count of year values being equal to the maximum count of year values for all countries." 
+}, { + "id": "1549", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of each humanitarian assistance program and the year it was launched by countries in the G20?", + "sql_context": "CREATE TABLE humanitarian_assistance (name VARCHAR(50), country VARCHAR(50), year INT); INSERT INTO humanitarian_assistance (name, country, year) VALUES (\u0027Food Aid Program\u0027, \u0027USA\u0027, 1954), (\u0027Disaster Relief Fund\u0027, \u0027Japan\u0027, 1987), (\u0027Emergency Humanitarian Aid\u0027, \u0027Germany\u0027, 2001), (\u0027Humanitarian Action Program\u0027, \u0027France\u0027, 1986), (\u0027International Humanitarian Partnership\u0027, \u0027UK\u0027, 2011), (\u0027Global Humanitarian Assistance Initiative\u0027, \u0027Canada\u0027, 2010), (\u0027Humanitarian Aid Program\u0027, \u0027China\u0027, 1991), (\u0027Humanitarian Relief Program\u0027, \u0027Brazil\u0027, 2005), (\u0027Humanitarian Response Program\u0027, \u0027Russia\u0027, 2008), (\u0027Humanitarian Assistance and Disaster Relief Program\u0027, \u0027India\u0027, 2004), (\u0027Australian Humanitarian Initiative\u0027, \u0027Australia\u0027, 2013), (\u0027South African Humanitarian Assistance Program\u0027, \u0027South Africa\u0027, 2010), (\u0027Mexican Humanitarian Aid Program\u0027, \u0027Mexico\u0027, 1984), (\u0027Indonesian Humanitarian Assistance Program\u0027, \u0027Indonesia\u0027, 2005);", + "sql": "SELECT ha.name, ha.year FROM humanitarian_assistance ha INNER JOIN (SELECT DISTINCT country FROM humanitarian_assistance) hac ON ha.country \u003d hac.country;", + "sql_explanation": "This query 
retrieves the name and year of each humanitarian assistance program from the humanitarian_assistance table that are from countries in the humanitarian_assistance table." +}, { + "id": "2407", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove any duplicate records from the \"peacekeeping_ops\" table", + "sql_context": "CREATE TABLE peacekeeping_ops (id INT, country VARCHAR(50), region VARCHAR(50)); INSERT INTO peacekeeping_ops (id, country, region) VALUES (1, \u0027Nigeria\u0027, \u0027Africa\u0027), (2, \u0027Egypt\u0027, \u0027Africa\u0027), (3, \u0027Iraq\u0027, \u0027Middle East\u0027), (4, \u0027Nigeria\u0027, \u0027Africa\u0027);", + "sql": "DELETE FROM peacekeeping_ops WHERE id IN (SELECT MAX(id) FROM peacekeeping_ops GROUP BY country, region HAVING COUNT(*) \u003e 1);", + "sql_explanation": "This query removes any duplicate records from the \"peacekeeping_ops\" table by grouping by the \"country\" and \"region\" columns, keeping the record with the highest \"id\" value for each group." 
+}, { + "id": "139", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue of the \u0027Premium\u0027 product line in Q1 2022?", + "sql_context": "CREATE TABLE products (product_id INT, product_line VARCHAR(10)); INSERT INTO products (product_id, product_line) VALUES (1, \u0027Premium\u0027); CREATE TABLE sales (sale_id INT, product_id INT, sale_date DATE); INSERT INTO sales (sale_id, product_id, sale_date) VALUES (1, 1, \u00272022-01-05\u0027);", + "sql": "SELECT SUM(revenue) FROM (SELECT sales.product_id, SUM(price * quantity) AS revenue FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE sales.sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 AND products.product_line \u003d \u0027Premium\u0027 GROUP BY sales.product_id) AS subquery;", + "sql_explanation": "This query calculates the total revenue of the \u0027Premium\u0027 product line in Q1 2022 by using a subquery to first sum the revenue for each product_id, then summing the revenue for the \u0027Premium\u0027 product line in Q1 2022." 
+}, { + "id": "817", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of organic cotton used in garments manufactured in Europe?", + "sql_context": "CREATE TABLE materials (garment_id INT, material VARCHAR(255), organic BOOLEAN); INSERT INTO materials (garment_id, material, organic) VALUES (1, \u0027cotton\u0027, TRUE); INSERT INTO materials (garment_id, material, organic) VALUES (2, \u0027polyester\u0027, FALSE); INSERT INTO materials (garment_id, material, organic) VALUES (3, \u0027silk\u0027, FALSE);", + "sql": "SELECT (COUNT(CASE WHEN organic THEN 1 END)/COUNT(*))*100 FROM materials WHERE material \u003d \u0027cotton\u0027 AND garment_id IN (SELECT garment_id FROM garments WHERE manufacture_location LIKE \u0027Europe%\u0027);", + "sql_explanation": "The query calculates the percentage of organic cotton used in garments manufactured in Europe by counting the number of organic cotton materials and dividing it by the total number of materials, then multiplying the result by 100. It filters the garments based on the manufacture_location starting with \u0027Europe\u0027." 
+}, { + "id": "2783", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total retail sales for each country, excluding any duplicates based on the \u0027country\u0027 column in the \u0027RetailSales\u0027 table.", + "sql_context": "CREATE TABLE RetailSales (country VARCHAR(50), TotalSales DECIMAL(10,2)); INSERT INTO RetailSales (country, TotalSales) VALUES (\u0027USA\u0027, 12500.00), (\u0027Canada\u0027, 7000.00), (\u0027USA\u0027, 3000.00), (\u0027Brazil\u0027, 9000.00);", + "sql": "SELECT country, SUM(TotalSales) FROM (SELECT DISTINCT country, TotalSales FROM RetailSales) AS A GROUP BY country;", + "sql_explanation": "This SQL query uses a subquery to first eliminate duplicate country values in the \u0027RetailSales\u0027 table. The outer query then groups the remaining records by country and calculates the total retail sales for each country using the SUM function." 
+}, { + "id": "4442", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete garments with a rating below 3.", + "sql_context": "CREATE TABLE low_rated_garments AS SELECT * FROM garments WHERE rating \u003c 3;", + "sql": "DELETE FROM garments WHERE id IN (SELECT garment_id FROM low_rated_garments);", + "sql_explanation": "This query first creates a table called \u0027low_rated_garments\u0027 that selects all rows from the \u0027garments\u0027 table where the \u0027rating\u0027 column is less than 3. It then deletes all rows from the \u0027garments\u0027 table where the \u0027id\u0027 column appears in the \u0027low_rated_garments\u0027 table." 
+}, { + "id": "281", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique customers who benefited from socially responsible lending in Florida?", + "sql_context": "CREATE TABLE customers (id INT, loan_id INT, name TEXT, city TEXT); INSERT INTO customers (id, loan_id, name, city) VALUES (1, 1, \u0027Ali\u0027, \u0027Florida\u0027), (2, 1, \u0027Aisha\u0027, \u0027Florida\u0027), (3, 2, \u0027Sara\u0027, \u0027Georgia\u0027), (4, 3, \u0027Hamza\u0027, \u0027California\u0027);", + "sql": "SELECT COUNT(DISTINCT customers.id) FROM customers JOIN transactions ON customers.loan_id \u003d transactions.id WHERE transactions.is_shariah_compliant \u003d FALSE AND transactions.id IN (SELECT loan_id FROM transactions WHERE transaction_type \u003d \u0027Disbursement\u0027);", + "sql_explanation": "This query counts the number of unique customers who benefited from socially responsible lending in Florida by joining the \u0027customers\u0027 and \u0027transactions\u0027 tables, filtering for non-Shariah-compliant loans and disbursement transactions, and counting distinct customers." 
+}, { + "id": "784", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum Shariah-compliant finance asset value in the Gulf Cooperation Council (GCC) countries?", + "sql_context": "CREATE TABLE shariah_compliant_finance (id INT, country VARCHAR(255), asset_value DECIMAL(10,2));", + "sql": "SELECT MAX(asset_value) FROM shariah_compliant_finance WHERE country IN (SELECT country FROM (SELECT DISTINCT country FROM shariah_compliant_finance WHERE region \u003d \u0027Gulf Cooperation Council\u0027) t);", + "sql_explanation": "This query finds all countries in the Gulf Cooperation Council using a subquery, and then uses another subquery with the IN operator to filter Shariah-compliant finance assets based on those countries and the asset value." 
+}, { + "id": "81", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find offenders with multiple offenses in the \u0027criminal_database\u0027 table who have no ethnicity information?", + "sql_context": "CREATE TABLE criminal_database (offender_id INT, offense VARCHAR(255)); CREATE TABLE offender_demographics (offender_id INT, ethnicity VARCHAR(255));", + "sql": "SELECT offender_id FROM criminal_database WHERE offender_id IN (SELECT offender_id FROM (SELECT offender_id, COUNT(offense) as num_offenses FROM criminal_database GROUP BY offender_id HAVING num_offenses \u003e 1) AS temp WHERE offender_id NOT IN (SELECT offender_id FROM offender_demographics)) GROUP BY offender_id;", + "sql_explanation": "The SQL query uses IN and nested subqueries with GROUP BY, HAVING, and COUNT to find offenders with multiple offenses in the \u0027criminal_database\u0027 table who have no ethnicity information." 
+}, { + "id": "1004", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of legal technology patents granted to applicants in California since 2010?", + "sql_context": "CREATE TABLE legal_technology_patents (patent_id INT, grant_date DATE, state VARCHAR(20));", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM legal_technology_patents WHERE grant_date \u003e\u003d \u00272010-01-01\u0027)) AS percentage FROM legal_technology_patents WHERE state \u003d \u0027California\u0027;", + "sql_explanation": "This query calculates the percentage of legal technology patents granted to applicants in California since 2010 by dividing the count of patents granted in California by the total count of patents granted since 2010." 
+}, { + "id": "1385", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What percentage of cases in 2021 were resolved through restorative justice?", + "sql_context": "CREATE TABLE cases (id INT, case_name VARCHAR(255), resolution_type VARCHAR(255), year INT); INSERT INTO cases (id, case_name, resolution_type, year) VALUES (1, \u0027Case A\u0027, \u0027Restorative Justice\u0027, 2021), (2, \u0027Case B\u0027, \u0027Court Proceedings\u0027, 2021);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM cases WHERE year \u003d 2021)) as Percentage FROM cases WHERE resolution_type \u003d \u0027Restorative Justice\u0027 AND year \u003d 2021;", + "sql_explanation": "This query calculates the percentage of cases in the year 2021 that were resolved through restorative justice, by using a subquery to calculate the total number of cases in the year 2021 and the main query to calculate the number of cases resolved through restorative justice and the percentage of such cases." 
+}, { + "id": "1502", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent on legal research by attorneys at a legal aid clinic in the city of Los Angeles in the month of December 2019?", + "sql_context": "CREATE TABLE legal_aid_clinics (clinic_id INT, city VARCHAR(20), state VARCHAR(20)); CREATE TABLE attorney_hours (attorney_id INT, clinic_id INT, hours INT, month INT, year INT); INSERT INTO legal_aid_clinics (clinic_id, city, state) VALUES (1, \u0027Chicago\u0027, \u0027Illinois\u0027), (2, \u0027Los Angeles\u0027, \u0027California\u0027); INSERT INTO attorney_hours (attorney_id, clinic_id, hours, month, year) VALUES (1, 1, 20, 11, 2019), (2, 1, 15, 11, 2019), (3, 2, 10, 12, 2019), (4, 2, 15, 12, 2019);", + "sql": "SELECT SUM(hours) FROM attorney_hours WHERE clinic_id \u003d (SELECT clinic_id FROM legal_aid_clinics WHERE city \u003d \u0027Los Angeles\u0027) AND month \u003d 12 AND year \u003d 2019;", + "sql_explanation": "This query calculates the total number of hours spent on legal research by attorneys at a legal aid clinic in the city of Los Angeles in the month of December 2019. It does this by using the SUM() function to calculate the sum of the \u0027hours\u0027 column in the \u0027attorney_hours\u0027 table, while filtering for rows where the \u0027clinic_id\u0027 column is equal to the \u0027clinic_id\u0027 of the legal aid clinic in Los Angeles, the \u0027month\u0027 column is equal to December, and the \u0027year\u0027 column is equal to 2019. 
The \u0027clinic_id\u0027 of the legal aid clinic in Los Angeles is determined using a subquery that selects the \u0027clinic_id\u0027 from the \u0027legal_aid_clinics\u0027 table where the \u0027city\u0027 column is equal to \u0027Los Angeles\u0027." +}, { + "id": "2674", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours worked by legal technology professionals in a year?", + "sql_context": "CREATE TABLE legal_tech_professionals (professional_id INT, hours_worked INT, year INT);", + "sql": "SELECT SUM(hours_worked) FROM legal_tech_professionals WHERE year \u003d (SELECT MAX(year) FROM legal_tech_professionals);", + "sql_explanation": "This query finds the total number of hours worked by legal technology professionals in a year. It does this by using a subquery to find the most recent year in the \u0027legal_tech_professionals\u0027 table, and then using the SUM function to find the total value in the \u0027hours_worked\u0027 column for that year." 
+}, { + "id": "105", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Total number of visitors for each exhibition, including visitors who did not engage with any installations?", + "sql_context": "CREATE TABLE Exhibitions (ExhibitionID INT, Name VARCHAR(50)); INSERT INTO Exhibitions (ExhibitionID, Name) VALUES (1, \u0027Impressionists\u0027), (2, \u0027Ancient Art\u0027); CREATE TABLE Visitors (VisitorID INT, ExhibitionID INT); INSERT INTO Visitors (VisitorID, ExhibitionID) VALUES (1, 1), (2, 1), (3, 2), (4, NULL);", + "sql": "SELECT E.Name, COUNT(V.VisitorID) + (SELECT COUNT(*) FROM Visitors V2 WHERE V2.ExhibitionID IS NULL AND NOT EXISTS (SELECT 1 FROM Visitors V3 WHERE V3.VisitorID \u003d V2.VisitorID)) AS TotalVisitors FROM Exhibitions E LEFT JOIN Visitors V ON E.ExhibitionID \u003d V.ExhibitionID GROUP BY E.ExhibitionID, E.Name;", + "sql_explanation": "Calculate the total number of visitors for each exhibition, including visitors who did not engage with any installations, by joining Exhibitions and Visitors tables, counting the number of visitors for each exhibition, and then adding the number of visitors who did not engage with any installations." 
+}, { + "id": "119", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors who attended exhibitions in the \u0027History\u0027 category, and the percentage of visitors who attended each exhibition in this category?", + "sql_context": "CREATE TABLE Exhibitions (id INT, name VARCHAR(20), category VARCHAR(20), visitors INT); INSERT INTO Exhibitions VALUES (1, \u0027Exhibition A\u0027, \u0027Art\u0027, 3000), (2, \u0027Exhibition B\u0027, \u0027Science\u0027, 2000), (3, \u0027Exhibition C\u0027, \u0027Art\u0027, 4000), (4, \u0027Exhibition D\u0027, \u0027History\u0027, 5000); CREATE TABLE Visitors (id INT, exhibition_id INT, age INT, country VARCHAR(20)); INSERT INTO Visitors VALUES (1, 1, 35, \u0027USA\u0027), (2, 1, 45, \u0027Canada\u0027), (3, 2, 25, \u0027Mexico\u0027), (4, 3, 50, \u0027Brazil\u0027), (5, 3, 30, \u0027USA\u0027);", + "sql": "SELECT E.category, E.name, SUM(E.visitors) AS total_visitors, (SUM(E.visitors) * 100.0 / (SELECT SUM(visitors) FROM Exhibitions WHERE category \u003d \u0027History\u0027)) AS percentage FROM Exhibitions E INNER JOIN Visitors V ON E.id \u003d V.exhibition_id WHERE E.category \u003d \u0027History\u0027 GROUP BY E.category, E.name;", + "sql_explanation": "This SQL query calculates the total number of visitors who attended exhibitions in the \u0027History\u0027 category, and the percentage of visitors who attended each exhibition in this category. It first filters the records where the category is \u0027History\u0027. 
Then, it groups the records by the category and name of the exhibition, and calculates the sum of visitors and the percentage of visitors for each group. The percentage is calculated as the sum of visitors for each exhibition divided by the total number of visitors in the \u0027History\u0027 category." +}, { + "id": "292", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update exhibition visitor counts based on actual data", + "sql_context": "CREATE TABLE DigitalExhibitions (exhibition_id INT, exhibition_name VARCHAR(50), estimated_visitors INT);", + "sql": "UPDATE DigitalExhibitions SET visitors \u003d (SELECT COUNT(*) FROM DigitalVisitors WHERE exhibition_id \u003d DigitalExhibitions.exhibition_id) WHERE EXISTS (SELECT * FROM DigitalVisitors WHERE DigitalExhibitions.exhibition_id \u003d DigitalVisitors.exhibition_id);", + "sql_explanation": "The SQL query updates the visitor counts for each digital exhibition based on the actual visitor data, using a subquery and the EXISTS operator." 
+}, { + "id": "389", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of digital initiatives per museum in the Middle East?", + "sql_context": "CREATE TABLE Museums (id INT, name VARCHAR(255), region VARCHAR(255)); CREATE TABLE Digital_Initiatives (id INT, museum_id INT, initiative VARCHAR(255)); INSERT INTO Museums (id, name, region) VALUES (1, \u0027Metropolitan Museum of Art\u0027, \u0027North America\u0027), (2, \u0027National Museum of Qatar\u0027, \u0027Middle East\u0027); INSERT INTO Digital_Initiatives (id, museum_id, initiative) VALUES (1, 1, \u0027Virtual Tour\u0027), (2, 1, \u0027Online Collection\u0027), (3, 2, \u0027Interactive Exhibit\u0027);", + "sql": "SELECT AVG(digital_initiatives_per_museum) FROM (SELECT COUNT(di.initiative) AS digital_initiatives_per_museum FROM Digital_Initiatives di JOIN Museums m ON di.museum_id \u003d m.id WHERE m.region \u003d \u0027Middle East\u0027 GROUP BY m.id) AS subquery;", + "sql_explanation": "This query calculates the average number of digital initiatives per museum in the Middle East. It first creates a subquery that groups the Digital_Initiatives table by museum_id and counts the number of initiatives for each museum in the Middle East. The outer query then calculates the average of these counts." 
+}, { + "id": "1412", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the maximum donation amount given by a visitor from \u0027Asia\u0027?", + "sql_context": "CREATE TABLE Donations (DonationID INT, VisitorID INT, Amount DECIMAL(10,2));", + "sql": "SELECT MAX(d.Amount) FROM Donations d JOIN Visitors v ON d.VisitorID \u003d v.VisitorID WHERE v.Country IN (SELECT CountryName FROM Countries WHERE Region \u003d \u0027Asia\u0027);", + "sql_explanation": "This query joins the Donations table with the Visitors table on the VisitorID column, filters the records where the visitor\u0027s country is in the \u0027Asia\u0027 region, and calculates the maximum donation amount." 
+}, { + "id": "1418", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of visitors to the \u0027Ancient Civilizations\u0027 exhibition by gender?", + "sql_context": "CREATE TABLE Exhibitions (exhibition_id INT, exhibition_name VARCHAR(255)); INSERT INTO Exhibitions (exhibition_id, exhibition_name) VALUES (1, \u0027Ancient Civilizations\u0027); CREATE TABLE Visitors (visitor_id INT, exhibition_id INT, age INT, gender VARCHAR(50)); INSERT INTO Visitors (visitor_id, exhibition_id, age, gender) VALUES (1, 1, 45, \u0027Female\u0027), (2, 1, 35, \u0027Male\u0027), (3, 1, 42, \u0027Female\u0027), (4, 1, 30, \u0027Male\u0027);", + "sql": "SELECT gender, (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Visitors WHERE exhibition_id \u003d 1)) AS percentage FROM Visitors WHERE exhibition_id \u003d 1 GROUP BY gender;", + "sql_explanation": "This query calculates the percentage of visitors to the \u0027Ancient Civilizations\u0027 exhibition by gender by dividing the count of visitors by gender by the total number of visitors to the exhibition, grouping by gender and filtering for exhibition_id \u003d 1." 
+}, { + "id": "1528", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the maximum number of visitors per exhibition in New York in 2022?", + "sql_context": "CREATE TABLE ExhibitionVisitors (exhibition_id INT, num_visitors INT);", + "sql": "SELECT exhibition_id, MAX(num_visitors) FROM ExhibitionVisitors WHERE exhibition_id IN (SELECT id FROM Exhibitions WHERE city \u003d \u0027New York\u0027 AND year \u003d 2022)", + "sql_explanation": "The SQL query finds the maximum number of visitors per exhibition by using the MAX function on the num_visitors column, filtering for New York and 2022." +}, { + "id": "2048", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average number of digital exhibits viewed per visitor in Sydney.", + "sql_context": "CREATE TABLE Visitors (id INT, city VARCHAR(50), digital_exhibits INT, visit_year INT); INSERT INTO Visitors (id, city, digital_exhibits, visit_year) VALUES (1, \u0027Sydney\u0027, 3, 2022);", + "sql": "SELECT AVG(digital_exhibits/visitors) FROM (SELECT city, COUNT(DISTINCT id) visitors FROM Visitors WHERE city \u003d \u0027Sydney\u0027 GROUP BY city);", + "sql_explanation": "Calculate the average number of digital exhibits viewed per visitor in Sydney." 
+}, { + "id": "420", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tourists visited New Zealand in 2019 from countries with a population of over 100 million?", + "sql_context": "CREATE TABLE tourism_data (visitor_country VARCHAR(50), destination_country VARCHAR(50), visit_year INT); INSERT INTO tourism_data (visitor_country, destination_country, visit_year) VALUES (\u0027China\u0027, \u0027New Zealand\u0027, 2019), (\u0027India\u0027, \u0027New Zealand\u0027, 2019), (\u0027USA\u0027, \u0027New Zealand\u0027, 2019), (\u0027Indonesia\u0027, \u0027New Zealand\u0027, 2019), (\u0027Pakistan\u0027, \u0027New Zealand\u0027, 2019);", + "sql": "SELECT SUM(*) FROM tourism_data JOIN (SELECT DISTINCT visitor_country FROM population_data WHERE population \u003e 100000000) pop ON visitor_country \u003d pop.visitor_country WHERE visit_year \u003d 2019 AND destination_country \u003d \u0027New Zealand\u0027;", + "sql_explanation": "This query calculates the total number of tourists visiting New Zealand in 2019 from countries with a population of over 100 million by joining the tourism_data table with a subquery that selects the distinct visitor_country values from the population_data table where the population is over 100 million. The join is based on the visitor_country column and the resulting table is filtered to include only rows with a visit_year of 2019 and a destination_country of New Zealand. The sum of the rows is then calculated." 
+}, { + "id": "1596", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the percentage of tourists who visited the United States in 2019", + "sql_context": "CREATE TABLE tourism_stats (destination VARCHAR(255), year INT, visitors INT); INSERT INTO tourism_stats (destination, year, visitors) VALUES (\u0027United States\u0027, 2019, 30000000);", + "sql": "SELECT (visitors / (SELECT SUM(visitors) FROM tourism_stats) * 100) AS percentage FROM tourism_stats WHERE destination \u003d \u0027United States\u0027 AND year \u003d 2019;", + "sql_explanation": "This query calculates the percentage of tourists who visited the United States in 2019 by dividing the number of visitors to the United States in 2019 by the total number of tourists from all destinations in the tourism_stats table." 
+}, { + "id": "7", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the difference in the number of trees between the tree species with the highest and lowest carbon sequestration rates in the private_lands schema.", + "sql_context": "CREATE TABLE private_lands.carbon_sequestration (species VARCHAR(255), sequestration_rate DECIMAL(5,2));", + "sql": "SELECT species_high.species AS high_species, species_low.species AS low_species, species_high.sequestration_rate - species_low.sequestration_rate AS difference FROM (SELECT species, MAX(sequestration_rate) AS sequestration_rate FROM private_lands.carbon_sequestration GROUP BY species) AS species_high FULL OUTER JOIN (SELECT species, MIN(sequestration_rate) AS sequestration_rate FROM private_lands.carbon_sequestration GROUP BY species) AS species_low ON species_high.sequestration_rate \u003d species_low.sequestration_rate;", + "sql_explanation": "This query finds the difference in the number of trees between the tree species with the highest and lowest carbon sequestration rates in the private_lands schema by using a FULL OUTER JOIN to combine the results of two subqueries. The first subquery calculates the maximum sequestration_rate value in the carbon_sequestration table, and the second subquery calculates the minimum sequestration_rate value in the carbon_sequestration table. The FULL OUTER JOIN combines the results of these two subqueries, and the ON clause matches the sequestration_rate column from both subqueries. 
The query then calculates the difference in the carbon sequestration rates between the two species by subtracting the sequestration_rate column from the species_low subquery from the sequestration_rate column from the species_high subquery." +}, { + "id": "423", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many temperate and tropical forests have experienced a decrease in carbon sequestration values in the last 5 years?", + "sql_context": "CREATE TABLE forest_carbon_seq (id INT, region VARCHAR(20), year INT, carbon_value FLOAT);", + "sql": "SELECT region, COUNT(*) as num_forests FROM forest_carbon_seq WHERE region IN (\u0027Temperate\u0027, \u0027Tropical\u0027) AND carbon_value \u003c (SELECT carbon_value FROM forest_carbon_seq WHERE region \u003d t.region AND year \u003d t.year - 5) GROUP BY region;", + "sql_explanation": "This query counts the number of temperate and tropical forests that have experienced a decrease in carbon sequestration values in the last 5 years by filtering the data for records with regions \u0027Temperate\u0027 or \u0027Tropical\u0027 and a carbon value less than the carbon value 5 years ago. The query then groups the data by region and calculates the count for each region." 
+}, { + "id": "1209", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the species with the highest average tree height?", + "sql_context": "CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(50)); INSERT INTO species (id, name) VALUES (1, \u0027Spruce\u0027); INSERT INTO species (id, name) VALUES (2, \u0027Pine\u0027); CREATE TABLE trees (id INT PRIMARY KEY, species_id INT, height INT); INSERT INTO trees (id, species_id, height) VALUES (1, 1, 30); INSERT INTO trees (id, species_id, height) VALUES (2, 2, 40);", + "sql": "SELECT s.name FROM species s JOIN (SELECT species_id, AVG(height) AS avg_height FROM trees GROUP BY species_id ORDER BY avg_height DESC LIMIT 1) t ON s.id \u003d t.species_id;", + "sql_explanation": "This query retrieves the name of the species with the highest average tree height. It first calculates the average height for each species, orders the results in descending order, and then returns the name of the species with the highest average height." 
+}, { + "id": "2604", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the species with the lowest timber volume in the national_parks schema.", + "sql_context": "CREATE TABLE national_parks.timber_volume (species VARCHAR(255), volume DECIMAL(5,2));", + "sql": "SELECT species FROM national_parks.timber_volume WHERE volume \u003d (SELECT MIN(volume) FROM national_parks.timber_volume);", + "sql_explanation": "This query finds the species with the lowest timber volume in the national_parks schema by using a subquery to find the minimum volume value in the timber_volume table, and then using the WHERE clause to filter the results to only include rows with a volume value equal to the minimum value." 
+}, { + "id": "2936", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum hectares of any forest in India?", + "sql_context": "CREATE TABLE Forests (id INT, name VARCHAR(50), hectares FLOAT, country VARCHAR(50)); INSERT INTO Forests (id, name, hectares, country) VALUES (1, \u0027Amazon Rainforest\u0027, 55000000.0, \u0027Brazil\u0027); CREATE TABLE Forest_Hectares (id INT, forest_id INT, hectares FLOAT); INSERT INTO Forest_Hectares (id, forest_id, hectares) VALUES (1, 1, 55000000.0);", + "sql": "SELECT MAX(hectares) FROM Forest_Hectares WHERE forest_id IN (SELECT id FROM Forests WHERE country \u003d \u0027India\u0027);", + "sql_explanation": "This query retrieves the maximum hectares value for forests in India from the Forest_Hectares table. It does this by first selecting the forest_id values for forests in India from the Forests table, and then using those forest_id values to filter the Forest_Hectares table and find the maximum hectares value for those forests." 
+}, { + "id": "3829", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the timber production record with the lowest volume value.", + "sql_context": "CREATE TABLE timber_production (id INT, year INT, volume FLOAT); INSERT INTO timber_production (id, year, volume) VALUES (1, 2020, 1200.5), (2, 2021, 1000), (3, 2022, 1700.3);", + "sql": "DELETE FROM timber_production WHERE volume \u003d (SELECT MIN(volume) FROM timber_production);", + "sql_explanation": "The query deletes the record with the minimum \u0027volume\u0027 value in the \u0027timber_production\u0027 table." +}, { + "id": "1148", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum speed of vessels with a safety record below average in the Pacific Ocean?", + "sql_context": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, speed FLOAT, safety_score FLOAT);CREATE TABLE regions (id INT, name TEXT); INSERT INTO vessels (id, name, type, speed, safety_score) VALUES (1, \u0027VesselC\u0027, \u0027Tanker\u0027, 12.3, 7.5); INSERT INTO regions (id, name) VALUES (1, \u0027Pacific\u0027);", + "sql": "SELECT MAX(v.speed) FROM vessels v JOIN regions r ON v.speed \u003c (SELECT AVG(speed) FROM vessels WHERE regions.name \u003d \u0027Pacific\u0027) AND 
v.region_id \u003d r.id AND r.name \u003d \u0027Pacific\u0027;", + "sql_explanation": "This query calculates the maximum speed of vessels with a safety record below average in the Pacific Ocean. It first calculates the average speed of vessels in the Pacific Ocean. Then, it filters the records where the speed is less than the average speed and the region is the Pacific. Finally, it calculates the maximum speed of those vessels." +}, { + "id": "2447", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new safety incident for \u0027VesselD\u0027 on 2022-02-05.", + "sql_context": "CREATE TABLE Vessels (Id INT, Name VARCHAR(50), Type VARCHAR(50), MaxSpeed DECIMAL(5,2)); CREATE TABLE SafetyIncidents (Id INT, VesselId INT, IncidentDate DATE, Description VARCHAR(255));", + "sql": "INSERT INTO SafetyIncidents (VesselId, IncidentDate) VALUES ((SELECT Id FROM Vessels WHERE Name \u003d \u0027VesselD\u0027), \u00272022-02-05\u0027);", + "sql_explanation": "This query adds a new safety incident for \u0027VesselD\u0027 on 2022-02-05 by using the INSERT INTO statement, and selecting the Id of the vessel from the Vessels table where the name is \u0027VesselD\u0027." 
+}, { + "id": "3488", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vessels have a name length greater than 15 characters?", + "sql_context": "CREATE TABLE vessels_2 (vessel_id INT, vessel_name VARCHAR(50)); INSERT INTO vessels_2 (vessel_id, vessel_name) VALUES (1, \u0027Sea Titan\u0027), (2, \u0027Harbor Master\u0027), (3, \u0027Marine\u0027), (4, \u0027Ocean Express\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT vessel_name FROM vessels_2 WHERE LENGTH(vessel_name) \u003e 15) as temp;", + "sql_explanation": "1. Select rows with vessel_name length greater than 15. 2. Wrap the previous query in a subquery to count the number of rows." 
+}, { + "id": "353", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the success rate of startups founded by women?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, founder_gender TEXT); INSERT INTO companies (id, name, founder_gender) VALUES (1, \u0027Foobar Inc\u0027, \u0027Female\u0027), (2, \u0027Gizmos Inc\u0027, \u0027Male\u0027), (3, \u0027Widgets Inc\u0027, \u0027Female\u0027), (4, \u0027Doodads Inc\u0027, \u0027Male\u0027), (5, \u0027Thingamajigs Inc\u0027, \u0027Female\u0027), (6, \u0027Whatchamacallits Inc\u0027, \u0027Female\u0027); CREATE TABLE success (company_id INT, is_successful BOOLEAN); INSERT INTO success (company_id, is_successful) VALUES (1, 1), (2, 0), (3, 1), (4, 1), (5, 1), (6, 0);", + "sql": "SELECT COUNT(*) as num_successful_startups, 100.0 * COUNT(*) / (SELECT COUNT(*) FROM companies) as success_rate FROM success JOIN companies ON success.company_id \u003d companies.id WHERE companies.founder_gender \u003d \u0027Female\u0027 AND is_successful \u003d 1;", + "sql_explanation": "We calculate the success rate of startups founded by women. We first join the companies and success tables on the company_id and id columns respectively. Next, we filter the rows to only include startups founded by women and those that are successful. Finally, we calculate the number of successful startups founded by women and the success rate as a percentage of the total number of startups." 
+}, { + "id": "694", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum altitude reached by \u0027Boeing\u0027 and \u0027Airbus\u0027 aircraft models.", + "sql_context": "CREATE TABLE Flight_Altitude (aircraft_model VARCHAR(255), altitude INT); INSERT INTO Flight_Altitude (aircraft_model, altitude) VALUES (\u0027B737\u0027, 40000), (\u0027A320\u0027, 41000), (\u0027B747\u0027, 43000), (\u0027B787\u0027, 44000), (\u0027A350\u0027, 45000), (\u0027A380\u0027, 46000), (\u0027Boeing\u0027, 47000), (\u0027Airbus\u0027, 48000);", + "sql": "SELECT aircraft_model, altitude FROM Flight_Altitude WHERE aircraft_model IN (\u0027Boeing\u0027, \u0027Airbus\u0027) AND altitude \u003d (SELECT MAX(altitude) FROM Flight_Altitude WHERE aircraft_model IN (\u0027Boeing\u0027, \u0027Airbus\u0027));", + "sql_explanation": "Finds the maximum altitude reached by \u0027Boeing\u0027 and \u0027Airbus\u0027 aircraft models by using a subquery to find the maximum altitude and then filtering for aircraft models with that altitude." 
+}, { + "id": "1115", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the spacecraft that have been to the International Space Station (ISS) but not to the Moon.", + "sql_context": "CREATE TABLE SpacecraftMissions (SpacecraftID INT, SpacecraftName VARCHAR(50), Mission VARCHAR(50));", + "sql": "SELECT SpacecraftName FROM SpacecraftMissions WHERE Mission LIKE \u0027%ISS%\u0027 AND SpacecraftName NOT IN (SELECT SpacecraftName FROM SpacecraftMissions WHERE Mission LIKE \u0027%Moon%\u0027);", + "sql_explanation": "This query identifies the spacecraft that have been to the International Space Station (ISS) but not to the Moon." 
+}, { + "id": "2186", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of accidents involving each aircraft type.", + "sql_context": "CREATE TABLE Accidents (AccidentID INT, Date DATE, Location VARCHAR(50), AircraftType VARCHAR(50), Description TEXT, Fatalities INT); INSERT INTO Accidents (AccidentID, Date, Location, AircraftType, Description, Fatalities) VALUES (13, \u00272019-08-18\u0027, \u0027Rio de Janeiro\u0027, \u0027Boeing 737\u0027, \u0027Flight control issues\u0027, 0), (14, \u00272021-02-20\u0027, \u0027New Delhi\u0027, \u0027Airbus A320\u0027, \u0027Engine failure\u0027, 1), (15, \u00272022-07-25\u0027, \u0027Cape Town\u0027, \u0027Boeing 747\u0027, \u0027Landing gear malfunction\u0027, 2), (16, \u00272023-01-01\u0027, \u0027Sydney\u0027, \u0027Airbus A380\u0027, \u0027Hydraulic failure\u0027, 0), (17, \u00272024-03-14\u0027, \u0027Toronto\u0027, \u0027Boeing 777\u0027, \u0027Fuel leak\u0027, 1);", + "sql": "SELECT AircraftType, 100.0 * COUNT(*) / (SELECT COUNT(*) FROM Accidents) AS AccidentPercentage FROM Accidents GROUP BY AircraftType;", + "sql_explanation": "The SQL query calculates the percentage of accidents involving each aircraft type by dividing the number of accidents for each aircraft type by the total number of accidents and multiplying by 100.0." 
+}, { + "id": "3182", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 2 manufacturers of satellites by number of satellites deployed?", + "sql_context": "CREATE TABLE Manufacturers (name VARCHAR(50), satellites INT); INSERT INTO Manufacturers (name, satellites) VALUES (\u0027SpaceX\u0027, 200), (\u0027Boeing\u0027, 100), (\u0027Lockheed Martin\u0027, 75);", + "sql": "SELECT name FROM Manufacturers WHERE satellites IN (SELECT MAX(satellites) FROM Manufacturers) LIMIT 2;", + "sql_explanation": "This query uses a subquery to first find the maximum number of satellites deployed by a manufacturer, and then selects the names of the top 2 manufacturers from the Manufacturers table." 
+}, { + "id": "158", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission for each chemical product in the past month, and the percentage of the total emission?", + "sql_context": "CREATE TABLE product_co2 (product_id INT, co2_date DATE, product_name TEXT, co2_emission INT); INSERT INTO product_co2 (product_id, co2_date, product_name, co2_emission) VALUES (1, \u00272022-02-01\u0027, \u0027Product P\u0027, 5000), (2, \u00272022-03-05\u0027, \u0027Product Q\u0027, 7000), (3, \u00272022-04-10\u0027, \u0027Product R\u0027, 8000);", + "sql": "SELECT product_name, SUM(co2_emission) AS total_co2, 100.0 * SUM(co2_emission) / (SELECT SUM(co2_emission) FROM product_co2 WHERE co2_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE)) AS pct_of_total FROM product_co2 WHERE co2_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY product_name;", + "sql_explanation": "This query calculates the total CO2 emission for each chemical product in the past month and the percentage of the total emission by filtering the data with the WHERE clause and using the DATEADD function. It calculates the total CO2 emission for each product by using the SUM function and the percentage of the total emission by using the 100.0 * SUM(co2_emission) / (SELECT SUM(co2_emission) FROM product_co2 WHERE co2_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE)) formula. The results are grouped by the product name." 
+}, { + "id": "237", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which safety protocols were implemented in the \u0027West Coast Chemical Storage\u0027 facility in the past 6 months?", + "sql_context": "CREATE TABLE facilities (facility_id INT, name TEXT); CREATE TABLE safety_protocols (protocol_id INT, name TEXT, implementation_date DATE);", + "sql": "SELECT safety_protocols.name FROM safety_protocols INNER JOIN facilities ON safety_protocols.facility_id \u003d (SELECT facility_id FROM facilities WHERE name \u003d \u0027West Coast Chemical Storage\u0027) WHERE safety_protocols.implementation_date \u003e DATEADD(month, -6, GETDATE());", + "sql_explanation": "The SQL query performs a subquery to get the facility_id for the \u0027West Coast Chemical Storage\u0027 facility. It then performs an inner join between the safety_protocols and facilities tables on the facility_id column, filtering the results to only include rows where the implementation_date is within the past 6 months. This returns the safety protocols that were implemented in the \u0027West Coast Chemical Storage\u0027 facility in the past 6 months." 
+}, { + "id": "488", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of working hours per employee in a week for chemical plants in the Mid-Atlantic region, and the corresponding employee and plant?", + "sql_context": "CREATE TABLE employees (id INT, plant TEXT, employee TEXT, hours_worked FLOAT, work_date DATE); INSERT INTO employees (id, plant, employee, hours_worked, work_date) VALUES (1, \u0027Mid-Atlantic Plant 1\u0027, \u0027Employee A\u0027, 42, \u00272021-03-17\u0027), (2, \u0027Mid-Atlantic Plant 2\u0027, \u0027Employee B\u0027, 48, \u00272021-05-09\u0027);", + "sql": "SELECT MAX(hours_worked) AS max_hours, plant, employee FROM employees WHERE plant LIKE \u0027Mid-Atlantic%\u0027 GROUP BY plant, employee HAVING max_hours \u003d (SELECT MAX(hours_worked) FROM employees WHERE plant LIKE \u0027Mid-Atlantic%\u0027);", + "sql_explanation": "1. Filter employees in Mid-Atlantic plants. 2. Group employees by plant and employee. 3. Calculate the maximum working hours for each employee. 4. Find the maximum working hours across all employees and plants. 5. Filter employees with the maximum working hours. 6. Show corresponding employee, plant, and maximum working hours." 
+}, { + "id": "788", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the chemical code and production_date for the records with the highest production quantity", + "sql_context": "CREATE TABLE chemical_production (production_date DATE, chemical_code VARCHAR(10), quantity INT); INSERT INTO chemical_production (production_date, chemical_code, quantity) VALUES (\u00272021-01-03\u0027, \u0027A123\u0027, 450), (\u00272021-01-07\u0027, \u0027A123\u0027, 620), (\u00272021-01-12\u0027, \u0027A123\u0027, 390);", + "sql": "SELECT chemical_code, production_date, quantity FROM chemical_production WHERE (chemical_code, quantity) IN (SELECT chemical_code, MAX(quantity) FROM chemical_production GROUP BY chemical_code);", + "sql_explanation": "This query gets the chemical code and production_date for the records with the highest production quantity by selecting all records from the chemical_production table where the chemical_code and quantity is in the subquery that selects the chemical_code and max quantity from the chemical_production table grouped by chemical_code." 
+}, { + "id": "1181", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top three chemical products with the highest sales in Q1 2022, excluding those with safety violations.", + "sql_context": "CREATE TABLE Chemical_Sales (Product_ID INT, Product_Name VARCHAR(50), Sales INT, Safety_Violation BOOLEAN); INSERT INTO Chemical_Sales (Product_ID, Product_Name, Sales, Safety_Violation) VALUES (1, \u0027ProductA\u0027, 1000, FALSE), (2, \u0027ProductB\u0027, 1200, TRUE), (3, \u0027ProductC\u0027, 1500, FALSE), (4, \u0027ProductD\u0027, 800, FALSE);", + "sql": "SELECT Product_Name, Sales FROM (SELECT Product_Name, Sales, RANK() OVER(ORDER BY Sales DESC) as rnk FROM Chemical_Sales WHERE Safety_Violation \u003d FALSE) tmp WHERE rnk \u003c\u003d 3;", + "sql_explanation": "The SQL query identifies the top three chemical products with the highest sales in Q1 2022, excluding those with safety violations. It first creates a table named Chemical_Sales with four columns: Product_ID, Product_Name, Sales, and Safety_Violation. The INSERT statement adds sample data to the table. The subquery then selects the product name and sales, and assigns a rank based on sales in descending order. The outer query filters the results to show only the top three products." 
+}, { + "id": "79", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety rating for products with more than two unique country sources?", + "sql_context": "CREATE TABLE ingredient_sources (id INT PRIMARY KEY, product_name VARCHAR(100), ingredient_name VARCHAR(100), country_of_origin VARCHAR(50)); INSERT INTO ingredient_sources (id, product_name, ingredient_name, country_of_origin) VALUES (1, \u0027Shampoo\u0027, \u0027Aloe Vera\u0027, \u0027Mexico\u0027); INSERT INTO ingredient_sources (id, product_name, ingredient_name, country_of_origin) VALUES (2, \u0027Conditioner\u0027, \u0027Argan Oil\u0027, \u0027Morocco\u0027);", + "sql": "SELECT product_name, AVG(safety_rating) as avg_safety_rating FROM product_safety_records ps INNER JOIN (SELECT product_name, COUNT(DISTINCT country_of_origin) as num_countries FROM ingredient_sources GROUP BY product_name HAVING num_countries \u003e 2) isrc ON ps.product_name \u003d isrc.product_name GROUP BY product_name;", + "sql_explanation": "This query calculates the average safety rating for products that have more than two unique country sources. It does so by using a subquery to get the product_name and number of unique country sources, then joining this result with the product_safety_records table to calculate the average safety rating." 
+}, { + "id": "130", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total sales of cruelty-free products by region.", + "sql_context": "CREATE TABLE regions (region_id INT, region_name TEXT); CREATE TABLE sales_regions AS SELECT sales.sale_id, sales.product_id, regions.region_id, regions.region_name FROM sales JOIN regions ON sales.sale_country \u003d regions.region_name; INSERT INTO regions VALUES (1, \u0027North America\u0027), (2, \u0027Europe\u0027), (3, \u0027Asia\u0027), (4, \u0027South America\u0027), (5, \u0027Australia\u0027); INSERT INTO sales_regions VALUES (1, 1, 1, \u0027North America\u0027), (2, 3, 1, \u0027North America\u0027), (3, 4, 1, \u0027North America\u0027), (4, 1, 5, \u0027Australia\u0027), (5, 3, 2, \u0027Europe\u0027), (6, 4, 3, \u0027Asia\u0027), (7, 1, 4, \u0027South America\u0027);", + "sql": "SELECT regions.region_name, SUM(sales_regions.sale_id) as total_sales FROM sales_regions JOIN regions ON sales_regions.region_id \u003d regions.region_id WHERE sales_regions.product_id IN (SELECT products.product_id FROM products WHERE products.is_cruelty_free \u003d true) GROUP BY regions.region_name", + "sql_explanation": "Join sales_regions and regions tables on region_id and filter for cruelty-free products using a subquery, then group by region_name and calculate the total sales for each region." 
+}, { + "id": "136", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average preference scores for products with only passing safety inspections?", + "sql_context": "CREATE TABLE ConsumerPreferences (ProductID INT, ConsumerID INT, PreferenceScore INT, PRIMARY KEY (ProductID, ConsumerID)); INSERT INTO ConsumerPreferences (ProductID, ConsumerID, PreferenceScore) VALUES (1, 1, 5), (1, 2, 4), (2, 1, 3), (2, 2, 5); CREATE TABLE ProductSafetyRecords (ProductID INT, InspectionDate DATE, Result ENUM(\u0027Pass\u0027, \u0027Fail\u0027)); INSERT INTO ProductSafetyRecords (ProductID, InspectionDate, Result) VALUES (1, \u00272021-01-01\u0027, \u0027Pass\u0027), (1, \u00272021-02-01\u0027, \u0027Pass\u0027), (2, \u00272021-01-01\u0027, \u0027Fail\u0027);", + "sql": "SELECT AVG(PreferenceScore) as AvgScore FROM ConsumerPreferences JOIN ProductSafetyRecords ON ProductID \u003d ProductSafetyRecords.ProductID WHERE Result \u003d \u0027Pass\u0027 GROUP BY ProductID HAVING COUNT(*) \u003d (SELECT COUNT(*) FROM ProductSafetyRecords WHERE ProductID \u003d ProductSafetyRecords.ProductID);", + "sql_explanation": "Calculate the average preference score for each product with only passing inspections by joining the ConsumerPreferences and ProductSafetyRecords tables on ProductID, filtering for passing inspections, grouping by ProductID, and checking if the count of rows is equal to the count of all inspections for each ProductID." 
+}, { + "id": "464", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating for vegan and cruelty-free facial creams?", + "sql_context": "CREATE TABLE product_reviews (product_id INT, is_vegan BOOLEAN, is_cruelty_free BOOLEAN, rating INT); INSERT INTO product_reviews (product_id, is_vegan, is_cruelty_free, rating) VALUES (1, true, true, 4), (2, false, false, 3), (3, true, true, 5);", + "sql": "SELECT AVG(rating) FROM product_reviews pr JOIN (SELECT DISTINCT product_id FROM cosmetic_products WHERE product_name LIKE \u0027%facial cream%\u0027 AND is_vegan \u003d true AND is_cruelty_free \u003d true) cp ON pr.product_id \u003d cp.product_id;", + "sql_explanation": "The SQL query performs a subquery to select the distinct product_ids of vegan and cruelty-free facial creams, then performs a join with the product_reviews table and calculates the average rating." 
+}, { + "id": "1436", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which ingredients are used in more than 50% of the products?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT PRIMARY KEY, ingredient_name VARCHAR(50)); CREATE TABLE product_ingredients (product_id INT, ingredient_id INT, PRIMARY KEY (product_id, ingredient_id), FOREIGN KEY (product_id) REFERENCES products(product_id), FOREIGN KEY (ingredient_id) REFERENCES ingredients(ingredient_id)); CREATE TABLE products (product_id INT PRIMARY KEY); INSERT INTO ingredients (ingredient_id, ingredient_name) VALUES (1, \u0027Water\u0027), (2, \u0027Glycerin\u0027), (3, \u0027Shea Butter\u0027); INSERT INTO products (product_id) VALUES (1), (2), (3), (4), (5); INSERT INTO product_ingredients (product_id, ingredient_id) VALUES (1, 1), (1, 2), (2, 1), (2, 3), (3, 2), (3, 3), (4, 1), (4, 2), (5, 1);", + "sql": "SELECT ingredient_name, COUNT(*) as product_count FROM product_ingredients GROUP BY ingredient_id HAVING product_count \u003e (SELECT COUNT(*) * 0.5 FROM products);", + "sql_explanation": "This query counts the number of times each ingredient is used in the product_ingredients table, groups them by ingredient_id, and filters the results to only include ingredients that are used in more than 50% of the products. The subquery calculates 50% of the total number of products." 
+}, { + "id": "1944", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales of lipsticks and eyeshadows?", + "sql_context": "CREATE TABLE sales (id INT, product_id INT, sales_amount DECIMAL(10,2)); INSERT INTO sales (id, product_id, sales_amount) VALUES (1, 1, 100.00), (2, 1, 120.00), (3, 2, 75.00);", + "sql": "SELECT SUM(sales_amount) as total_sales FROM sales WHERE product_id IN (SELECT id FROM product WHERE category IN (\u0027Lipstick\u0027, \u0027Eyeshadow\u0027));", + "sql_explanation": "This query calculates the total sales from the sales table for lipsticks and eyeshadows by selecting the sum of sales amounts from the sales table where the product ID is in a subquery that selects the IDs of products in the lipstick or eyeshadow category. The subquery uses the IN operator to match the product IDs." 
+}, { + "id": "1948", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of cosmetic products in the database that are vegan and cruelty-free?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), vegan BOOLEAN, certification VARCHAR(20)); INSERT INTO products VALUES (1, \u0027Mascara\u0027, true, \u0027cruelty-free\u0027), (2, \u0027Lipstick\u0027, false, \u0027not_certified\u0027), (3, \u0027Foundation\u0027, true, \u0027cruelty-free\u0027);", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM products) AS percentage FROM products WHERE vegan \u003d true AND certification \u003d \u0027cruelty-free\u0027;", + "sql_explanation": "The query calculates the count of rows that are both vegan and cruelty-free and divides this by the total number of rows in the products table. The result is multiplied by 100 to get the percentage." 
+}, { + "id": "225", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in mental health scores between the student with the highest grade and the student with the lowest grade?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(50), grade FLOAT, mental_health_score INT); INSERT INTO students (id, name, grade, mental_health_score) VALUES (1, \u0027John Doe\u0027, 85.5, 70); INSERT INTO students (id, name, grade, mental_health_score) VALUES (2, \u0027Jane Smith\u0027, 68.0, 85);", + "sql": "SELECT (SELECT mental_health_score FROM students WHERE id \u003d (SELECT id FROM students WHERE grade \u003d (SELECT MAX(grade) FROM students))) - (SELECT mental_health_score FROM students WHERE id \u003d (SELECT id FROM students WHERE grade \u003d (SELECT MIN(grade) FROM students)));", + "sql_explanation": "Calculate the difference in mental health scores between the student with the highest grade and the student with the lowest grade." 
+}, { + "id": "481", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who have accessed open educational resources in the last year?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(255)); CREATE TABLE resources (id INT, name VARCHAR(255), access_date DATE); CREATE TABLE student_resources (student_id INT, resource_id INT);", + "sql": "SELECT 100.0 * COUNT(sr.student_id) / (SELECT COUNT(s.id) FROM students s) as pct_accessed FROM student_resources sr JOIN resources r ON sr.resource_id \u003d r.id WHERE r.access_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "This SQL query joins the student_resources table with the resources table on the resource_id column. It then filters the results to only include resources that were accessed in the last year and calculates the percentage of students who have accessed open educational resources." 
+}, { + "id": "248", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and birthplace of the artist with the most artwork in the \u0027Post-Impressionist\u0027 period?", + "sql_context": "CREATE TABLE Artworks (ArtworkID INT, ArtistID INT, Title VARCHAR(255), Period VARCHAR(255)); INSERT INTO Artworks VALUES (1, 1, \u0027The Starry Night\u0027, \u0027Post-Impressionist\u0027); CREATE TABLE Artists (ArtistID INT, Name VARCHAR(255), Birthplace VARCHAR(255)); INSERT INTO Artists VALUES (1, \u0027Vincent van Gogh\u0027, \u0027Netherlands\u0027);", + "sql": "SELECT Artists.Name, Artists.Birthplace FROM Artists INNER JOIN (SELECT ArtistID, COUNT(*) AS ArtworkCount FROM Artworks WHERE Period \u003d \u0027Post-Impressionist\u0027 GROUP BY ArtistID) SubQuery ON Artists.ArtistID \u003d SubQuery.ArtistID ORDER BY ArtworkCount DESC LIMIT 1;", + "sql_explanation": "The SQL query first creates a subquery that counts the number of artworks per artist for the \u0027Post-Impressionist\u0027 period. Then, it joins the Artists table with this subquery based on the ArtistID. The query orders the results by the ArtworkCount in descending order and limits the result to 1, returning the name and birthplace of the artist with the most artwork in the specified period." 
+}, { + "id": "143", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average claim amount for policyholders with multiple claims in the last 6 months?", + "sql_context": "CREATE TABLE Claims (ClaimID int, ClaimDate date, ClaimAmount decimal(10, 2), PolicyType varchar(50), PolicyholderID int); INSERT INTO Claims (ClaimID, ClaimDate, ClaimAmount, PolicyType, PolicyholderID) VALUES (1, \u00272022-01-15\u0027, 4500.00, \u0027Auto\u0027, 1001), (2, \u00272022-02-03\u0027, 3200.00, \u0027Home\u0027, 1002), (3, \u00272022-03-17\u0027, 5700.00, \u0027Auto\u0027, 1001), (4, \u00272022-04-01\u0027, 6100.00, \u0027Life\u0027, 1004), (5, \u00272022-05-12\u0027, 4200.00, \u0027Auto\u0027, 1001), (6, \u00272022-06-20\u0027, 3800.00, \u0027Home\u0027, 1002); CREATE TABLE Policyholders (PolicyholderID int, FirstName varchar(50), LastName varchar(50)); INSERT INTO Policyholders (PolicyholderID, FirstName, LastName) VALUES (1001, \u0027Mohammed\u0027, \u0027Ali\u0027), (1002, \u0027Sarah\u0027, \u0027Smith\u0027), (1003, \u0027Pedro\u0027, \u0027Gonzales\u0027), (1004, \u0027Anna\u0027, \u0027Jensen\u0027), (1005, \u0027Hiroshi\u0027, \u0027Tanaka\u0027);", + "sql": "SELECT PolicyholderID, AVG(ClaimAmount) AS AvgClaimAmount FROM (SELECT PolicyholderID, ClaimAmount FROM Claims WHERE ClaimDate \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY PolicyholderID, ClaimAmount HAVING COUNT(DISTINCT PolicyholderID) \u003e 1) AS Subquery GROUP BY PolicyholderID;", + "sql_explanation": "Calculate the average claim amount for policyholders with multiple claims in the last 6 months" +}, { + 
"id": "308", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of policies issued in \u0027California\u0027 having a claim amount greater than $1000.", + "sql_context": "CREATE TABLE policies (id INT, policyholder_id INT, issue_date DATE); CREATE TABLE claims (id INT, policy_id INT, claim_amount FLOAT); INSERT INTO policies (id, policyholder_id, issue_date) VALUES (1, 1, \u00272020-01-01\u0027); INSERT INTO claims (id, policy_id, claim_amount) VALUES (1, 1, 1200.00);", + "sql": "SELECT COUNT(policies.id) FROM policies INNER JOIN claims ON policies.id \u003d claims.policy_id WHERE policies.issue_date \u003e\u003d \u00272020-01-01\u0027 AND claims.claim_amount \u003e 1000 AND policies.policyholder_id IN (SELECT id FROM policyholders WHERE state \u003d \u0027CA\u0027);", + "sql_explanation": "This SQL query finds the total number of policies issued in \u0027California\u0027 having a claim amount greater than $1000. It does this by joining the policies and claims tables on the policy_id column, filtering for policies issued in \u0027California\u0027 (CA) after January 1st, 2020, and having a claim amount greater than $1000. It then counts the number of policies that meet these criteria." 
+}, { + "id": "363", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "find the number of policyholders from each state who have the highest average claim amount", + "sql_context": "CREATE TABLE Policyholder (PolicyholderID INT, State VARCHAR(255), ClaimAmount DECIMAL(10,2)); INSERT INTO Policyholder VALUES (1, \u0027NY\u0027, 5000), (2, \u0027NJ\u0027, 7000), (3, \u0027NY\u0027, 8000), (4, \u0027NJ\u0027, 6000), (5, \u0027NY\u0027, 9000);", + "sql": "SELECT State, AVG(ClaimAmount) as AvgClaimAmount FROM Policyholder GROUP BY State HAVING AVG(ClaimAmount) \u003d (SELECT MAX(AvgClaimAmount) FROM (SELECT State, AVG(ClaimAmount) as AvgClaimAmount FROM Policyholder GROUP BY State) AS SubQuery);", + "sql_explanation": "This query first calculates the average claim amount for each state using the AVG and GROUP BY functions. The HAVING clause filters the results to only show states with the highest average claim amount, which is determined by a subquery that finds the maximum average claim amount from a subquery that calculates the average claim amount for each state." 
+}, { + "id": "141", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum delivery time in days for shipments from China to Australia in January 2021?", + "sql_context": "CREATE TABLE deliveries (id INT, shipment_id INT, delivery_date DATE, delivery_time_days INT); INSERT INTO deliveries (id, shipment_id, delivery_date, delivery_time_days) VALUES (1, 1, \u00272021-01-01\u0027, 5); INSERT INTO deliveries (id, shipment_id, delivery_date, delivery_time_days) VALUES (2, 2, \u00272021-01-03\u0027, 7);", + "sql": "SELECT MAX(delivery_time_days) FROM deliveries D INNER JOIN (SELECT id AS shipment_id FROM shipments WHERE origin_country \u003d \u0027China\u0027 AND destination_country \u003d \u0027Australia\u0027 AND EXTRACT(MONTH FROM ship_date) \u003d 1 AND EXTRACT(YEAR FROM ship_date) \u003d 2021) AS S ON D.shipment_id \u003d S.shipment_id;", + "sql_explanation": "This query calculates the maximum delivery time in days for shipments from China to Australia in January 2021 by selecting the maximum delivery_time_days column in the deliveries table, joining it with a subquery that selects the id column from the shipments table where the origin_country is \u0027China\u0027, the destination_country is \u0027Australia\u0027, and the ship_date falls within January 2021." 
+}, { + "id": "616", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the suppliers who supply to customers in the same city.", + "sql_context": "CREATE TABLE Customers (CustomerID INT, CustomerName VARCHAR(50), WarehouseID INT, City VARCHAR(50), State VARCHAR(50)); INSERT INTO Customers (CustomerID, CustomerName, WarehouseID, City, State) VALUES (1, \u0027Customer A\u0027, 1, \u0027Los Angeles\u0027, \u0027CA\u0027), (2, \u0027Customer B\u0027, 2, \u0027Chicago\u0027, \u0027IL\u0027); CREATE TABLE Suppliers (SupplierID INT, SupplierName VARCHAR(50), WarehouseID INT, City VARCHAR(50), State VARCHAR(50)); INSERT INTO Suppliers (SupplierID, SupplierName, WarehouseID, City, State) VALUES (1, \u0027Supplier A\u0027, 1, \u0027Los Angeles\u0027, \u0027CA\u0027), (2, \u0027Supplier B\u0027, 2, \u0027Atlanta\u0027, \u0027GA\u0027), (3, \u0027Supplier C\u0027, 3, \u0027Miami\u0027, \u0027FL\u0027); CREATE TABLE CustomerSupplier (CustomerID INT, SupplierID INT); INSERT INTO CustomerSupplier (CustomerID, SupplierID) VALUES (1, 1), (2, 2);", + "sql": "SELECT C.CustomerName, S.SupplierName FROM Customers C JOIN Suppliers S ON C.City \u003d S.City WHERE EXISTS (SELECT * FROM CustomerSupplier CS WHERE CS.CustomerID \u003d C.CustomerID AND CS.SupplierID \u003d S.SupplierID);", + "sql_explanation": "This SQL query performs an inner join on the Customers and Suppliers tables based on the City column and filters the results by checking for matching rows in the CustomerSupplier table. It provides information about the customers and suppliers in the same city." 
+}, { + "id": "2252", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum delivery time for packages shipped to \u0027NYC\u0027?", + "sql_context": "CREATE TABLE Warehouse (id INT, name VARCHAR(20), city VARCHAR(20)); INSERT INTO Warehouse (id, name, city) VALUES (1, \u0027NYC Warehouse\u0027, \u0027NYC\u0027); CREATE TABLE Packages (id INT, warehouse_id INT, delivery_time INT, status VARCHAR(20)); INSERT INTO Packages (id, warehouse_id, delivery_time, status) VALUES (1, 1, 5, \u0027shipped\u0027), (2, 1, 7, \u0027shipped\u0027), (3, 1, 6, \u0027processing\u0027);", + "sql": "SELECT MIN(delivery_time) FROM Packages WHERE warehouse_id \u003d (SELECT id FROM Warehouse WHERE city \u003d \u0027NYC\u0027) AND status \u003d \u0027shipped\u0027;", + "sql_explanation": "This SQL query first finds the id of the \u0027NYC\u0027 warehouse by selecting from the Warehouse table. Then it finds the minimum delivery time for packages with a matching warehouse_id and \u0027shipped\u0027 status in the Packages table." 
+}, { + "id": "370", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the donation retention rate by age group?", + "sql_context": "CREATE TABLE donor_retention_data (id INT, age INT, donor INT, retained INT); INSERT INTO donor_retention_data (id, age, donor, retained) VALUES (1, 25, 100, 85), (2, 35, 120, 90), (3, 45, 150, 100), (4, 55, 180, 120);", + "sql": "SELECT age_group, AVG(retained/donor*100) as retention_rate FROM (SELECT CASE WHEN age \u003c 30 THEN \u0027Under 30\u0027 WHEN age \u003c 50 THEN \u002730-49\u0027 ELSE \u002750+\u0027 END as age_group, donor, retained FROM donor_retention_data) AS subquery GROUP BY age_group;", + "sql_explanation": "This SQL query calculates the donation retention rate for each age group in the donor_retention_data table. It uses a subquery to create a new column age_group based on the age column, and then uses the AVG function to find the average retained/donor*100 for each age_group. The GROUP BY clause is used to group the results by age_group." 
+}, { + "id": "1284", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount given to nonprofits offering programs in the categories of Education, Health, and Environment, excluding any duplicate records?", + "sql_context": "CREATE TABLE donations (id INT, donor TEXT, program TEXT, amount FLOAT); INSERT INTO donations (id, donor, program, amount) VALUES (1, \u0027Donor A\u0027, \u0027Education\u0027, 500.00), (2, \u0027Donor B\u0027, \u0027Health\u0027, 1000.00), (3, \u0027Donor C\u0027, \u0027Environment\u0027, 750.00), (4, \u0027Donor D\u0027, \u0027Education\u0027, 250.00), (5, \u0027Donor A\u0027, \u0027Health\u0027, 750.00);", + "sql": "SELECT SUM(amount) as total_donations FROM donations WHERE program IN (SELECT DISTINCT program FROM donations WHERE program IN (\u0027Education\u0027, \u0027Health\u0027, \u0027Environment\u0027));", + "sql_explanation": "We select distinct records from the donations table based on the program column and filter for nonprofits offering programs in the categories of Education, Health, and Environment. We then calculate the sum of the amounts for these records." 
+}, { + "id": "1372", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the mission statement for the nonprofit with the lowest average grant amount?", + "sql_context": "CREATE TABLE Nonprofits (NonprofitID INT, Name VARCHAR(50), City VARCHAR(50), State VARCHAR(2), Zip VARCHAR(10), MissionStatement TEXT); CREATE TABLE Grants (GrantID INT, DonorID INT, NonprofitID INT, GrantAmount DECIMAL(10,2), Date DATE);", + "sql": "SELECT MissionStatement FROM Nonprofits N WHERE N.NonprofitID \u003d (SELECT G.NonprofitID FROM Grants G GROUP BY G.NonprofitID ORDER BY AVG(GrantAmount) ASC LIMIT 1);", + "sql_explanation": "The SQL query first calculates the nonprofit ID with the lowest average grant amount using a subquery. It then selects the mission statement for that nonprofit using the WHERE clause." 
+}, { + "id": "1390", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the volunteers for the \u0027arts_education\u0027 program and when did they last volunteer from the \u0027African\u0027 region?", + "sql_context": "CREATE TABLE region (id INT, name VARCHAR(255)); CREATE TABLE program (id INT, name VARCHAR(255)); CREATE TABLE volunteer (id INT, region_id INT, program_id INT, name VARCHAR(255), last_volunteered DATE); INSERT INTO region (id, name) VALUES (1, \u0027Asia-Pacific\u0027), (2, \u0027Americas\u0027), (3, \u0027Europe\u0027), (4, \u0027African\u0027); INSERT INTO program (id, name) VALUES (1, \u0027youth_mentoring\u0027), (2, \u0027arts_education\u0027); INSERT INTO volunteer (id, region_id, program_id, name, last_volunteered) VALUES (1, 1, 1, \u0027Alice\u0027, \u00272022-01-01\u0027), (2, 2, 2, \u0027Bina\u0027, \u00272022-02-15\u0027), (3, 2, 2, \u0027Candela\u0027, \u00272022-03-05\u0027), (4, 2, 1, \u0027Dinesh\u0027, \u00272022-03-20\u0027), (5, 4, 2, \u0027Eve\u0027, \u00272022-04-01\u0027), (6, 4, 2, \u0027Fatima\u0027, \u00272022-03-10\u0027);", + "sql": "SELECT * FROM volunteer WHERE program_id \u003d (SELECT id FROM program WHERE name \u003d \u0027arts_education\u0027) AND region_id \u003d (SELECT id FROM region WHERE name \u003d \u0027African\u0027);", + "sql_explanation": "This SQL query finds the \u0027id\u0027 of the \u0027African\u0027 region and the \u0027arts_education\u0027 program from the \u0027region\u0027 and \u0027program\u0027 tables, filters the \u0027volunteer\u0027 table by those \u0027region_id\u0027 and \u0027program_id\u0027, 
and returns the name and last volunteered date for all volunteers from the \u0027African\u0027 region who volunteered for the \u0027arts_education\u0027 program." +}, { + "id": "3843", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete artifacts from the artifacts table that are not associated with any site.", + "sql_context": "CREATE TABLE excavation_sites (id INT, name VARCHAR(255)); CREATE TABLE artifacts (id INT, excavation_site_id INT, year INT, type VARCHAR(255));", + "sql": "DELETE FROM artifacts WHERE excavation_site_id NOT IN (SELECT id FROM excavation_sites);", + "sql_explanation": "The SQL query first selects all ids from the excavation_sites table. It then deletes all rows from the artifacts table where the excavation_site_id is not in the set of selected ids." 
+}, { + "id": "98", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of daily smart contract deployments on the Stellar network in 2022?", + "sql_context": "CREATE TABLE stellar_smart_contracts (contract_address VARCHAR(42), creation_timestamp TIMESTAMP);", + "sql": "SELECT AVG(num_deployments) AS avg_daily_deployments FROM (SELECT DATE_FORMAT(creation_timestamp, \u0027%Y-%m-%d\u0027) AS tx_date, COUNT(*) AS num_deployments FROM stellar_smart_contracts WHERE creation_timestamp \u003e\u003d \u00272022-01-01 00:00:00\u0027 AND creation_timestamp \u003c \u00272023-01-01 00:00:00\u0027 GROUP BY tx_date) subquery;", + "sql_explanation": "This query calculates the average number of daily smart contract deployments on the Stellar network in 2022. It groups the contracts by their creation date, counts the number of deployments for each day, and then calculates the average number of deployments per day." 
+}, { + "id": "397", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of all token transfers to and from exchanges in the last 7 days?", + "sql_context": "CREATE TABLE token_exchanges (token_name TEXT, from_exchange TEXT, to_exchange TEXT, transfer_amount REAL, timestamp TIMESTAMP); INSERT INTO token_exchanges (token_name, from_exchange, to_exchange, transfer_amount, timestamp) VALUES (\u0027Uniswap\u0027, \u0027Tether\u0027, NULL, 15000, \u00272022-01-10 10:45:22\u0027); INSERT INTO token_exchanges (token_name, from_exchange, to_exchange, transfer_amount, timestamp) VALUES (\u0027Sushiswap\u0027, NULL, \u0027Binance\u0027, 20000, \u00272022-01-11 11:18:35\u0027);", + "sql": "SELECT SUM(transfer_amount) as total_value FROM token_exchanges WHERE timestamp \u003e\u003d (SELECT timestamp FROM token_exchanges ORDER BY timestamp DESC LIMIT 1) - INTERVAL \u00277 days\u0027 AND (from_exchange IS NOT NULL OR to_exchange IS NOT NULL);", + "sql_explanation": "This SQL query calculates the total value of all token transfers to and from exchanges by filtering for rows with a timestamp greater than or equal to the most recent timestamp minus 7 days and a non-null value in either the from_exchange or to_exchange field, then calculating the sum of the transfer_amount field." 
+}, { + "id": "190", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have had a female mayor for the longest continuous period?", + "sql_context": "CREATE TABLE city (id INT, name VARCHAR(255)); INSERT INTO city (id, name) VALUES (1, \u0027New York\u0027), (2, \u0027Los Angeles\u0027), (3, \u0027Chicago\u0027), (4, \u0027Houston\u0027), (5, \u0027Phoenix\u0027); CREATE TABLE mayor (id INT, city_id INT, name VARCHAR(255), gender VARCHAR(10), start_year INT, end_year INT); INSERT INTO mayor (id, city_id, name, gender, start_year, end_year) VALUES (1, 1, \u0027John Smith\u0027, \u0027Male\u0027, 2018, 2021), (2, 1, \u0027Maria Rodriguez\u0027, \u0027Female\u0027, 2005, 2017), (3, 2, \u0027James Johnson\u0027, \u0027Male\u0027, 2015, 2020), (4, 3, \u0027William Lee\u0027, \u0027Male\u0027, 2000, 2005), (5, 3, \u0027Sarah Lee\u0027, \u0027Female\u0027, 2006, 2019), (6, 4, \u0027Robert Brown\u0027, \u0027Male\u0027, 2010, 2019), (7, 5, \u0027David Garcia\u0027, \u0027Male\u0027, 2005, 2011), (8, 5, \u0027Grace Kim\u0027, \u0027Female\u0027, 2012, 2021);", + "sql": "SELECT c.name, MAX(m.end_year - m.start_year) as max_tenure FROM city c JOIN mayor m ON c.id \u003d m.city_id WHERE m.gender \u003d \u0027Female\u0027 GROUP BY c.name HAVING MAX(m.end_year - m.start_year) \u003e\u003d ALL (SELECT MAX(m2.end_year - m2.start_year) FROM mayor m2 WHERE m2.gender \u003d \u0027Female\u0027)", + "sql_explanation": "This query first selects the names of all cities and the maximum continuous tenure of their female mayors by using the MAX function and the GROUP BY clause. 
It then filters out the cities that have had a female mayor for the longest continuous period by using the HAVING clause with the ALL keyword." +}, { + "id": "1126", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many registered voters have changed their address in \u0027voter_registration\u0027 table, in the last 6 months?", + "sql_context": "CREATE TABLE voter_registration (voter_id INT, voter_name VARCHAR(255), registration_date DATE, new_address VARCHAR(255));", + "sql": "SELECT COUNT(voter_id) FROM (SELECT voter_id FROM voter_registration WHERE new_address IS NOT NULL AND registration_date \u003e\u003d DATEADD(month, -6, GETDATE())) AS address_changes;", + "sql_explanation": "This query first selects the voter IDs of all voters who have changed their address in the last 6 months, and then calculates the number of such voters by counting the unique voter IDs." 
+}, { + "id": "3076", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the open data ambassadors in the region of Lombardy?", + "sql_context": "CREATE TABLE regions (id INT PRIMARY KEY, name TEXT); INSERT INTO regions (id, name) VALUES (1, \u0027Lombardy\u0027); CREATE TABLE open_data_ambassadors (id INT PRIMARY KEY, region_id INT, name TEXT); INSERT INTO open_data_ambassadors (id, region_id, name) VALUES (1, 1, \u0027Ambassador A\u0027); INSERT INTO open_data_ambassadors (id, region_id, name) VALUES (2, 1, \u0027Ambassador B\u0027); INSERT INTO open_data_ambassadors (id, region_id, name) VALUES (3, 1, \u0027Ambassador C\u0027);", + "sql": "SELECT name FROM open_data_ambassadors WHERE region_id \u003d (SELECT id FROM regions WHERE name \u003d \u0027Lombardy\u0027);", + "sql_explanation": "The SQL query first selects the id of the region \u0027Lombardy\u0027 from the regions table. Then, it selects the name of the open data ambassadors from the open_data_ambassadors table that have a region_id equal to the id of \u0027Lombardy\u0027." 
+}, { + "id": "3986", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the transportation methods with a frequency less than the average frequency in the \u0027city_transport\u0027 database.", + "sql_context": "CREATE TABLE transport (id INT, method VARCHAR(50), frequency INT); INSERT INTO transport (id, method, frequency) VALUES (1, \u0027Bicycle\u0027, 1500), (2, \u0027Private Car\u0027, 8000), (3, \u0027Public Bus\u0027, 4000), (4, \u0027Subway\u0027, 3500), (5, \u0027Motorcycle\u0027, 600);", + "sql": "SELECT method FROM transport WHERE frequency \u003c (SELECT AVG(frequency) FROM transport);", + "sql_explanation": "This SQL query lists transportation methods with frequencies less than the average frequency by using the AVG() aggregate function and comparing it to the frequency of each transportation method." 
+}, { + "id": "45", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the ratio of community health workers to mental health professionals in urban areas.", + "sql_context": "CREATE TABLE HealthWorkers (Location VARCHAR(20), WorkerType VARCHAR(20), Count INT); INSERT INTO HealthWorkers (Location, WorkerType, Count) VALUES (\u0027Urban\u0027, \u0027MentalHealthProfessional\u0027, 1200), (\u0027Urban\u0027, \u0027CommunityHealthWorker\u0027, 800), (\u0027Rural\u0027, \u0027MentalHealthProfessional\u0027, 600), (\u0027Rural\u0027, \u0027CommunityHealthWorker\u0027, 400);", + "sql": "SELECT AVG(CommunityHealthWorkerCount / MentalHealthProfessionalCount) AS Ratio FROM (SELECT SUM(CASE WHEN Location \u003d \u0027Urban\u0027 AND WorkerType \u003d \u0027MentalHealthProfessional\u0027 THEN Count ELSE 0 END) AS MentalHealthProfessionalCount, SUM(CASE WHEN Location \u003d \u0027Urban\u0027 AND WorkerType \u003d \u0027CommunityHealthWorker\u0027 THEN Count ELSE 0 END) AS CommunityHealthWorkerCount FROM HealthWorkers);", + "sql_explanation": "The SQL query first calculates the sum of the Count column for mental health professionals and community health workers in urban areas in the HealthWorkers table. Then, it calculates the ratio of community health workers to mental health professionals in urban areas." 
+}, { + "id": "210", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 regions with the highest percentage of cultural competency trainings conducted in languages other than English.", + "sql_context": "CREATE TABLE CulturalCompetencyTrainings (Region VARCHAR(20), Language VARCHAR(20), Count INT); INSERT INTO CulturalCompetencyTrainings (Region, Language, Count) VALUES (\u0027Northeast\u0027, \u0027Spanish\u0027, 50), (\u0027Northeast\u0027, \u0027French\u0027, 25), (\u0027Northeast\u0027, \u0027Mandarin\u0027, 30), (\u0027Southeast\u0027, \u0027Spanish\u0027, 75), (\u0027Southeast\u0027, \u0027French\u0027, 50), (\u0027Southeast\u0027, \u0027Hmong\u0027, 25), (\u0027Midwest\u0027, \u0027Spanish\u0027, 100), (\u0027Midwest\u0027, \u0027French\u0027, 75), (\u0027Midwest\u0027, \u0027Somali\u0027, 50), (\u0027West\u0027, \u0027Spanish\u0027, 125), (\u0027West\u0027, \u0027French\u0027, 100), (\u0027West\u0027, \u0027Tagalog\u0027, 75);", + "sql": "SELECT Region, MAX(Percentage) FROM (SELECT Region, (SUM(CASE WHEN Language \u003c\u003e \u0027English\u0027 THEN Count ELSE 0 END) / SUM(Count)) * 100 AS Percentage FROM CulturalCompetencyTrainings GROUP BY Region) AS LanguagePercentages GROUP BY Region ORDER BY Percentage DESC LIMIT 2;", + "sql_explanation": "The SQL query calculates the percentage of cultural competency trainings conducted in languages other than English for each region. It then finds the maximum percentage for each region and orders the results in descending order, limiting the output to the top 2 regions." 
+}, { + "id": "283", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of community health workers serving a single zip code who identify as disabled?", + "sql_context": "CREATE TABLE community_health_workers (worker_id INT, zip_code VARCHAR(10), disability_identification VARCHAR(10)); INSERT INTO community_health_workers (worker_id, zip_code, disability_identification) VALUES (1, \u002710001\u0027, \u0027Yes\u0027), (2, \u002710001\u0027, \u0027No\u0027), (3, \u002711222\u0027, \u0027Yes\u0027);", + "sql": "SELECT zip_code, MAX(cnt) as max_workers FROM (SELECT zip_code, disability_identification, COUNT(*) as cnt FROM community_health_workers GROUP BY zip_code, disability_identification) as subquery WHERE disability_identification \u003d \u0027Yes\u0027 GROUP BY zip_code;", + "sql_explanation": "This SQL query calculates the maximum number of community health workers serving a single zip code who identify as disabled. It uses a subquery to first count the number of community health workers serving each zip code and disability identification, and then uses the MAX function to find the zip code with the maximum number of workers who identify as disabled." 
+}, { + "id": "1586", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of mental health providers in a single county?", + "sql_context": "CREATE TABLE Counties (county_id INT, county_name TEXT); CREATE TABLE Providers (provider_id INT, provider_type TEXT, county_id INT);", + "sql": "SELECT MAX(count_providers) FROM (SELECT COUNT(*) as count_providers FROM Providers p JOIN Counties c ON p.county_id \u003d c.county_id GROUP BY p.county_id);", + "sql_explanation": "Find the maximum number of mental health providers in a single county by grouping providers by county_id and selecting the maximum count." 
+}, { + "id": "1703", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of community health workers who speak a language other than English?", + "sql_context": "CREATE TABLE Languages (language_id INT, language_name TEXT);CREATE TABLE CommunityHealthWorkers (worker_id INT, worker_language INT, worker_age INT);", + "sql": "SELECT AVG(worker_age) FROM CommunityHealthWorkers cwh WHERE worker_language IN (SELECT language_id FROM Languages WHERE language_name !\u003d \u0027English\u0027);", + "sql_explanation": "The SQL query calculates the average age of community health workers who speak a language other than English. It does this by selecting the worker_age column from the CommunityHealthWorkers table where the worker_language column is in the set of language_id values for languages that are not English. Then, it calculates the average worker_age for these rows." 
+}, { + "id": "5", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of investigative journalism articles published in the last 3 months, and what percentage of the total publications do they represent?", + "sql_context": "CREATE TABLE publications (id INT, title VARCHAR(100), genre VARCHAR(20), publication_date DATE);INSERT INTO publications (id, title, genre, publication_date) VALUES (1, \u0027Uncovering Corruption\u0027, \u0027investigative journalism\u0027, \u00272022-04-01\u0027);INSERT INTO publications (id, title, genre, publication_date) VALUES (2, \u0027The Hidden Truth\u0027, \u0027opinion\u0027, \u00272022-03-15\u0027);", + "sql": "SELECT COUNT(*) AS total_investigative_articles FROM publications WHERE genre \u003d \u0027investigative journalism\u0027 AND publication_date \u003e\u003d DATEADD(month, -3, GETDATE());SELECT COUNT(*) AS total_publications FROM publications;SELECT (total_investigative_articles * 100.0 / total_publications) AS percentage FROM (SELECT COUNT(*) AS total_investigative_articles FROM publications WHERE genre \u003d \u0027investigative journalism\u0027 AND publication_date \u003e\u003d DATEADD(month, -3, GETDATE())) AS investigative_articles, (SELECT COUNT(*) AS total_publications FROM publications) AS total_publications;", + "sql_explanation": "The SQL query performs three separate queries. The first query counts the number of investigative journalism articles published in the last 3 months. The second query counts the total number of publications. 
The third query calculates the percentage of investigative journalism articles out of the total publications by dividing the number of investigative journalism articles by the total number of publications and multiplying the result by 100.0. The DATEADD and GETDATE functions are used to determine the date 3 months ago, and the COUNT function is used to count the number of rows in the publications table that meet the specified criteria." +}, { + "id": "500", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the author who has published the most articles?", + "sql_context": "CREATE TABLE authors_articles (author_id INT, article_id INT); INSERT INTO authors_articles (author_id, article_id) VALUES (1, 1), (1, 2), (2, 3);CREATE TABLE authors (id INT, name VARCHAR(50)); INSERT INTO authors (id, name) VALUES (1, \u0027Alice\u0027), (2, \u0027Bob\u0027);", + "sql": "SELECT authors.name FROM authors JOIN (SELECT author_id, COUNT(*) as article_count FROM authors_articles GROUP BY author_id ORDER BY article_count DESC LIMIT 1) as article_counts ON authors.id \u003d article_counts.author_id;", + "sql_explanation": "This query finds the name of the author who has published the most articles. It does so by using a subquery to first count the number of articles for each author, grouped by author_id. It then orders the results by the count in descending order and returns the top result. Finally, it joins the result of the subquery with the \u0027authors\u0027 table to get the name of the author." 
+}, { + "id": "710", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and cities of all journalists who have worked on investigative stories in the \"journalists\" and \"stories\" tables?", + "sql_context": "CREATE TABLE journalists (id INT, name VARCHAR(50), city VARCHAR(50)); INSERT INTO journalists (id, name, city) VALUES (1, \u0027Dana\u0027, \u0027New York\u0027), (2, \u0027Eliot\u0027, \u0027Los Angeles\u0027), (3, \u0027Fiona\u0027, \u0027Chicago\u0027); CREATE TABLE stories (id INT, title VARCHAR(50), journalist_id INT); INSERT INTO stories (id, title, journalist_id) VALUES (1, \u0027The Big Heist\u0027, 1), (2, \u0027Mystery of the Lost Art\u0027, 2), (3, \u0027The Unsolved Case\u0027, 3), (4, \u0027The Secret Files\u0027, 1);", + "sql": "SELECT journalists.name, journalists.city FROM journalists INNER JOIN stories ON journalists.id \u003d stories.journalist_id WHERE stories.id IN (SELECT id FROM stories WHERE title LIKE \u0027%investigative%\u0027);", + "sql_explanation": "This query lists the names and cities of all journalists who have worked on investigative stories. It first performs an inner join between the \"journalists\" and \"stories\" tables on the \"id\" and \"journalist_id\" columns, respectively. Then it filters the results to only include rows with stories.id that appear in the subquery, which selects the ids of all stories with the word \u0027investigative\u0027 in the title. Finally, it returns the name and city columns of the resulting rows." 
+}, { + "id": "2745", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all records from the \u0027investigative_projects\u0027 table where the project is assigned to \u0027John Doe\u0027", + "sql_context": "CREATE TABLE investigative_projects (project_id INT, project_name VARCHAR(255), status VARCHAR(20), assigned_to INT);", + "sql": "DELETE FROM investigative_projects WHERE assigned_to \u003d (SELECT employee_id FROM employees WHERE name \u003d \u0027John Doe\u0027);", + "sql_explanation": "The SQL query deletes all records from the \u0027investigative_projects\u0027 table where the \u0027assigned_to\u0027 is the employee with name \u0027John Doe\u0027." 
+}, { + "id": "122", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record for a donation of $500 made by a corporate donor named \"ABC Corp\" on March 15, 2022.", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, donation_date DATE, amount_donated DECIMAL(10,2)); CREATE TABLE donors (id INT, name TEXT, donor_type TEXT);", + "sql": "INSERT INTO donations (id, donor_id, donation_date, amount_donated) VALUES (1, (SELECT id FROM donors WHERE name \u003d \u0027ABC Corp\u0027 AND donor_type \u003d \u0027Corporate\u0027 LIMIT 1), \u00272022-03-15\u0027, 500); INSERT INTO donors (id, name, donor_type) VALUES (1, \u0027ABC Corp\u0027, \u0027Corporate\u0027) ON DUPLICATE KEY UPDATE id \u003d id;", + "sql_explanation": "This query inserts a new record for a donation of $500 made by a corporate donor named \"ABC Corp\" on March 15, 2022. It first checks if a donor with the name \"ABC Corp\" and donor type \"Corporate\" already exists in the donors table, and if so, it uses the ON DUPLICATE KEY UPDATE clause to update the existing record. Then, it inserts a new record into the donations table with the donation information." 
+}, { + "id": "148", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs had the most impact in the last three months?", + "sql_context": "CREATE TABLE Programs (Program TEXT, Impact DECIMAL); INSERT INTO Programs (Program, Impact) VALUES (\u0027Tree Planting\u0027, 1000); INSERT INTO Programs (Program, Impact) VALUES (\u0027Food Bank\u0027, 2000);", + "sql": "SELECT Program, Impact FROM Programs WHERE Program IN (SELECT Program FROM Volunteers WHERE VolunteerDate \u003e\u003d DATEADD(month, -3, GETDATE()) GROUP BY Program HAVING COUNT(*) \u003e\u003d (SELECT AVG(COUNT(*)) FROM Volunteers WHERE VolunteerDate \u003e\u003d DATEADD(month, -3, GETDATE()) GROUP BY Program));", + "sql_explanation": "The SQL query identifies which programs had the most impact in the last three months by selecting the Program and Impact columns from the Programs table where the Program appears in a subquery that selects the Program from the Volunteers table where the VolunteerDate is within the last three months, grouped by Program and having a count greater than or equal to the average count of Programs for the same time period." 
+}, { + "id": "947", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total donation amount by new donors in Q1 2023?", + "sql_context": "CREATE TABLE donors (donor_id INT, donation_date DATE, donation_amount DECIMAL(10, 2)); INSERT INTO donors VALUES (11, \u00272023-01-02\u0027, 200.00), (12, \u00272023-03-10\u0027, 300.00), (13, \u00272023-01-15\u0027, 100.00);", + "sql": "SELECT SUM(donation_amount) FROM donors WHERE donor_id IN (SELECT donor_id FROM donors WHERE donation_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-03-31\u0027 GROUP BY donor_id HAVING COUNT(*) \u003d 1);", + "sql_explanation": "This query calculates the total donation amount from new donors in Q1 2023 by finding donor_ids with donation_dates in Q1 2023 and then summing their donation_amounts." 
+}, { + "id": "1570", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by individual donors based in Canada in the year 2020?", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(100), country VARCHAR(50), donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO donors (id, name, country, donation_amount, donation_date) VALUES (1, \u0027John Doe\u0027, \u0027Canada\u0027, 50.00, \u00272020-01-01\u0027); INSERT INTO donors (id, name, country, donation_amount, donation_date) VALUES (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 100.00, \u00272020-05-15\u0027);", + "sql": "SELECT SUM(donation_amount) FROM donors WHERE country \u003d \u0027Canada\u0027 AND YEAR(donation_date) \u003d 2020 AND donors.id NOT IN (SELECT donor_id FROM corporations);", + "sql_explanation": "This query calculates the total donation amount from individual donors in Canada for the year 2020 by summing the donation_amount field. It filters out donations from corporations by excluding rows with a matching donor_id in the corporations table, which is not shown here." 
+}, { + "id": "1579", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of volunteers who have not volunteered in the last 6 months?", + "sql_context": "CREATE TABLE volunteer_hours (volunteer_id INT, hours INT, volunteer_date DATE); INSERT INTO volunteer_hours (volunteer_id, hours, volunteer_date) VALUES (3, 2, \u00272022-03-10\u0027), (4, 6, \u00272022-02-15\u0027);", + "sql": "DELETE FROM volunteers WHERE volunteer_id NOT IN (SELECT volunteer_id FROM volunteer_hours WHERE volunteer_date \u003e\u003d (CURRENT_DATE - INTERVAL \u00276 months\u0027));", + "sql_explanation": "This query deletes records of volunteers who have not volunteered in the last 6 months by first selecting the IDs of volunteers who have volunteered in the last 6 months and then deleting records of volunteers whose IDs are not in that list." 
+}, { + "id": "1634", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total amount donated by individuals in the city of Seattle in 2021?", + "sql_context": "CREATE TABLE donors (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE, city VARCHAR(50));", + "sql": "SELECT SUM(donation_amount) FROM donors WHERE city \u003d \u0027Seattle\u0027 AND YEAR(donation_date) \u003d 2021 AND donor_id NOT IN (SELECT donor_id FROM organizations);", + "sql_explanation": "The query calculates the sum of donation_amount from the donors table where city is \u0027Seattle\u0027 and the year of donation_date is 2021. It also ensures that the donor_id is not present in the organizations table." 
+}, { + "id": "2826", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of donations from male donors?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, gender TEXT, amount_donated DECIMAL(10,2));", + "sql": "SELECT (SUM(amount_donated) / (SELECT SUM(amount_donated) FROM donors) * 100) FROM donors WHERE gender \u003d \u0027male\u0027;", + "sql_explanation": "This query calculates the percentage of donations from male donors by dividing the sum of the \u0027amount_donated\u0027 column for all records where the \u0027gender\u0027 column is equal to \u0027male\u0027 by the total sum of the \u0027amount_donated\u0027 column for all records, and then multiplying by 100." 
+}, { + "id": "3615", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which donors have donated more than the average donation amount?", + "sql_context": "CREATE TABLE Donors (DonorName VARCHAR(50), DonationAmount DECIMAL(10,2)); INSERT INTO Donors (DonorName, DonationAmount) VALUES (\u0027John Smith\u0027, 5000.00), (\u0027Jane Doe\u0027, 3000.00), (\u0027Mike Johnson\u0027, 7000.00), (\u0027Sara Connor\u0027, 6000.00);", + "sql": "SELECT DonorName FROM Donors WHERE DonationAmount \u003e (SELECT AVG(DonationAmount) FROM Donors);", + "sql_explanation": "This SQL query identifies donors who have donated more than the average donation amount by using a subquery to find the average donation amount and comparing each donor\u0027s donation amount to it." 
+}, { + "id": "4051", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all programs in the \u0027programs\u0027 table that have a budget greater than the average program budget.", + "sql_context": "CREATE TABLE programs (program_id INT, program_name TEXT, budget DECIMAL(10, 2)); INSERT INTO programs VALUES (1, \u0027Education\u0027, 10000.00), (2, \u0027Health\u0027, 15000.00), (3, \u0027Environment\u0027, 8000.00);", + "sql": "SELECT program_name FROM programs WHERE budget \u003e (SELECT AVG(budget) FROM programs);", + "sql_explanation": "The SQL query uses a subquery to calculate the average program budget and then uses the WHERE clause to filter the results to only include programs with a budget greater than the average." 
+}, { + "id": "50", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs had the highest volunteer participation rate in the last quarter?", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Budget DECIMAL(10,2), NumVolunteers INT); CREATE TABLE VolunteerEvents (EventID INT, ProgramID INT, EventDate DATE, NumVolunteers INT);", + "sql": "SELECT p.ProgramName, COUNT(v.EventID) / (SELECT COUNT(*) FROM VolunteerEvents WHERE EventDate \u003e\u003d DATEADD(quarter, -1, GETDATE())) * 100.0 AS VolunteerParticipationRate FROM Programs p INNER JOIN VolunteerEvents v ON p.ProgramID \u003d v.ProgramID WHERE v.EventDate \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY p.ProgramName ORDER BY VolunteerParticipationRate DESC;", + "sql_explanation": "This query calculates the volunteer participation rate for each program in the last quarter. It does this by joining the Programs table with the VolunteerEvents table, filtering for events in the last quarter, then calculating the count of events for each program and dividing it by the total number of events in the last quarter. The result is multiplied by 100.0 to convert the decimal to a percentage." 
+}, { + "id": "262", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor in H1 2021, grouped by country?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonationDate DATE, Country TEXT); INSERT INTO Donors (DonorID, DonationDate, Country) VALUES (1, \u00272021-01-01\u0027, \u0027USA\u0027), (2, \u00272021-02-15\u0027, \u0027Canada\u0027), (3, \u00272021-03-01\u0027, \u0027Mexico\u0027);", + "sql": "SELECT Country, SUM(TotalDonation) as \u0027Total Donated in H1 2021\u0027 FROM (SELECT DonorID, SUM(TotalDonation) as TotalDonation, Country FROM Donors WHERE DonationDate BETWEEN \u00272021-01-01\u0027 AND \u00272021-06-30\u0027 GROUP BY DonorID, Country) as Subquery GROUP BY Country;", + "sql_explanation": "This query calculates the total amount donated by each donor in H1 2021, grouped by country by summing the TotalDonation field for each unique country in a subquery and grouping by Country in the outer query, filtering by the date range of DonationDate." 
+}, { + "id": "265", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the programs that have the same number of participants as volunteers.", + "sql_context": "CREATE TABLE programs (id INT, name TEXT, participants INT); INSERT INTO programs (id, name, participants) VALUES (1, \u0027Education\u0027, 60), (2, \u0027Health\u0027, 40), (3, \u0027Environment\u0027, 70), (4, \u0027Arts\u0027, 30); CREATE TABLE volunteers (id INT, program_id INT); INSERT INTO volunteers (id, program_id) VALUES (1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3), (7, 3), (8, 4);", + "sql": "SELECT programs.name FROM programs INNER JOIN (SELECT program_id, COUNT(*) AS num_volunteers FROM volunteers GROUP BY program_id) AS volunteer_counts ON programs.id \u003d volunteer_counts.program_id WHERE programs.participants \u003d volunteer_counts.num_volunteers;", + "sql_explanation": "This query first calculates the number of volunteers for each program by grouping the volunteers table by the program_id column and using the COUNT function. Then, it performs an inner join between the programs table and the subquery on the id and program_id columns, respectively. Finally, it filters the combined table to only include records where the number of volunteers is equal to the number of participants for each program." 
+}, { + "id": "409", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which volunteers have volunteered for both the Youth and Sports programs in 2022?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName VARCHAR(50)); INSERT INTO Volunteers (VolunteerID, VolunteerName) VALUES (1, \u0027Olivia Jones\u0027), (2, \u0027Mason Lee\u0027), (3, \u0027Ava Thompson\u0027); CREATE TABLE VolunteerPrograms (VolunteerID INT, Program VARCHAR(50), VolunteerDate DATE); INSERT INTO VolunteerPrograms (VolunteerID, Program, VolunteerDate) VALUES (1, \u0027Youth\u0027, \u00272022-04-01\u0027), (1, \u0027Sports\u0027, \u00272022-05-01\u0027), (2, \u0027Youth\u0027, \u00272022-04-15\u0027), (2, \u0027Sports\u0027, \u00272022-05-15\u0027), (3, \u0027Youth\u0027, \u00272022-04-20\u0027), (3, \u0027Arts and Culture\u0027, \u00272022-05-20\u0027);", + "sql": "SELECT DISTINCT VolunteerName FROM Volunteers v INNER JOIN VolunteerPrograms y ON v.VolunteerID \u003d y.VolunteerID WHERE y.Program \u003d \u0027Youth\u0027 AND v.VolunteerID IN (SELECT VolunteerID FROM VolunteerPrograms s WHERE s.Program \u003d \u0027Sports\u0027);", + "sql_explanation": "The SQL query uses the INNER JOIN clause to combine the data from the Volunteers and VolunteerPrograms tables based on the volunteer ID. The WHERE clause filters records for the Youth and Sports programs in 2022. The DISTINCT keyword is used to return unique volunteer names." 
+}, { + "id": "417", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs had donations exceeding their budgets?", + "sql_context": "CREATE TABLE Donations (DonationID INT, ProgramID INT, DonationAmount DECIMAL(5,2)); INSERT INTO Donations (DonationID, ProgramID, DonationAmount) VALUES (1, 1, 5500.00), (2, 2, 6000.00), (3, 3, 7000.00);", + "sql": "SELECT p.ProgramName, (p.Budget - d.TotalDonations) as Difference FROM Programs p JOIN (SELECT ProgramID, SUM(DonationAmount) as TotalDonations FROM Donations GROUP BY ProgramID) d ON p.ProgramID \u003d d.ProgramID WHERE Difference \u003c 0;", + "sql_explanation": "This query identifies programs with donations exceeding their budgets by joining Programs and a subquery that sums the donation amounts for each program, then calculates the difference between the budget and total donations for each program, and returns only the records where the difference is negative." 
+}, { + "id": "720", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique donors for each program category, excluding those with a total donation amount below $10,000?", + "sql_context": "CREATE TABLE donor_category (donor_id INT, program_category VARCHAR(20), donation_amount INT);INSERT INTO donor_category VALUES (1, \u0027Arts\u0027, 5000), (2, \u0027Arts\u0027, 3000), (3, \u0027Education\u0027, 7000), (4, \u0027Health\u0027, 15000), (5, \u0027Arts\u0027, 8000), (6, \u0027Education\u0027, 6000);", + "sql": "SELECT program_category, COUNT(DISTINCT donor_id) FROM donor_category WHERE program_category IN (SELECT program_category FROM donor_category WHERE donation_amount \u003e\u003d 10000) GROUP BY program_category;", + "sql_explanation": "The SQL query uses a subquery to get the program categories with a total donation amount of $10,000 or more and then selects the unique donor count for those categories. The result is the total number of unique donors for each program category, excluding those with a total donation amount below $10,000." 
+}, { + "id": "1359", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of donations received by the top 3 cities?", + "sql_context": "CREATE TABLE DonorCities (City VARCHAR(50), Population INT); INSERT INTO DonorCities (City, Population) VALUES (\u0027San Francisco\u0027, 884363), (\u0027Seattle\u0027, 753359), (\u0027Boston\u0027, 694543); CREATE TABLE Donations (DonationID INT, City VARCHAR(50), DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, City, DonationAmount) VALUES (1, \u0027San Francisco\u0027, 500000.00), (2, \u0027Seattle\u0027, 400000.00), (3, \u0027Boston\u0027, 300000.00), (4, \u0027San Francisco\u0027, 600000.00), (5, \u0027Seattle\u0027, 700000.00);", + "sql": "SELECT City, SUM(DonationAmount) * 100.0 / (SELECT SUM(DonationAmount) FROM Donations) AS percentage FROM Donations GROUP BY City ORDER BY percentage DESC LIMIT 3;", + "sql_explanation": "This query first calculates the total donation amount for all cities and then calculates the percentage of donations received by each city using a subquery. Finally, it selects the top 3 cities based on the percentage of donations received." 
+}, { + "id": "2256", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of volunteers who participated in the \u0027Education\u0027 program?", + "sql_context": "CREATE TABLE Programs (ProgramID int, Name varchar(50), Budget money); CREATE TABLE Volunteers (VolunteerID int, Name varchar(50), Age int, ProgramID int); INSERT INTO Programs (ProgramID, Name, Budget) VALUES (1, \u0027Education\u0027, 10000), (2, \u0027Healthcare\u0027, 15000); INSERT INTO Volunteers (VolunteerID, Name, Age, ProgramID) VALUES (1, \u0027Alice\u0027, 25, 1), (2, \u0027Bob\u0027, 22, 1), (3, \u0027Charlie\u0027, 30, 2), (4, \u0027David\u0027, 28, 2);", + "sql": "SELECT AVG(V.Age) as AvgAge FROM Volunteers V WHERE V.ProgramID \u003d (SELECT P.ProgramID FROM Programs P WHERE P.Name \u003d \u0027Education\u0027);", + "sql_explanation": "The SQL query calculates the average age of volunteers who participated in the \u0027Education\u0027 program by joining the Programs and Volunteers tables based on the ProgramID and then filtering the records based on the program name." 
+}, { + "id": "51", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of social good technology projects and digital divide reduction programs in the top 3 most active countries in Africa in H1 and H2 of 2022?", + "sql_context": "CREATE TABLE Social_Good_Tech_Africa (country VARCHAR(50), project_type VARCHAR(50), projects INT); INSERT INTO Social_Good_Tech_Africa (country, project_type, projects) VALUES (\u0027Nigeria\u0027, \u0027social_good_tech\u0027, 100), (\u0027Nigeria\u0027, \u0027digital_divide\u0027, 120), (\u0027Kenya\u0027, \u0027social_good_tech\u0027, 80), (\u0027Kenya\u0027, \u0027digital_divide\u0027, 110), (\u0027Egypt\u0027, \u0027social_good_tech\u0027, 150), (\u0027Egypt\u0027, \u0027digital_divide\u0027, 130);", + "sql": "SELECT Social_Good_Tech_Africa.country, SUM(Social_Good_Tech_Africa.projects) FROM Social_Good_Tech_Africa WHERE Social_Good_Tech_Africa.country IN (SELECT Social_Good_Tech_Africa.country FROM Social_Good_Tech_Africa GROUP BY Social_Good_Tech_Africa.country ORDER BY SUM(Social_Good_Tech_Africa.projects) DESC LIMIT 3) GROUP BY Social_Good_Tech_Africa.country;", + "sql_explanation": "This SQL query calculates the total number of social good technology projects and digital divide reduction programs in the top 3 most active countries in Africa in H1 and H2 of 2022, by filtering the records based on the country, grouping the records based on the country, and computing the sum of projects for each group, where the country is one of the top 3 most active countries in Africa." 
+}, { + "id": "1569", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations are associated with ethical AI frameworks and have given more than one talk?", + "sql_context": "CREATE TABLE ethical_ai_frameworks (id INT PRIMARY KEY, name VARCHAR(255), description TEXT, organization VARCHAR(255)); INSERT INTO ethical_ai_frameworks (id, name, description, organization) VALUES (1, \u0027Ethical AI 1.0\u0027, \u0027A framework for building ethical AI\u0027, \u0027AI for Good Foundation\u0027); INSERT INTO ethical_ai_frameworks (id, name, description, organization) VALUES (2, \u0027AI for Climate 1.0\u0027, \u0027A framework for using AI to combat climate change\u0027, \u0027AI for Good Foundation\u0027); INSERT INTO ethical_ai_frameworks (id, name, description, organization) VALUES (3, \u0027Accessibility 1.0\u0027, \u0027A framework for building accessible tech\u0027, \u0027Tech for Social Impact Inc.\u0027); CREATE TABLE talks (id INT PRIMARY KEY, title VARCHAR(255), speaker_id INT, conference_id INT, organization VARCHAR(255)); INSERT INTO talks (id, title, speaker_id, conference_id, organization) VALUES (1, \u0027Ethical AI in Healthcare\u0027, 1, 1, \u0027AI for Good Foundation\u0027); INSERT INTO talks (id, title, speaker_id, conference_id, organization) VALUES (2, \u0027AI for Climate Change\u0027, 1, 2, \u0027AI for Good Foundation\u0027); INSERT INTO talks (id, title, speaker_id, conference_id, organization) VALUES (3, \u0027Accessibility in Tech\u0027, 2, 3, \u0027Tech for Social Impact Inc.\u0027); INSERT INTO talks (id, title, speaker_id, conference_id, organization) 
VALUES (4, \u0027AI for Social Good\u0027, 1, 4, \u0027AI for Good Foundation\u0027); CREATE TABLE conferences (id INT PRIMARY KEY, name VARCHAR(255), city VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (1, \u0027AI for Social Good Summit\u0027, \u0027San Francisco\u0027, \u00272022-06-01\u0027, \u00272022-06-03\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (2, \u0027Climate Change Tech Conference\u0027, \u0027Vancouver\u0027, \u00272022-07-01\u0027, \u00272022-07-02\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (3, \u0027Accessibility in Tech Conference\u0027, \u0027Toronto\u0027, \u00272022-08-01\u0027, \u00272022-08-03\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (4, \u0027Ethical AI Conference\u0027, \u0027New York\u0027, \u00272022-09-01\u0027, \u00272022-09-03\u0027);", + "sql": "SELECT DISTINCT organization FROM talks WHERE organization IN (SELECT organization FROM ethical_ai_frameworks) GROUP BY organization HAVING COUNT(*) \u003e 1;", + "sql_explanation": "This query finds the organizations that are associated with ethical AI frameworks and have given more than one talk by using the IN operator to filter the results based on the organization column in the talks table and the GROUP BY and HAVING clauses to filter the results based on the count of talks per organization." 
+}, { + "id": "1982", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "For the AI_ethics_guidelines table, return the organization_name and guideline_text for the row with the latest review_date, in descending order.", + "sql_context": "CREATE TABLE AI_ethics_guidelines (organization_name VARCHAR(255), guideline_text TEXT, review_date DATE);", + "sql": "SELECT organization_name, guideline_text FROM AI_ethics_guidelines WHERE review_date \u003d (SELECT MAX(review_date) FROM AI_ethics_guidelines);", + "sql_explanation": "The SQL query returns the organization_name and guideline_text for the row with the latest review_date, in descending order. The subquery calculates the maximum review_date value, and the outer query returns the organization_name and guideline_text for the row with the latest review_date, based on the result of the subquery." 
+}, { + "id": "4674", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the tool with the lowest score?", + "sql_context": "CREATE TABLE tool (category VARCHAR(20), tool VARCHAR(20), score INT); INSERT INTO tool (category, tool, score) VALUES (\u0027AI\u0027, \u0027Chatbot\u0027, 85), (\u0027AI\u0027, \u0027Image Recognition\u0027, 90), (\u0027Data\u0027, \u0027Data Visualization\u0027, 80);", + "sql": "SELECT tool, score FROM tool WHERE score \u003d (SELECT MIN(score) FROM tool);", + "sql_explanation": "The SQL query finds the tool with the lowest score by using a subquery to find the minimum score, and then using a WHERE clause to filter the results to only show tools with the minimum score." 
+}, { + "id": "770", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production (in metric tons) of staple crops (rice, wheat, corn) in Southeast Asia and how many farms produce them?", + "sql_context": "CREATE TABLE StapleCropProduction (id INT, crop VARCHAR(50), region VARCHAR(50), production DECIMAL(10,2)); INSERT INTO StapleCropProduction (id, crop, region, production) VALUES (1, \u0027Rice\u0027, \u0027Southeast Asia\u0027, 5.0); INSERT INTO StapleCropProduction (id, crop, region, production) VALUES (2, \u0027Wheat\u0027, \u0027Southeast Asia\u0027, 3.5);", + "sql": "SELECT AVG(production), COUNT(DISTINCT farm_id) FROM (SELECT farm_id, crop, production FROM StapleCropProduction WHERE region \u003d \u0027Southeast Asia\u0027 AND crop IN (\u0027Rice\u0027, \u0027Wheat\u0027, \u0027Corn\u0027)) AS subquery;", + "sql_explanation": "This query calculates the average production (in metric tons) of staple crops (rice, wheat, corn) in Southeast Asia and how many farms produce them by using the AVG and COUNT keywords on the production and farm_id columns, and filtering the StapleCropProduction table with the WHERE keyword to only include rows with a region of Southeast Asia and crops in (Rice, Wheat, Corn) then using a subquery to select the required columns." 
+}, { + "id": "437", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of seafood exported to Europe from Africa in 2022?", + "sql_context": "CREATE TABLE seafood_export (product VARCHAR(255), quantity INT, year INT, country VARCHAR(255), PRIMARY KEY (product, year, country)); INSERT INTO seafood_export (product, quantity, year, country) VALUES (\u0027Shrimp\u0027, 10000, 2022, \u0027Nigeria\u0027), (\u0027Tuna\u0027, 15000, 2022, \u0027Ghana\u0027), (\u0027Salmon\u0027, 5000, 2022, \u0027Morocco\u0027);", + "sql": "SELECT (SUM(quantity) * 100.0 / (SELECT SUM(quantity) FROM seafood_export WHERE year \u003d 2022)) FROM seafood_export WHERE year \u003d 2022 AND country IN (SELECT country FROM countries WHERE continent \u003d \u0027Africa\u0027) AND region \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the percentage of seafood exported to Europe from Africa in 2022 by using the SUM function to add up the quantities of seafood exported to Europe from Africa in 2022 and dividing the result by the total quantity of seafood exported in 2022." 
+}, { + "id": "775", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average biomass of fish in saltwater fish farms in the Northern Hemisphere?", + "sql_context": "CREATE TABLE fish_farms (id INT, name TEXT, type TEXT, location TEXT, biomass FLOAT); INSERT INTO fish_farms (id, name, type, location, biomass) VALUES (1, \u0027Farm M\u0027, \u0027Fish\u0027, \u0027Norway\u0027, 25000.0), (2, \u0027Farm N\u0027, \u0027Fish\u0027, \u0027Canada\u0027, 18000.0);", + "sql": "SELECT AVG(biomass) FROM fish_farms WHERE type \u003d \u0027Fish\u0027 AND location IN (SELECT location FROM fish_farms WHERE biomass IS NOT NULL GROUP BY location HAVING EXTRACT(HOUR FROM AVG(location)) \u003c 12);", + "sql_explanation": "The query calculates the average biomass of fish in saltwater fish farms in the Northern Hemisphere by selecting the average value of the biomass column in the fish_farms table where the type is \u0027Fish\u0027 and the location is in the Northern Hemisphere." 
+}, { + "id": "1099", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all fish species raised in sustainable farms in Norway.", + "sql_context": "CREATE TABLE fish_species (id INT, species TEXT, sustainable BOOLEAN); CREATE TABLE farm_species (farm_id INT, species_id INT); INSERT INTO fish_species (id, species, sustainable) VALUES (1, \u0027Salmon\u0027, TRUE); INSERT INTO fish_species (id, species, sustainable) VALUES (2, \u0027Cod\u0027, FALSE); INSERT INTO farm_species (farm_id, species_id) VALUES (1, 1); INSERT INTO farm_species (farm_id, species_id) VALUES (2, 1); INSERT INTO farm_species (farm_id, species_id) VALUES (3, 2);", + "sql": "SELECT species FROM fish_species fs JOIN farm_species fss ON fs.id \u003d fss.species_id WHERE fss.farm_id IN (SELECT id FROM farms WHERE country \u003d \u0027Norway\u0027 AND sustainable \u003d TRUE);", + "sql_explanation": "This query retrieves all fish species raised in sustainable fish farms located in Norway by joining the fish_species and farm_species tables and filtering for farms with the country Norway and sustainable set to true." 
+}, { + "id": "1559", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average feeding rate for Trout in farms with water temperature between 10 and 25 degrees Celsius?", + "sql_context": "CREATE TABLE Farm (FarmID int, FarmName varchar(50), WaterTemperature numeric); INSERT INTO Farm (FarmID, FarmName, WaterTemperature) VALUES (1, \u0027Farm A\u0027, 22); INSERT INTO Farm (FarmID, FarmName, WaterTemperature) VALUES (2, \u0027Farm B\u0027, 18); INSERT INTO Farm (FarmID, FarmName, WaterTemperature) VALUES (3, \u0027Farm C\u0027, 15); INSERT INTO Farm (FarmID, FarmName, WaterTemperature) VALUES (4, \u0027Farm D\u0027, 23); CREATE TABLE FishStock (FishStockID int, FishSpecies varchar(50), FarmID int, FeedingRate numeric); INSERT INTO FishStock (FishStockID, FishSpecies, FarmID, FeedingRate) VALUES (1, \u0027Cod\u0027, 1, 3); INSERT INTO FishStock (FishStockID, FishSpecies, FarmID, FeedingRate) VALUES (2, \u0027Cod\u0027, 2, 2); INSERT INTO FishStock (FishStockID, FishSpecies, FarmID, FeedingRate) VALUES (3, \u0027Trout\u0027, 3, 1); INSERT INTO FishStock (FishStockID, FishSpecies, FarmID, FeedingRate) VALUES (4, \u0027Trout\u0027, 4, 4); INSERT INTO FishStock (FishStockID, FishSpecies, FarmID, FeedingRate) VALUES (5, \u0027Trout\u0027, 3, 2);", + "sql": "SELECT AVG(FeedingRate) FROM FishStock fs WHERE FishSpecies \u003d \u0027Trout\u0027 AND fs.FarmID IN (SELECT FarmID FROM Farm WHERE WaterTemperature BETWEEN 10 AND 25);", + "sql_explanation": "Calculates the average feeding rate for Trout in farms with water temperature between 10 and 25 degrees Celsius by selecting the average 
FeedingRate from the FishStock table where FishSpecies is \u0027Trout\u0027 and FarmID is in the subquery that retrieves FarmIDs from the Farm table where WaterTemperature is between 10 and 25 degrees Celsius." +}, { + "id": "1851", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the biomass of fish in the Barents Sea grouped by feeding type?", + "sql_context": "CREATE TABLE fish_data (fish_name VARCHAR(50), biomass FLOAT, feeding_type VARCHAR(50)); INSERT INTO fish_data (fish_name, biomass, feeding_type) VALUES (\u0027Cod\u0027, 3000, \u0027Carnivore\u0027), (\u0027Herring\u0027, 3500, \u0027Planktivore\u0027), (\u0027Capelin\u0027, 2500, \u0027Planktivore\u0027), (\u0027Saithe\u0027, 4000, \u0027Carnivore\u0027);", + "sql": "SELECT feeding_type, SUM(biomass) AS total_biomass FROM fish_data WHERE fish_name IN (SELECT fish_name FROM barents_sea) GROUP BY feeding_type;", + "sql_explanation": "This query calculates the biomass of fish in the Barents Sea grouped by feeding type. It does so by creating a subquery to select fish_name from the barents_sea table, using the IN keyword to filter fish_data by fish_name, and grouping the results by feeding_type while summing the biomass." 
+}, { + "id": "82", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the change in virtual tourism revenue between Q1 and Q2 2022, for each country in the Americas?", + "sql_context": "CREATE TABLE tourism_revenue (country VARCHAR(50), revenue FLOAT, quarter INT, year INT); INSERT INTO tourism_revenue (country, revenue, quarter, year) VALUES (\u0027USA\u0027, 1200000, 1, 2022), (\u0027Canada\u0027, 800000, 1, 2022), (\u0027Mexico\u0027, 500000, 1, 2022), (\u0027USA\u0027, 1500000, 2, 2022), (\u0027Canada\u0027, 900000, 2, 2022), (\u0027Mexico\u0027, 600000, 2, 2022);", + "sql": "SELECT country, (q2_revenue - q1_revenue) as revenue_change FROM (SELECT country, SUM(CASE WHEN quarter \u003d 1 THEN revenue ELSE 0 END) as q1_revenue, SUM(CASE WHEN quarter \u003d 2 THEN revenue ELSE 0 END) as q2_revenue FROM tourism_revenue WHERE country LIKE \u0027%Americas%\u0027 AND year \u003d 2022 GROUP BY country) as subquery;", + "sql_explanation": "This query calculates the change in virtual tourism revenue between Q1 and Q2 2022, for each country in the Americas. It uses a subquery to calculate the revenue for each quarter, and then subtracts the Q1 revenue from the Q2 revenue for each country to find the revenue change." 
+}, { + "id": "2719", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the production figures for well \u0027A\u0027 in the \u0027GulfOfMexico\u0027 region?", + "sql_context": "CREATE TABLE wells (id VARCHAR(10), name VARCHAR(10), region VARCHAR(20)); INSERT INTO wells (id, name, region) VALUES (\u0027W001\u0027, \u0027A\u0027, \u0027GulfOfMexico\u0027); CREATE TABLE production (well_id VARCHAR(10), date DATE, quantity INT); INSERT INTO production (well_id, date, quantity) VALUES (\u0027W001\u0027, \u00272022-01-01\u0027, 100), (\u0027W001\u0027, \u00272022-01-02\u0027, 120);", + "sql": "SELECT quantity FROM production WHERE well_id \u003d (SELECT id FROM wells WHERE name \u003d \u0027A\u0027 AND region \u003d \u0027GulfOfMexico\u0027);", + "sql_explanation": "This query first selects the id of the well named \u0027A\u0027 in the \u0027GulfOfMexico\u0027 region from the \u0027wells\u0027 table. Then, it uses this id to select the quantity from the \u0027production\u0027 table where the well_id matches." 
+}, { + "id": "345", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in average property price between eco-friendly and standard communities?", + "sql_context": "CREATE TABLE property_community ( id INT PRIMARY KEY, price FLOAT, community_type VARCHAR(255) ); INSERT INTO property_community (id, price, community_type) VALUES (1, 600000, \u0027eco-friendly\u0027), (2, 400000, \u0027standard\u0027), (3, 550000, \u0027eco-friendly\u0027);", + "sql": "SELECT AVG(price_eco) - AVG(price_standard) FROM (SELECT price FROM property_community WHERE community_type \u003d \u0027eco-friendly\u0027) AS price_eco JOIN (SELECT price FROM property_community WHERE community_type \u003d \u0027standard\u0027) AS price_standard ON 1\u003d1;", + "sql_explanation": "This query calculates the difference in average property price between eco-friendly and standard communities by performing a join on subqueries that calculate the average price for eco-friendly and standard properties. The result set is then filtered for the difference between the average prices." 
+}, { + "id": "416", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have inclusive housing policies and the highest percentage of green spaces?", + "sql_context": "CREATE TABLE City (id INT PRIMARY KEY, name VARCHAR(50), population INT, green_space_percentage DECIMAL(5,2), inclusive_housing BOOLEAN); CREATE VIEW Inclusive_Cities AS SELECT * FROM City WHERE inclusive_housing \u003d true;", + "sql": "SELECT City.name, City.green_space_percentage FROM City INNER JOIN Inclusive_Cities ON City.id \u003d Inclusive_Cities.id WHERE City.green_space_percentage \u003d (SELECT MAX(green_space_percentage) FROM City WHERE inclusive_housing \u003d true);", + "sql_explanation": "This query first creates a view called Inclusive_Cities that selects all cities with inclusive housing policies. It then joins the City and Inclusive_Cities tables on the id column and filters for cities with the highest percentage of green spaces that have inclusive housing policies. The query returns the name and green_space_percentage for each city." 
+}, { + "id": "1717", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the median property price for properties in the city of Chicago with a walkability score above 60?", + "sql_context": "CREATE TABLE properties (id INT, price FLOAT, city VARCHAR(20), walkability_score INT); INSERT INTO properties (id, price, city, walkability_score) VALUES (1, 950000, \u0027Chicago\u0027, 70), (2, 800000, \u0027Chicago\u0027, 65), (3, 700000, \u0027Chicago\u0027, 67), (4, 1000000, \u0027Chicago\u0027, 80), (5, 600000, \u0027Chicago\u0027, 62);", + "sql": "SELECT AVG(price) FROM (SELECT price FROM properties WHERE city \u003d \u0027Chicago\u0027 AND walkability_score \u003e 60 ORDER BY price LIMIT 2 OFFSET 1) AS subquery;", + "sql_explanation": "The SQL query first filters the data from the properties table based on the conditions in the WHERE clause, and then orders the data by price. The OFFSET clause skips the first row of the ordered data, and the LIMIT clause selects the second row. A subquery is then used to calculate the average price of the two selected rows, giving the median property price for properties in Chicago with a walkability score above 60." 
+}, { + "id": "2172", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of inclusive housing policies per state, broken down by policy category?", + "sql_context": "CREATE TABLE inclusive_housing (id INT, state VARCHAR, policy_category VARCHAR, policy_count INT); INSERT INTO inclusive_housing (id, state, policy_category, policy_count) VALUES (1, \u0027NY\u0027, \u0027Affordable Housing\u0027, 5), (2, \u0027CA\u0027, \u0027Accessibility\u0027, 4), (3, \u0027TX\u0027, \u0027Inclusionary Zoning\u0027, 3), (4, \u0027FL\u0027, \u0027Fair Housing\u0027, 2);", + "sql": "SELECT AVG(policy_count) FROM (SELECT policy_count, state FROM inclusive_housing GROUP BY state, policy_category) as state_policies;", + "sql_explanation": "This query first creates a subquery that groups the inclusive_housing table by the state and policy_category columns and selects the policy_count column. Then, it calculates the average policy_count from the subquery." 
+}, { + "id": "2766", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have a housing affordability score above the median?", + "sql_context": "CREATE TABLE housing_affordability (id INT, city VARCHAR(20), score FLOAT); INSERT INTO housing_affordability (id, city, score) VALUES (1, \u0027SF\u0027, 45.2), (2, \u0027NYC\u0027, 38.6), (3, \u0027LA\u0027, 51.1), (4, \u0027SF\u0027, 47.9), (5, \u0027NYC\u0027, 39.8);", + "sql": "SELECT city FROM housing_affordability WHERE score \u003e (SELECT AVG(score) FROM housing_affordability) GROUP BY city;", + "sql_explanation": "This query identifies the cities with a housing affordability score above the median by selecting the city column from the housing_affordability table and filtering for rows where the score is greater than the median score (which is calculated using a subquery that selects the average score from the housing_affordability table)." 
+}, { + "id": "3386", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the median property price for co-ownership properties in Seattle?", + "sql_context": "CREATE TABLE seattle_prop (id INT, address TEXT, price FLOAT, co_ownership BOOLEAN); INSERT INTO seattle_prop (id, address, price, co_ownership) VALUES (1, \u0027345 Pine St\u0027, 400000, TRUE), (2, \u0027678 Juniper St\u0027, 500000, FALSE), (3, \u0027901 Oak St\u0027, 450000, TRUE), (4, \u0027213 Fir St\u0027, 550000, FALSE);", + "sql": "SELECT median(price) FROM (SELECT DISTINCT price FROM seattle_prop WHERE co_ownership \u003d TRUE) tmp;", + "sql_explanation": "Calculate the median property price for co-ownership properties in Seattle by finding the distinct prices for co-ownership properties and calculating the median of those prices." 
+}, { + "id": "2028", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most expensive artwork created by an artist from \u0027Asia\u0027?", + "sql_context": "CREATE TABLE Artworks (artwork_id INTEGER, title TEXT, artist_name TEXT, artist_origin TEXT, price FLOAT); INSERT INTO Artworks (artwork_id, title, artist_name, artist_origin, price) VALUES (1, \u0027Artwork 1\u0027, \u0027Hiroshi\u0027, \u0027Japan\u0027, 10000.0), (2, \u0027Artwork 2\u0027, \u0027Mei\u0027, \u0027China\u0027, 12000.0), (3, \u0027Artwork 3\u0027, \u0027Aamir\u0027, \u0027Pakistan\u0027, 8000.0);", + "sql": "SELECT title, price FROM Artworks WHERE artist_origin \u003d \u0027Asia\u0027 AND price \u003d (SELECT MAX(price) FROM Artworks WHERE artist_origin \u003d \u0027Asia\u0027)", + "sql_explanation": "This query finds the most expensive artwork created by an artist from Asia. It uses a subquery to find the maximum price for artworks from Asia and filters the Artworks table for rows with \u0027Asia\u0027 as the artist_origin and price equal to the maximum price." 
+}, { + "id": "2446", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural events were held in Paris, and what was the total attendance?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(255));CREATE TABLE cultural_events (id INT, title VARCHAR(255), city_id INT, start_date DATE, end_date DATE, attendees INT);", + "sql": "SELECT SUM(attendees) as total_attendance FROM cultural_events WHERE city_id \u003d (SELECT id FROM cities WHERE name \u003d \u0027Paris\u0027);", + "sql_explanation": "This query first selects the id of the city named \u0027Paris\u0027 from the cities table, and then filters the cultural_events table by that city ID. Lastly, it sums the attendees column to find the total attendance for cultural events held in Paris." 
+}, { + "id": "3103", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all states that have a labor cost greater than the average labor cost", + "sql_context": "CREATE TABLE construction_labor (state VARCHAR(2), labor_cost NUMERIC); INSERT INTO construction_labor (state, labor_cost) VALUES (\u0027WA\u0027, 45.5), (\u0027OR\u0027, 38.3), (\u0027CA\u0027, 52.1);", + "sql": "SELECT state FROM construction_labor WHERE labor_cost \u003e (SELECT AVG(labor_cost) FROM construction_labor);", + "sql_explanation": "This SQL query returns a list of states that have a labor cost greater than the average labor cost in the construction_labor table. The subquery calculates the average labor cost using the AVG function, and the WHERE clause filters the data based on the labor_cost column." 
+}, { + "id": "73", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new sustainable sourcing record for \u0027ABC Cafe\u0027 with a rating of 4 and a date of \u00272022-02-01\u0027 if it does not exist already.", + "sql_context": "CREATE TABLE sustainable_sourcing (restaurant_id INT, rating INT, sourcing_date DATE);", + "sql": "INSERT INTO sustainable_sourcing (restaurant_id, rating, sourcing_date) SELECT 1, 4, \u00272022-02-01\u0027 FROM (SELECT 1 FROM sustainable_sourcing WHERE restaurant_id \u003d 1 AND sourcing_date \u003d \u00272022-02-01\u0027) AS dummy WHERE NOT EXISTS (SELECT 1 FROM sustainable_sourcing WHERE restaurant_id \u003d 1 AND sourcing_date \u003d \u00272022-02-01\u0027);", + "sql_explanation": "We first check if there is a record in the sustainable_sourcing table for \u0027ABC Cafe\u0027 on February 1, 2022. If there isn\u0027t, we use an INSERT statement to insert the new record. The INSERT statement uses a SELECT statement with a WHERE NOT EXISTS clause to ensure the record is only inserted if it doesn\u0027t already exist." 
+}, { + "id": "1482", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the record for the \u0027Fish Tacos\u0027 from the \u0027Dinner Menu\u0027 of Restaurant B.", + "sql_context": "CREATE TABLE menus (menu_id INT, restaurant_id INT, meal_time VARCHAR(255), item VARCHAR(255), price DECIMAL(5,2));", + "sql": "DELETE FROM menus WHERE restaurant_id \u003d (SELECT restaurant_id FROM restaurants WHERE name \u003d \u0027Restaurant B\u0027) AND meal_time \u003d \u0027Dinner\u0027 AND item \u003d \u0027Fish Tacos\u0027;", + "sql_explanation": "This query deletes the record for the \u0027Fish Tacos\u0027 from the \u0027Dinner Menu\u0027 of \u0027Restaurant B\u0027." 
+}, { + "id": "1953", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many sustainable sourcing records does Restaurant V have?", + "sql_context": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(255)); CREATE TABLE sourcing (sourcing_id INT, restaurant_id INT, produce VARCHAR(255), local BOOLEAN);", + "sql": "SELECT COUNT(*) as sourcing_records FROM sourcing WHERE restaurant_id \u003d (SELECT restaurant_id FROM restaurants WHERE name \u003d \u0027Restaurant V\u0027);", + "sql_explanation": "This query filters the \u0027sourcing\u0027 table for \u0027Restaurant V\u0027, then calculates the total number of sourcing records for that restaurant." 
+}, { + "id": "970", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many pollution control initiatives are there in the Southern Ocean?", + "sql_context": "CREATE TABLE oceans (id INT, name TEXT, area FLOAT); CREATE TABLE regions (id INT, region TEXT); CREATE TABLE pollution_control (id INT, initiative TEXT, ocean_id INT, region_id INT); INSERT INTO oceans (id, name, area) VALUES (1, \u0027Pacific Ocean\u0027, 165200000); INSERT INTO oceans (id, name, area) VALUES (5, \u0027Southern Ocean\u0027, 20327000); INSERT INTO regions (id, region) VALUES (1, \u0027North America\u0027); INSERT INTO regions (id, region) VALUES (2, \u0027South America\u0027); INSERT INTO pollution_control (id, initiative, ocean_id, region_id) VALUES (1, \u0027Clean Beach Initiative\u0027, 1, 1); INSERT INTO pollution_control (id, initiative, ocean_id, region_id) VALUES (2, \u0027Ocean Cleanup Project\u0027, 5, 2);", + "sql": "SELECT COUNT(*) FROM pollution_control WHERE ocean_id \u003d (SELECT id FROM oceans WHERE name \u003d \u0027Southern Ocean\u0027) AND region_id \u003d (SELECT id FROM regions WHERE region \u003d \u0027South America\u0027);", + "sql_explanation": "This query calculates the number of pollution control initiatives in the Southern Ocean by counting the records in the pollution_control table where the ocean_id is equal to the id of the row in the oceans table with a name of \u0027Southern Ocean\u0027 and the region_id is equal to the id of the row in the regions table with a region of \u0027South America\u0027. 
The subqueries are used to look up the id values based on the name and region values." +}, { + "id": "3206", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of oceanic trenches in the Pacific plate?\u0027", + "sql_context": "CREATE TABLE plate (plate_id INT, name VARCHAR(255), PRIMARY KEY(plate_id)); INSERT INTO plate (plate_id, name) VALUES (1, \u0027Pacific\u0027); CREATE TABLE trench (trench_id INT, name VARCHAR(255), plate_id INT, avg_depth DECIMAL(5,2), PRIMARY KEY(trench_id), FOREIGN KEY (plate_id) REFERENCES plate(plate_id)); INSERT INTO trench (trench_id, name, plate_id, avg_depth) VALUES (1, \u0027Mariana Trench\u0027, 1, 10994);", + "sql": "SELECT AVG(avg_depth) FROM trench WHERE plate_id \u003d (SELECT plate_id FROM plate WHERE name \u003d \u0027Pacific\u0027);", + "sql_explanation": "We calculate the average depth of trenches in the Pacific plate by selecting the average of the avg_depth column in the trench table, filtered by the plate_id corresponding to the Pacific plate." 
+}, { + "id": "311", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation frequency for each cause in the \u0027philanthropy.causes\u0027 table?", + "sql_context": "CREATE TABLE philanthropy.donation_amount_by_cause (donation_id INT, donor_id INT, cause_id INT, donation_date DATE, donation_amount DECIMAL);", + "sql": "SELECT c.cause_name, AVG(dam.donation_frequency) FROM philanthropy.causes c JOIN (SELECT cause_id, COUNT(*) AS donation_frequency FROM philanthropy.donation_amount_by_cause GROUP BY cause_id) dam ON c.cause_id \u003d dam.cause_id GROUP BY c.cause_name;", + "sql_explanation": "The SQL query performs a join between the \u0027philanthropy.causes\u0027 table and a subquery based on the \u0027philanthropy.donation_amount_by_cause\u0027 table based on the \u0027cause_id\u0027 column. It then calculates the average of the \u0027donation_frequency\u0027 column for each cause_name in the \u0027philanthropy.causes\u0027 table, providing the average donation frequency for each cause." 
+}, { + "id": "778", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all regions with a total donation amount above the overall average, in alphabetical order.", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonationRegion TEXT, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, DonationRegion, DonationAmount) VALUES (1, \u0027Asia\u0027, 1000.00), (2, \u0027Africa\u0027, 1500.00), (3, \u0027Europe\u0027, 2000.00), (4, \u0027Asia\u0027, 500.00), (5, \u0027Africa\u0027, 800.00), (6, \u0027Europe\u0027, 1200.00);", + "sql": "SELECT DonationRegion, SUM(DonationAmount) AS TotalDonation FROM Donations GROUP BY DonationRegion HAVING SUM(DonationAmount) \u003e (SELECT AVG(DonationAmount) FROM Donations) ORDER BY TotalDonation;", + "sql_explanation": "The SQL query calculates the total donation amount per region by grouping by the DonationRegion and using the SUM function. It then filters the results to only include regions with a total donation amount greater than the overall average (calculated using a subquery) and orders the results by the total donation amount." 
+}, { + "id": "2076", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of donors who made more than one donation in 2021?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationCount INT); INSERT INTO Donors (DonorID, DonorName, DonationCount) VALUES (1, \u0027John Doe\u0027, 3), (2, \u0027Jane Smith\u0027, 1);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Donors)) as \u0027% of Donors with Multiple Donations\u0027 FROM Donors WHERE DonationCount \u003e 1;", + "sql_explanation": "The SQL query calculates the percentage of donors who made more than one donation in 2021 by first calculating the total number of donors who made multiple donations and then dividing it by the total number of donors." 
+}, { + "id": "152", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum number of patients served per rural health center in Europe, and how many of these centers serve more than 30000 patients?", + "sql_context": "CREATE TABLE rural_health_centers (center_id INT, center_name VARCHAR(100), country VARCHAR(50), num_patients INT); INSERT INTO rural_health_centers (center_id, center_name, country, num_patients) VALUES (1, \u0027Center A\u0027, \u0027France\u0027, 35000), (2, \u0027Center B\u0027, \u0027France\u0027, 28000), (3, \u0027Center C\u0027, \u0027Germany\u0027, 42000), (4, \u0027Center D\u0027, \u0027Germany\u0027, 48000);", + "sql": "SELECT MIN(num_patients) AS min_patients_per_center, MAX(num_patients) AS max_patients_per_center, COUNT(*) FILTER (WHERE num_patients \u003e 30000) AS centers_with_more_than_30000_patients FROM rural_health_centers WHERE country IN (SELECT name FROM countries WHERE continent \u003d \u0027Europe\u0027);", + "sql_explanation": "Determine the minimum and maximum number of patients served per rural health center in Europe and the number of these centers serving more than 30000 patients." 
+}, { + "id": "4010", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records in the \"medications\" table where the \"provider_id\" is not present in the \"providers\" table", + "sql_context": "CREATE TABLE providers (provider_id INT, name VARCHAR(50)); INSERT INTO providers (provider_id, name) VALUES (1, \u0027Dr. Smith\u0027), (2, \u0027Dr. Johnson\u0027); CREATE TABLE medications (medication_id INT, name VARCHAR(50), provider_id INT); INSERT INTO medications (medication_id, name, provider_id) VALUES (1, \u0027Aspirin\u0027, 1), (2, \u0027Ibuprofen\u0027, NULL), (3, \u0027Penicillin\u0027, 2);", + "sql": "DELETE FROM medications WHERE provider_id NOT IN (SELECT provider_id FROM providers);", + "sql_explanation": "This query first selects the provider_id from the providers table and then deletes all records in the medications table where the provider_id is not present in the list from the providers table." 
+}, { + "id": "128", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 users who gained the most followers in a specific country last month.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(255), country VARCHAR(255), follower_count INT); INSERT INTO users (id, name, country, follower_count) VALUES (1, \u0027Alice\u0027, \u0027USA\u0027, 1000), (2, \u0027Bob\u0027, \u0027Canada\u0027, 2000), (3, \u0027Charlie\u0027, \u0027USA\u0027, 1500);", + "sql": "SELECT name, follower_count, (follower_count - (SELECT follower_count FROM users WHERE id \u003d u.id AND country \u003d \u0027USA\u0027 AND DATE(date_added) \u003c DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH))) AS follower_gain FROM users u WHERE country \u003d \u0027USA\u0027 ORDER BY follower_gain DESC, follower_count DESC LIMIT 3;", + "sql_explanation": "This query calculates the followers gained by each user in the USA during the last month. It uses a correlated subquery to get the previous follower count for each user. Then, it orders the results by the followers gained and the current follower count to find the top 3 users." 
+}, { + "id": "132", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the user with the most followers who has posted about \"#education\" in the last month, excluding users with less than 10 posts.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(255), followers INT); CREATE TABLE posts (id INT, user INT, content TEXT, timestamp TIMESTAMP);", + "sql": "SELECT u.name FROM users u JOIN (SELECT user, COUNT(*) AS post_count FROM posts WHERE content LIKE \u0027%#education%\u0027 AND timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 MONTH) AND NOW() GROUP BY user HAVING COUNT(*) \u003e\u003d 10) pc ON u.id \u003d pc.user ORDER BY u.followers DESC, pc.post_count DESC LIMIT 1;", + "sql_explanation": "Join the users and posts tables, filter rows based on the specified criteria, count the number of posts by user, and then find the user with the most followers who posted about the desired topic with at least 10 posts." 
+}, { + "id": "173", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 users with the most followers in the \u0027social_media\u0027 database.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(50), gender VARCHAR(10), age INT, location VARCHAR(50), followers INT); CREATE TABLE following (user_id INT, following_id INT);", + "sql": "SELECT users.name, users.followers FROM users JOIN ( SELECT following_id, COUNT(*) AS follower_count FROM following GROUP BY following_id ORDER BY follower_count DESC LIMIT 5 ) AS top_followers ON users.id \u003d top_followers.following_id ORDER BY top_followers.follower_count DESC;", + "sql_explanation": "This query lists the top 5 users with the most followers by first joining the \u0027users\u0027 and \u0027following\u0027 tables on the \u0027id\u0027 and \u0027following_id\u0027 columns. It then calculates the follower count for each user using a subquery and joins it with the \u0027users\u0027 table. Finally, it orders the results by follower count in descending order and returns the top 5." 
+}, { + "id": "404", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users unliked post ID 1001 in the last week?", + "sql_context": "CREATE TABLE post_likes (user_id INT, post_id INT, like_date DATE); INSERT INTO post_likes (user_id, post_id, like_date) VALUES (1, 1001, \u00272022-01-01\u0027), (2, 1001, \u00272022-01-02\u0027), (3, 1001, \u00272022-01-03\u0027), (1, 1001, \u00272022-01-04\u0027);", + "sql": "SELECT COUNT(*) FROM post_likes WHERE post_id \u003d 1001 AND like_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 WEEK) AND user_id NOT IN (SELECT user_id FROM post_likes WHERE post_id \u003d 1001 AND like_date \u003c DATE_SUB(CURDATE(), INTERVAL 1 WEEK));", + "sql_explanation": "The SQL query first finds the users who liked post 1001 in the last week, then finds the users who liked the post before that, and finally counts the number of users who liked the post in the last week but not before that." 
+}, { + "id": "545", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of posts made by a single user on a specific platform in a day?", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, platform VARCHAR(255), post_date DATE); INSERT INTO posts (id, user_id, platform, post_date) VALUES (1, 1, \u0027Instagram\u0027, \u00272022-01-01\u0027), (2, 2, \u0027Twitter\u0027, \u00272022-01-10\u0027), (3, 1, \u0027Instagram\u0027, \u00272022-02-01\u0027), (4, 3, \u0027LinkedIn\u0027, \u00272022-03-01\u0027), (5, 1, \u0027Instagram\u0027, \u00272022-03-01\u0027), (6, 1, \u0027Instagram\u0027, \u00272022-03-01\u0027); CREATE TABLE users (id INT, country VARCHAR(255)); INSERT INTO users (id, country) VALUES (1, \u0027Spain\u0027), (2, \u0027Russia\u0027), (3, \u0027Indonesia\u0027);", + "sql": "SELECT platform, user_id, MAX(post_count) FROM (SELECT platform, user_id, COUNT(*) AS post_count FROM posts WHERE platform \u003d \u0027Instagram\u0027 GROUP BY platform, user_id, post_date) AS subquery GROUP BY platform, user_id;", + "sql_explanation": "This query calculates the maximum number of posts made by a single user on the Instagram platform in a day by grouping the posts table by platform, user_id, and post_date and counting the number of posts for each group. Then, it selects the maximum post_count for each platform and user_id from the subquery." 
+}, { + "id": "840", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of post_likes for posts in the \u0027travel\u0027 category from the \u0027posts\u0027 table, who have more than 10,000 followers from the \u0027users\u0027 table, in the past 60 days?", + "sql_context": "CREATE TABLE users (user_id INT, user_category VARCHAR(20), user_followers INT); CREATE TABLE posts (post_id INT, user_id INT, post_category VARCHAR(20), post_date DATE, post_likes INT);", + "sql": "SELECT SUM(post_likes) FROM posts p1 WHERE post_category \u003d \u0027travel\u0027 AND p1.user_id IN (SELECT user_id FROM users WHERE user_followers \u003e 10000) AND p1.post_date \u003e\u003d CURDATE() - INTERVAL 60 DAY;", + "sql_explanation": "1. Subquery selects user_id from users table where user_followers is more than 10,000. 2. Main query selects post_likes from posts table where post_category is \u0027travel\u0027, user_id is in the subquery, and post_date is within the past 60 days. 3. Sums the post_likes." 
+}, { + "id": "1035", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the interests of users who have engaged with posts about environmental activism but do not follow any environmental organizations?", + "sql_context": "CREATE TABLE user_interactions (user_id INT, post_topic VARCHAR(50)); CREATE TABLE user_following (user_id INT, org_type VARCHAR(50)); INSERT INTO user_interactions (user_id, post_topic) VALUES (1, \u0027environmental activism\u0027), (2, \u0027climate change\u0027), (3, \u0027environmental activism\u0027), (4, \u0027animal rights\u0027), (5, \u0027environmental activism\u0027), (6, \u0027renewable energy\u0027); INSERT INTO user_following (user_id, org_type) VALUES (1, \u0027animal rights organization\u0027), (2, \u0027renewable energy organization\u0027), (3, \u0027tech company\u0027), (4, \u0027climate change organization\u0027), (5, \u0027non-profit\u0027), (6, \u0027climate change organization\u0027);", + "sql": "SELECT post_topic FROM user_interactions WHERE user_id NOT IN (SELECT user_id FROM user_following WHERE org_type LIKE \u0027%environmental%\u0027) AND post_topic \u003d \u0027environmental activism\u0027;", + "sql_explanation": "We create two tables: user_interactions and user_following. Then, we filter the user_following table to exclude users who follow environmental organizations. Next, we select the post topics of users who have engaged with \u0027environmental activism\u0027 posts but do not follow any environmental organizations." 
+}, { + "id": "1109", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique job titles of users who have engaged with posts about social justice but have not applied to any jobs in the non-profit sector.", + "sql_context": "CREATE TABLE user_engagements (user_id INT, engagement_topic VARCHAR(50), job_title VARCHAR(50)); INSERT INTO user_engagements (user_id, engagement_topic, job_title) VALUES (1, \u0027social justice\u0027, \u0027Human Rights Lawyer\u0027), (2, \u0027technology\u0027, \u0027Software Engineer\u0027), (3, \u0027social justice\u0027, \u0027Social Worker\u0027), (4, \u0027education\u0027, \u0027Teacher\u0027), (5, \u0027social justice\u0027, \u0027Community Organizer\u0027), (6, \u0027healthcare\u0027, \u0027Doctor\u0027);", + "sql": "SELECT job_title FROM user_engagements WHERE engagement_topic \u003d \u0027social justice\u0027 AND user_id NOT IN (SELECT user_id FROM user_engagements WHERE job_title LIKE \u0027%non-profit%\u0027);", + "sql_explanation": "We create the user_engagements table and insert sample data. Then, we select the unique job titles of users who have engaged with posts about social justice but have not applied to any jobs in the non-profit sector." 
+}, { + "id": "1157", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of posts per day for users in \u0027Europe\u0027?", + "sql_context": "CREATE TABLE users (id INT, region VARCHAR(20)); INSERT INTO users (id, region) VALUES (1, \u0027North America\u0027), (2, \u0027Europe\u0027), (3, \u0027Asia\u0027);", + "sql": "SELECT AVG(quantity) FROM (SELECT COUNT(*) AS quantity FROM users JOIN posts ON users.id \u003d posts.user_id WHERE users.region \u003d \u0027Europe\u0027 GROUP BY posts.post_date) AS subquery;", + "sql_explanation": "The SQL query calculates the average number of posts per day for users in \u0027Europe\u0027. It first creates a subquery that groups posts by date and calculates the count of posts for each date. Then, the main query calculates the average of the counts returned by the subquery." 
+}, { + "id": "1230", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the maximum number of posts per day for users in India?", + "sql_context": "CREATE SCHEMA postsdata; CREATE TABLE posts_per_day(post_date DATE, user_id INT, posts_per_day INT); INSERT INTO posts_per_day (post_date, user_id, posts_per_day) VALUES (\u00272022-01-01\u0027, 1, 5); INSERT INTO posts_per_day (post_date, user_id, posts_per_day) VALUES (\u00272022-01-01\u0027, 2, 3);", + "sql": "SELECT post_date, MAX(posts_per_day) FROM postsdata.posts_per_day WHERE user_id IN (SELECT user_id FROM postsdata.posts_per_day WHERE country \u003d \u0027IN\u0027) GROUP BY post_date;", + "sql_explanation": "This query identifies the maximum number of posts per day for users in India by filtering rows with country values of \u0027IN\u0027, grouping rows by post_date, and then calculating the maximum posts_per_day value for each group." 
+}, { + "id": "665", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new union \u0027Retail Workers Union\u0027 with 300 members.", + "sql_context": "CREATE TABLE unions (id INT, name TEXT, member_count INT); CREATE TABLE members (id INT, union_id INT);", + "sql": "INSERT INTO unions (id, name, member_count) VALUES (1, \u0027Retail Workers Union\u0027, 300); INSERT INTO members (id, union_id) SELECT NULL, id FROM (SELECT 1 + (SELECT MAX(id) FROM unions) AS id) AS seq_table;", + "sql_explanation": "The query adds a new union \u0027Retail Workers Union\u0027 with 300 members by inserting the new union record with the \u0027id\u0027 value of 1 and \u0027name\u0027 value of \u0027Retail Workers Union\u0027 and \u0027member_count\u0027 value of 300. Then, it inserts new member records with the \u0027union_id\u0027 value corresponding to the \u0027Retail Workers Union\u0027 by selecting the \u0027id\u0027 value from the \u0027seq_table\u0027 with the \u0027id\u0027 value of 1 plus the maximum \u0027id\u0027 value from the \u0027unions\u0027 table." 
+}, { + "id": "2095", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum monthly salary in the \u0027Retail Workers Union\u0027?", + "sql_context": "CREATE TABLE union_members (member_id INT, member_name VARCHAR(255), union_id INT, monthly_salary DECIMAL(10,2)); CREATE TABLE unions (union_id INT, union_name VARCHAR(255)); INSERT INTO unions (union_id, union_name) VALUES (123, \u0027Retail Workers Union\u0027); INSERT INTO unions (union_id, union_name) VALUES (456, \u0027Teachers Union\u0027); INSERT INTO union_members (member_id, member_name, union_id, monthly_salary) VALUES (1, \u0027John Doe\u0027, 123, 2000.50); INSERT INTO union_members (member_id, member_name, union_id, monthly_salary) VALUES (2, \u0027Jane Doe\u0027, 123, 2200.25);", + "sql": "SELECT MIN(monthly_salary) FROM union_members WHERE union_id \u003d (SELECT union_id FROM unions WHERE union_name \u003d \u0027Retail Workers Union\u0027);", + "sql_explanation": "This query calculates the minimum monthly salary of workers in the Retail Workers Union by filtering the union_members table for rows where the union_id matches the union_id of the Retail Workers Union and then finding the minimum monthly_salary." 
+}, { + "id": "2454", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the unions involved in collective bargaining agreements, but not involved in any labor rights violations.", + "sql_context": "CREATE TABLE unions (id INT PRIMARY KEY, name VARCHAR(255)); CREATE TABLE cb_agreements (id INT PRIMARY KEY, union_id INT); CREATE TABLE reports (id INT PRIMARY KEY, violation VARCHAR(255), union_id INT); INSERT INTO unions (id, name) VALUES (1, \u0027Union A\u0027), (2, \u0027Union B\u0027), (3, \u0027Union C\u0027), (4, \u0027Union D\u0027); INSERT INTO cb_agreements (id, union_id) VALUES (1, 1), (2, 2), (3, 4); INSERT INTO reports (id, violation, union_id) VALUES (1, \u0027Violation 1\u0027, 2), (2, \u0027Violation 2\u0027, 3);", + "sql": "SELECT name FROM unions u WHERE u.id IN (SELECT union_id FROM cb_agreements) AND u.id NOT IN (SELECT union_id FROM reports);", + "sql_explanation": "The query checks if the id of each union is both in the \u0027cb_agreements\u0027 table\u0027s union_id column and not in the \u0027reports\u0027 table\u0027s union_id column, returning the names of unions involved in collective bargaining agreements but not involved in any labor rights violations." 
+}, { + "id": "4732", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the union names that have no reported workplace injuries.", + "sql_context": "CREATE TABLE injuries (id INT PRIMARY KEY, union_id INT); CREATE TABLE unions (id INT PRIMARY KEY, name VARCHAR(255)); INSERT INTO injuries (id, union_id) VALUES (1, 1), (2, 2), (3, 3), (4, 1); INSERT INTO unions (id, name) VALUES (1, \u0027Union A\u0027), (2, \u0027Union B\u0027), (3, \u0027Union C\u0027), (4, \u0027Union D\u0027);", + "sql": "SELECT name FROM unions WHERE id NOT IN (SELECT union_id FROM injuries);", + "sql_explanation": "The query checks if the id of each union is not in the \u0027injuries\u0027 table\u0027s union_id column, returning the names of unions with no reported workplace injuries." 
+}, { + "id": "1509", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the top funded biotech startup in Texas?", + "sql_context": "CREATE TABLE biotech_startups (id INT, name TEXT, location TEXT, funding_amount INT); INSERT INTO biotech_startups (id, name, location, funding_amount) VALUES (2, \u0027BioInnovate\u0027, \u0027Texas\u0027, 20000000), (3, \u0027TechGen\u0027, \u0027Texas\u0027, 15000000);", + "sql": "SELECT name FROM biotech_startups WHERE location \u003d \u0027Texas\u0027 AND funding_amount \u003d (SELECT MAX(funding_amount) FROM biotech_startups WHERE location \u003d \u0027Texas\u0027);", + "sql_explanation": "The SQL query retrieves the name of the top funded biotech startup in Texas by filtering rows based on the location column and using a subquery to find the maximum funding amount for startups in Texas." 
+}, { + "id": "1855", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and type of the most accurate biosensor, and the name and type of the least accurate biosensor?", + "sql_context": "CREATE TABLE biosensors (id INT, name VARCHAR(50), type VARCHAR(50), accuracy DECIMAL(3,2)); INSERT INTO biosensors (id, name, type, accuracy) VALUES (1, \u0027Biosensor1\u0027, \u0027Glucose\u0027, 0.95); INSERT INTO biosensors (id, name, type, accuracy) VALUES (2, \u0027Biosensor2\u0027, \u0027Pressure\u0027, 0.98);", + "sql": "SELECT name, type FROM biosensors WHERE accuracy \u003d (SELECT MAX(accuracy) FROM biosensors) OR accuracy \u003d (SELECT MIN(accuracy) FROM biosensors);", + "sql_explanation": "This query retrieves the names and types of biosensors whose accuracy is the maximum or minimum accuracy of all biosensors. It uses two subqueries to calculate the maximum and minimum accuracy from the biosensors table." 
+}, { + "id": "4485", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research projects have a budget lower than any bioprocess engineering project budget?", + "sql_context": "CREATE TABLE research (name TEXT, budget FLOAT); INSERT INTO research (name, budget) VALUES (\u0027ResearchA\u0027, 3000000), (\u0027ResearchB\u0027, 2000000), (\u0027ResearchC\u0027, 4000000); CREATE TABLE projects (name TEXT, budget FLOAT); INSERT INTO projects (name, budget) VALUES (\u0027ProjectA\u0027, 5000000), (\u0027ProjectB\u0027, 6000000), (\u0027ProjectC\u0027, 4500000);", + "sql": "SELECT name FROM research WHERE budget \u003c (SELECT MIN(budget) FROM projects);", + "sql_explanation": "This query retrieves the names of genetic research projects with a budget lower than any bioprocess engineering project budget by using a subquery to calculate the minimum budget of the bioprocess engineering projects and then using a WHERE clause to filter the \u0027budget\u0027 column." 
+}, { + "id": "4532", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research projects have a budget greater than the average budget?", + "sql_context": "CREATE TABLE research (name TEXT, budget FLOAT); INSERT INTO research (name, budget) VALUES (\u0027ResearchA\u0027, 7000000), (\u0027ResearchB\u0027, 6000000), (\u0027ResearchC\u0027, 8000000);", + "sql": "SELECT name FROM research WHERE budget \u003e (SELECT AVG(budget) FROM research);", + "sql_explanation": "This query retrieves the names of genetic research projects with a budget greater than the average budget by using a subquery to calculate the average budget and then using a WHERE clause to filter the \u0027budget\u0027 column." 
+}, { + "id": "619", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time to resolve a security incident in the IT department?", + "sql_context": "CREATE TABLE incident_resolutions (id INT, incident_id INT, department VARCHAR(255), resolution_time INT); INSERT INTO incident_resolutions (id, incident_id, department, resolution_time) VALUES (1, 111, \u0027IT\u0027, 60), (2, 222, \u0027HR\u0027, 30), (3, 111, \u0027IT\u0027, 75), (4, 333, \u0027IT\u0027, 90), (5, 222, \u0027HR\u0027, 45);", + "sql": "SELECT AVG(resolution_time) FROM incident_resolutions WHERE department \u003d \u0027IT\u0027 AND incident_id IN (SELECT incident_id FROM incident_resolutions GROUP BY incident_id HAVING COUNT(DISTINCT resolution_time) \u003d 1);", + "sql_explanation": "This query calculates the average resolution_time for incidents in the IT department from the incident_resolutions table where there is only one unique resolution_time for each incident_id. This is done by using a subquery to first select the incident_id values that have only one unique resolution_time, and then calculating the average resolution_time for those incidents." 
+}, { + "id": "736", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of security incidents by each team, and what is the percentage of the total for each team?", + "sql_context": "CREATE TABLE security_incidents (id INT, resolution_team VARCHAR(50), incident_date DATE); INSERT INTO security_incidents (id, resolution_team, incident_date) VALUES (1, \u0027Team A\u0027, \u00272022-01-01\u0027), (2, \u0027Team B\u0027, \u00272022-01-15\u0027);", + "sql": "SELECT resolution_team, COUNT(*) as num_incidents, CONCAT(ROUND(COUNT(*) / (SELECT COUNT(*) FROM security_incidents) * 100, 2), \u0027%\u0027) as pct_of_total FROM security_incidents GROUP BY resolution_team;", + "sql_explanation": "The SQL query selects the resolution_team, the count of incidents for each team, and the percentage of the total for each team from the security_incidents table. It then groups the results by resolution_team, which will return the total number of security incidents by each team and the percentage of the total for each team." 
+}, { + "id": "3429", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What are the details of the most recent phishing attempt?", + "sql_context": "CREATE TABLE phishing_attempts (id INT, ip VARCHAR(255), timestamp TIMESTAMP, target VARCHAR(255), success BOOLEAN); INSERT INTO phishing_attempts (id, ip, timestamp, target, success) VALUES (1, \u002710.0.0.1\u0027, \u00272021-01-01 10:00:00\u0027, \u0027CEO\u0027, false), (2, \u002710.0.0.2\u0027, \u00272021-01-01 11:00:00\u0027, \u0027CFO\u0027, true);", + "sql": "SELECT * FROM phishing_attempts WHERE timestamp \u003d (SELECT MAX(timestamp) FROM phishing_attempts);", + "sql_explanation": "This query first finds the maximum timestamp value in the phishing_attempts table using a subquery, and then returns all the columns for the record with that timestamp value." 
+}, { + "id": "800", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and category of the menu item in Tokyo that contains quinoa as an ingredient?", + "sql_context": "CREATE TABLE Menu_Item (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), restaurant_id INT); CREATE TABLE Ingredient (id INT PRIMARY KEY, name VARCHAR(255), origin VARCHAR(255), menu_item_id INT);", + "sql": "SELECT m.name, m.category FROM Menu_Item m INNER JOIN Ingredient i ON m.id \u003d i.menu_item_id WHERE i.name \u003d \u0027Quinoa\u0027 AND m.restaurant_id IN (SELECT r.id FROM Restaurant r WHERE r.city \u003d \u0027Tokyo\u0027);", + "sql_explanation": "This query joins the Menu_Item and Ingredient tables on the menu_item_id field. It then filters for rows where the name in the Ingredient table is \u0027Quinoa\u0027 and the restaurant is located in Tokyo, and returns the name and category of the menu item." 
+}, { + "id": "33", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the inventory turnover rate for each ingredient category", + "sql_context": "CREATE TABLE Ingredients (ingredient_id INT, ingredient_name VARCHAR(255), ingredient_category VARCHAR(255), quantity INT, purchase_price DECIMAL(5,2)); INSERT INTO Ingredients (ingredient_id, ingredient_name, ingredient_category, quantity, purchase_price) VALUES (1, \u0027Chickpeas\u0027, \u0027Legumes\u0027, 50, 1.25), (2, \u0027Chicken Breast\u0027, \u0027Poultry\u0027, 100, 3.50); CREATE TABLE Sales (sales_id INT, ingredient_id INT, quantity INT); INSERT INTO Sales (sales_id, ingredient_id, quantity) VALUES (1, 1, 25), (2, 2, 80);", + "sql": "SELECT ingredient_category, SUM(quantity) AS total_quantity, AVG(quantity) AS avg_quantity_sold, SUM(quantity) / (SELECT SUM(quantity) * purchase_price FROM Ingredients, Sales WHERE Ingredients.ingredient_id \u003d Sales.ingredient_id GROUP BY Ingredients.ingredient_id) AS inventory_turnover_rate FROM Ingredients, Sales WHERE Ingredients.ingredient_id \u003d Sales.ingredient_id GROUP BY ingredient_category;", + "sql_explanation": "This query calculates the inventory turnover rate for each ingredient category by using nested subqueries. The subquery calculates the total value of inventory, and the main query calculates the inventory turnover rate by dividing the total quantity sold by the total value of inventory." 
+}, { + "id": "453", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total revenue and profit for the month of January in the year 2022?", + "sql_context": "CREATE TABLE sales (id INT, dish_id INT, order_date DATE, quantity INT, price FLOAT); INSERT INTO sales (id, dish_id, order_date, quantity, price) VALUES (1, 1, \u00272022-01-02\u0027, 2, 10.00), (2, 2, \u00272022-01-03\u0027, 1, 9.25), (3, 3, \u00272022-01-04\u0027, 3, 12.00), (4, 1, \u00272022-01-05\u0027, 1, 7.50), (5, 2, \u00272022-01-06\u0027, 4, 9.25), (6, 3, \u00272022-01-07\u0027, 2, 12.00);", + "sql": "SELECT SUM(quantity * price) as revenue, SUM((quantity * price) - (quantity * (SELECT cost FROM ingredients WHERE dish_id \u003d sales.dish_id LIMIT 1))) as profit FROM sales WHERE order_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027;", + "sql_explanation": "This query calculates the total revenue and profit for the month of January 2022 by summing the revenue and profit columns. Revenue is calculated as the quantity multiplied by the price, and profit is calculated as the revenue minus the cost of ingredients, which is obtained using a subquery. The query filters sales records for the month of January 2022." 
+}, { + "id": "629", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of vegan dishes, excluding dishes with less than 5 ratings?", + "sql_context": "CREATE TABLE dish_ratings (id INT, dish_name TEXT, rating INT);", + "sql": "SELECT AVG(rating) FROM dish_ratings WHERE dish_name IN (SELECT dish_name FROM dish_ratings GROUP BY dish_name HAVING COUNT(*) \u003e\u003d 5) AND dish_name IN (SELECT dish_name FROM menu_items WHERE is_vegan \u003d TRUE);", + "sql_explanation": "The SQL query first selects the dish_names of all vegan dishes from the menu_items table. It then uses this result to filter the dish_ratings table to only include records for dishes with at least 5 ratings. Finally, it calculates the average rating of vegan dishes in the result." 
+}, { + "id": "1403", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total sales of the top 3 most expensive items", + "sql_context": "CREATE TABLE sales (item_id INT, sales_amount DECIMAL(5,2)); INSERT INTO sales VALUES (1, 150.00), (2, 200.00), (3, 120.00);", + "sql": "SELECT SUM(sales.sales_amount) FROM sales JOIN menu ON sales.item_id \u003d menu.item_id WHERE menu.item_id IN (SELECT item_id FROM menu ORDER BY price DESC LIMIT 3);", + "sql_explanation": "This SQL query finds the total sales of the top 3 most expensive items by joining the \u0027menu\u0027 and \u0027sales\u0027 tables on the \u0027item_id\u0027 column, and then filtering for the top 3 most expensive items based on the \u0027price\u0027 column in the \u0027menu\u0027 table. Finally, the sales_amount for these top 3 items is summed up." 
+}, { + "id": "369", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which workers earn above the average salary in their respective departments across all factories?", + "sql_context": "CREATE TABLE factories (factory_id INT, name VARCHAR(20)); CREATE TABLE departments (department_id INT, name VARCHAR(20)); CREATE TABLE workers (worker_id INT, factory_id INT, department_id INT, salary DECIMAL(5,2)); INSERT INTO factories (factory_id, name) VALUES (1, \u0027factory1\u0027), (2, \u0027factory2\u0027), (3, \u0027factory3\u0027); INSERT INTO departments (department_id, name) VALUES (1, \u0027textiles\u0027), (2, \u0027metalwork\u0027), (3, \u0027electronics\u0027); INSERT INTO workers (worker_id, factory_id, department_id, salary) VALUES (1, 1, 1, 35000), (2, 1, 2, 40000), (3, 2, 3, 50000), (4, 3, 1, 60000);", + "sql": "SELECT w.worker_id, w.factory_id, w.department_id, w.salary FROM workers w INNER JOIN (SELECT department_id, AVG(salary) avg_salary FROM workers GROUP BY department_id) d ON w.department_id \u003d d.department_id WHERE w.salary \u003e d.avg_salary;", + "sql_explanation": "First, the query calculates the average salary for each department using a subquery. Then, it performs an INNER JOIN between the \u0027workers\u0027 table and the subquery based on the \u0027department_id\u0027. Finally, it filters for workers earning above the average salary for their respective departments." 
+}, { + "id": "407", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest AI integration and automation level for suppliers from India in the textile industry?", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), industry VARCHAR(255)); INSERT INTO suppliers (id, name, country, industry) VALUES (3, \u0027Eco-Threads\u0027, \u0027India\u0027, \u0027Textile\u0027); CREATE TABLE industry_4_0 (id INT PRIMARY KEY, supplier_id INT, automation_level DECIMAL(10,2), ai_integration BOOLEAN); INSERT INTO industry_4_0 (id, supplier_id, automation_level, ai_integration) VALUES (3, 3, 0.88, true);", + "sql": "SELECT i.ai_integration, i.automation_level FROM industry_4_0 i INNER JOIN suppliers s ON i.supplier_id \u003d s.id WHERE s.country \u003d \u0027India\u0027 AND s.industry \u003d \u0027Textile\u0027 AND i.id IN (SELECT MAX(id) FROM industry_4_0 GROUP BY supplier_id);", + "sql_explanation": "Join the industry_4_0 table with the suppliers table to filter for Indian textile suppliers, then select the latest AI integration and automation level for each supplier." 
+}, { + "id": "614", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees working in the \u0027manufacturing\u0027 department, excluding any employees who do not also appear in the \u0027training\u0027 table?", + "sql_context": "CREATE TABLE companies (company_id INT, department VARCHAR(20)); CREATE TABLE employees (employee_id INT, company_id INT); CREATE TABLE training (employee_id INT, training VARCHAR(20)); INSERT INTO companies (company_id, department) VALUES (1, \u0027manufacturing\u0027), (2, \u0027HR\u0027), (3, \u0027manufacturing\u0027); INSERT INTO employees (employee_id, company_id) VALUES (1, 1), (2, 1), (3, 2); INSERT INTO training (employee_id, training) VALUES (1, \u0027welding\u0027), (2, \u0027safety\u0027), (3, \u0027safety\u0027);", + "sql": "SELECT COUNT(*) FROM companies INNER JOIN employees ON companies.company_id \u003d employees.company_id WHERE companies.department \u003d \u0027manufacturing\u0027 AND employees.employee_id IN (SELECT employee_id FROM training);", + "sql_explanation": "The SQL query performs an inner join between the \u0027companies\u0027 and \u0027employees\u0027 tables on the \u0027company_id\u0027 column. It then filters the results to only include records where the \u0027department\u0027 column is equal to \u0027manufacturing\u0027 and the \u0027employee_id\u0027 appears in the \u0027training\u0027 table. Finally, it counts the number of records that meet these criteria." 
+}, { + "id": "638", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Which suppliers have not received any safety training in the past year? Delete these records.", + "sql_context": "CREATE TABLE suppliers(id INT, name TEXT, location TEXT);CREATE TABLE trainings(id INT, supplier_id INT, type TEXT, date DATE);INSERT INTO suppliers(id, name, location) VALUES (1, \u0027Supplier A\u0027, \u0027City A\u0027); INSERT INTO trainings(id, supplier_id, type, date) VALUES (1, 1, \u0027Safety\u0027, \u00272021-02-01\u0027), (2, 1, \u0027Quality\u0027, \u00272021-03-01\u0027);", + "sql": "DELETE FROM trainings WHERE supplier_id IN (SELECT id FROM suppliers WHERE id NOT IN (SELECT supplier_id FROM trainings WHERE trainings.date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND type \u003d \u0027Safety\u0027));", + "sql_explanation": "The subquery in the DELETE statement selects the supplier_id of suppliers who have had safety training in the past year. The outer DELETE statement deletes all training records for suppliers not in the result of the subquery, effectively removing safety training records for suppliers without safety training in the past year." 
+}, { + "id": "698", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the records in the workforce_training table to reflect 15 more employees trained on \u0027AI\u0027 for \u0027ManufacturerH\u0027.", + "sql_context": "CREATE TABLE manufacturers (manufacturer_id INT, manufacturer_name TEXT); INSERT INTO manufacturers (manufacturer_id, manufacturer_name) VALUES (1, \u0027ManufacturerA\u0027), (2, \u0027ManufacturerB\u0027), (3, \u0027ManufacturerC\u0027), (4, \u0027ManufacturerD\u0027), (5, \u0027ManufacturerE\u0027), (6, \u0027ManufacturerF\u0027), (7, \u0027ManufacturerG\u0027), (8, \u0027ManufacturerH\u0027); CREATE TABLE workforce_training (training_id INT, manufacturer_id INT, training_topic TEXT, num_employees INT); INSERT INTO workforce_training (training_id, manufacturer_id, training_topic, num_employees) VALUES (1, 3, \u0027Robotics\u0027, 30), (2, 3, \u0027Automated Guided Vehicles\u0027, 25), (3, 8, \u0027AI\u0027, 40);", + "sql": "UPDATE workforce_training SET num_employees \u003d num_employees + 15 WHERE manufacturer_id \u003d (SELECT manufacturer_id FROM manufacturers WHERE manufacturer_name \u003d \u0027ManufacturerH\u0027) AND training_topic \u003d \u0027AI\u0027;", + "sql_explanation": "This query updates the records in the workforce_training table to reflect 15 more employees trained on \u0027AI\u0027 for \u0027ManufacturerH\u0027. It uses a subquery to find the manufacturer_id for \u0027ManufacturerH\u0027 and updates the num_employees for that manufacturer_id and training_topic." 
+}, { + "id": "757", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which suppliers have not provided any materials to factory5 in the past year?", + "sql_context": "CREATE TABLE Suppliers (id INT, name TEXT, location TEXT);CREATE TABLE Materials (id INT, supplier_id INT, factory_id INT, material TEXT, quantity INT, date DATE);INSERT INTO Suppliers VALUES (1, \u0027SupplierA\u0027, \u0027CityA\u0027), (2, \u0027SupplierB\u0027, \u0027CityB\u0027), (3, \u0027SupplierC\u0027, \u0027CityC\u0027);INSERT INTO Materials VALUES (1, 1, 5, \u0027MaterialX\u0027, 100, \u00272021-06-01\u0027), (2, 1, 5, \u0027MaterialY\u0027, 200, \u00272021-07-15\u0027), (3, 2, 5, \u0027MaterialX\u0027, 150, \u00272021-08-01\u0027), (4, 3, 6, \u0027MaterialZ\u0027, 50, \u00272021-09-10\u0027);", + "sql": "SELECT DISTINCT s.name FROM Suppliers s WHERE s.id NOT IN (SELECT m.supplier_id FROM Materials m WHERE m.factory_id \u003d 5 AND m.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND CURRENT_DATE);", + "sql_explanation": "The SQL query first selects the distinct names of suppliers where the supplier_id is not in the subquery result. The subquery selects supplier_ids from the Materials table for factory 5 between the current date minus one year and the current date. The query then returns the names of suppliers who have not provided any materials to factory5 in the past year." 
+}, { + "id": "1428", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the factory in \u0027China\u0027 with the lowest number of workers?", + "sql_context": "CREATE TABLE factories_workers_ext (id INT, factory_id INT, name VARCHAR(50), country VARCHAR(50), worker_count INT); INSERT INTO factories_workers_ext (id, factory_id, name, country, worker_count) VALUES (1, 1, \u0027Factory One\u0027, \u0027Germany\u0027, 100), (2, 2, \u0027Factory Two\u0027, \u0027China\u0027, 50), (3, 3, \u0027Factory Three\u0027, \u0027China\u0027, 150);", + "sql": "SELECT name FROM factories_workers_ext WHERE country \u003d \u0027China\u0027 AND worker_count \u003d (SELECT MIN(worker_count) FROM factories_workers_ext WHERE country \u003d \u0027China\u0027);", + "sql_explanation": "This query uses a subquery to find the minimum worker count for factories in China, then filters factories in China to only the one with that minimum worker count. The name of that factory is then returned." 
+}, { + "id": "1903", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the factory with the lowest CO2 emissions in France?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(50), location VARCHAR(50), co2_emissions INT); INSERT INTO factories (id, name, location, co2_emissions) VALUES (1, \u0027EcoFactory\u0027, \u0027France\u0027, 100), (2, \u0027SmartTech\u0027, \u0027France\u0027, 120), (3, \u0027GreenInnovations\u0027, \u0027France\u0027, 90);", + "sql": "SELECT name FROM factories WHERE location \u003d \u0027France\u0027 AND co2_emissions \u003d (SELECT MIN(co2_emissions) FROM factories WHERE location \u003d \u0027France\u0027);", + "sql_explanation": "This query identifies the name of the factory with the lowest CO2 emissions in France by using a subquery to calculate the minimum CO2 emissions of factories in France, and then filtering the records of the factories table to only include factories with CO2 emissions equal to the minimum." 
+}, { + "id": "3022", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of workers in the \u0027textiles\u0027 department?", + "sql_context": "CREATE TABLE department (id INT, name VARCHAR(255), manager_id INT, location VARCHAR(255)); INSERT INTO department (id, name, manager_id, location) VALUES (1, \u0027textiles\u0027, 101, \u0027New York\u0027); INSERT INTO department (id, name, manager_id, location) VALUES (2, \u0027metallurgy\u0027, 102, \u0027Chicago\u0027); CREATE TABLE employee (id INT, name VARCHAR(255), department_id INT, salary DECIMAL(10,2)); INSERT INTO employee (id, name, department_id, salary) VALUES (1001, \u0027Alice\u0027, 1, 50000.00); INSERT INTO employee (id, name, department_id, salary) VALUES (1002, \u0027Bob\u0027, 1, 55000.00); INSERT INTO employee (id, name, department_id, salary) VALUES (1003, \u0027Charlie\u0027, 2, 60000.00);", + "sql": "SELECT AVG(salary) FROM employee WHERE department_id \u003d (SELECT id FROM department WHERE name \u003d \u0027textiles\u0027);", + "sql_explanation": "This SQL query calculates the average salary for employees in the \u0027textiles\u0027 department. It first finds the id of the \u0027textiles\u0027 department, and then filters the employee table to only include records with that department id. Lastly, it calculates the average salary for those records." 
+}, { + "id": "541", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 diseases with the highest infection rate in New York.", + "sql_context": "CREATE TABLE Rates (RateID INT, Age INT, Gender VARCHAR(10), City VARCHAR(20), Disease VARCHAR(20), Rate DECIMAL(5,2)); INSERT INTO Rates (RateID, Age, Gender, City, Disease, Rate) VALUES (1, 35, \u0027Male\u0027, \u0027New York\u0027, \u0027Cholera\u0027, 0.15);", + "sql": "SELECT Disease, Rate FROM (SELECT Disease, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Rates WHERE City \u003d \u0027New York\u0027) as Rate FROM Rates WHERE City \u003d \u0027New York\u0027 GROUP BY Disease) as Subquery ORDER BY Rate DESC LIMIT 3;", + "sql_explanation": "This query calculates the infection rate for each disease in New York by grouping data by Disease and City columns, counting the number of cases and calculating the rate based on the total number of cases in New York. It then orders the results by rate in descending order and limits the output to the top 3 diseases." 
+}, { + "id": "812", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 cities with the highest Tuberculosis infection rate.", + "sql_context": "CREATE TABLE Infections (InfectionID INT, Age INT, Gender VARCHAR(10), City VARCHAR(20), Disease VARCHAR(20)); INSERT INTO Infections (InfectionID, Age, Gender, City, Disease) VALUES (1, 30, \u0027Male\u0027, \u0027Los Angeles\u0027, \u0027Tuberculosis\u0027);", + "sql": "SELECT City, COUNT(*) as InfectionCount, (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Infections)) as Rate FROM Infections WHERE Disease \u003d \u0027Tuberculosis\u0027 GROUP BY City ORDER BY Rate DESC LIMIT 5;", + "sql_explanation": "This query calculates the infection rate for each city by grouping data by City and Disease columns, counting the number of infections, and calculating the rate based on the total number of infections. It then orders the results by rate in descending order and limits the output to the top 5 cities." 
+}, { + "id": "1379", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of patients with a specific diagnosis code by ethnicity?", + "sql_context": "CREATE TABLE diagnoses (id INT, patient_id INT, code VARCHAR(10), ethnicity VARCHAR(50)); INSERT INTO diagnoses (id, patient_id, code, ethnicity) VALUES (1, 1, \u0027A01\u0027, \u0027Caucasian\u0027), (2, 1, \u0027B01\u0027, \u0027Caucasian\u0027), (3, 2, \u0027A01\u0027, \u0027African American\u0027), (4, 3, \u0027C01\u0027, \u0027Hispanic\u0027);", + "sql": "SELECT ethnicity, code, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM diagnoses WHERE code \u003d \u0027A01\u0027) AS percentage FROM diagnoses WHERE code \u003d \u0027A01\u0027 GROUP BY ethnicity;", + "sql_explanation": "This SQL query calculates the percentage of patients with a specific diagnosis code by ethnicity. It uses a subquery to calculate the total number of patients with the diagnosis code, and then divides the number of patients in each group by the total number of patients with the diagnosis code to get the percentage. The COUNT(*) function is used to count the number of patients in each group." 
+}, { + "id": "1921", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common infectious disease in Asia?", + "sql_context": "CREATE TABLE Diseases (Disease TEXT, Continent TEXT, NumberOfCases INTEGER); INSERT INTO Diseases (Disease, Continent, NumberOfCases) VALUES (\u0027Tuberculosis\u0027, \u0027Asia\u0027, 9000000), (\u0027Malaria\u0027, \u0027Africa\u0027, 20000000), (\u0027HIV\u0027, \u0027Europe\u0027, 500000);", + "sql": "SELECT Disease FROM Diseases WHERE Continent \u003d \u0027Asia\u0027 AND NumberOfCases \u003d (SELECT MAX(NumberOfCases) FROM Diseases WHERE Continent \u003d \u0027Asia\u0027);", + "sql_explanation": "This query retrieves the most common infectious disease in Asia. It does so by selecting the Disease column from the Diseases table, where the Continent is \u0027Asia\u0027 and the NumberOfCases is equal to the maximum number of cases in Asia." 
+}, { + "id": "1952", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of smokers in India by age group?", + "sql_context": "CREATE TABLE smoking_data (id INT, age_group VARCHAR(20), country VARCHAR(20), smokers INT); INSERT INTO smoking_data (id, age_group, country, smokers) VALUES (1, \u002718-24\u0027, \u0027India\u0027, 5000), (2, \u002725-34\u0027, \u0027India\u0027, 8000), (3, \u002735-44\u0027, \u0027India\u0027, 10000);", + "sql": "SELECT age_group, smokers * 100 / (SELECT SUM(smokers) FROM smoking_data WHERE country \u003d \u0027India\u0027) FROM smoking_data WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "This query finds the percentage of smokers in India by age group by filtering the smoking_data table for rows with a country of \u0027India\u0027, dividing the smokers column by the total number of smokers in India using a subquery, and then multiplying by 100 to convert it to a percentage." 
+}, { + "id": "2041", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and age of the oldest patient who received a COVID-19 vaccine in New York?", + "sql_context": "CREATE TABLE covid_vaccine (patient_id INT, vaccine_name VARCHAR(10), administered_date DATE, patient_age INT); INSERT INTO covid_vaccine (patient_id, vaccine_name, administered_date, patient_age) VALUES (1, \u0027Moderna\u0027, \u00272021-01-01\u0027, 80);", + "sql": "SELECT vaccine_name, patient_age FROM covid_vaccine WHERE patient_age \u003d (SELECT MAX(patient_age) FROM covid_vaccine WHERE state \u003d \u0027NY\u0027);", + "sql_explanation": "The SQL query finds the oldest patient who received a COVID-19 vaccine in New York by using a subquery to find the maximum patient age in the New York data, and then selecting the vaccine name and patient age for the row with that age." 
+}, { + "id": "791", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergencies handled by police stations in districts with high crime rates?", + "sql_context": "CREATE TABLE station_crime_rates (sid INT, rate INT, PRIMARY KEY(sid)); CREATE TABLE station_emergencies (eid INT, sid INT, time TIMESTAMP, PRIMARY KEY(eid), FOREIGN KEY(sid) REFERENCES stations(sid));", + "sql": "SELECT SUM(1) FROM station_emergencies se JOIN station_crime_rates sc ON se.sid \u003d sc.sid WHERE sc.rate \u003e (SELECT AVG(rate) FROM (SELECT did, AVG(rate) AS rate FROM crime_rates GROUP BY did) cr);", + "sql_explanation": "This query calculates the total number of emergencies handled by police stations in districts with a crime rate above the average. It first joins the station_emergencies table with the station_crime_rates table. Then, it uses a subquery to find the average crime rate in all districts. Finally, it counts the number of emergencies in stations with a crime rate above this average." 
+}, { + "id": "3291", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency incidents recorded in 2021 and 2022?", + "sql_context": "CREATE TABLE emergency_incidents (id INT, year INT, type VARCHAR(255)); INSERT INTO emergency_incidents (id, year, type) VALUES (1, 2021, \u0027Medical Emergency\u0027), (2, 2022, \u0027Fire\u0027);", + "sql": "SELECT SUM(year) FROM (SELECT year FROM emergency_incidents WHERE year IN (2021, 2022)) AS subquery;", + "sql_explanation": "The SQL query uses a subquery to select the years 2021 and 2021 from the emergency_incidents table, then sums up the years to get the total number of emergency incidents in those years." 
+}, { + "id": "85", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of climate mitigation projects per year in South Asia from 2010 to 2020?", + "sql_context": "CREATE TABLE south_asia_projects (project_id INT, project_name TEXT, location TEXT, project_type TEXT, start_date DATE, end_date DATE);", + "sql": "SELECT AVG(projects_per_year) FROM (SELECT YEAR(start_date) AS project_year, COUNT(project_id) AS projects_per_year FROM south_asia_projects WHERE location LIKE \u0027%South Asia%\u0027 AND project_type \u003d \u0027climate_mitigation\u0027 AND start_date BETWEEN \u00272010-01-01\u0027 AND \u00272020-12-31\u0027 GROUP BY YEAR(start_date)) AS subquery;", + "sql_explanation": "The SQL query calculates the average number of climate mitigation projects per year in South Asia from 2010 to 2020 by grouping the \u0027south_asia_projects\u0027 table by \u0027start_date\u0027 and counting the number of projects (\u0027projects_per_year\u0027) for each year from 2010 to 2020 where \u0027location\u0027 contains \u0027South Asia\u0027 and \u0027project_type\u0027 is \u0027climate_mitigation\u0027. The result is then averaged using the AVG function on \u0027projects_per_year\u0027 from the subquery." 
+}, { + "id": "428", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the climate finance providers that have provided funding for climate communication initiatives in Oceania.", + "sql_context": "CREATE TABLE climate_finance (id INT, provider VARCHAR(100), initiative VARCHAR(100), amount FLOAT, year INT); INSERT INTO climate_finance (id, provider, initiative, amount, year) VALUES (1, \u0027World Bank\u0027, \u0027Climate Communication\u0027, 10000000, 2015), (2, \u0027UNDP\u0027, \u0027Climate Adaptation\u0027, 15000000, 2016);", + "sql": "SELECT DISTINCT provider FROM climate_finance WHERE initiative \u003d \u0027Climate Communication\u0027 AND EXISTS (SELECT 1 FROM climate_finance_location WHERE climate_finance.id \u003d climate_finance_location.finance_id AND location \u003d \u0027Oceania\u0027);", + "sql_explanation": "This query selects distinct providers from the \u0027climate_finance\u0027 table where the \u0027initiative\u0027 is \u0027Climate Communication\u0027 and there exists a record in the \u0027climate_finance_location\u0027 table with a matching \u0027id\u0027 and \u0027location\u0027 of \u0027Oceania\u0027." 
+}, { + "id": "1231", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have not reported any climate adaptation activities in Africa since 2010?", + "sql_context": "CREATE TABLE adaptation_activities (org VARCHAR(50), year INT, continent VARCHAR(50), activity VARCHAR(50)); INSERT INTO adaptation_activities VALUES (\u0027OrgA\u0027, 2010, \u0027Africa\u0027, \u0027ActivityA\u0027);", + "sql": "SELECT DISTINCT org FROM adaptation_activities WHERE org NOT IN (SELECT org FROM adaptation_activities WHERE year \u003e\u003d 2010 AND continent \u003d \u0027Africa\u0027 AND activity !\u003d \u0027N/A\u0027)", + "sql_explanation": "Identify the organizations that have reported climate adaptation activities in Africa since 2010, and then exclude those organizations to find the ones that have not reported any climate adaptation activities." 
+}, { + "id": "2351", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all items with a price above the median for sustainable items", + "sql_context": "CREATE TABLE inventory (id INT, item_name VARCHAR(20), price DECIMAL(5,2), is_sustainable BOOLEAN); INSERT INTO inventory (id, item_name, price, is_sustainable) VALUES (1, \u0027t-shirt\u0027, 20.99, false), (2, \u0027blouse\u0027, 45.50, true), (3, \u0027jeans\u0027, 39.99, true);", + "sql": "SELECT * FROM inventory WHERE is_sustainable \u003d true AND price \u003e (SELECT AVG(price) FROM inventory WHERE is_sustainable \u003d true);", + "sql_explanation": "This SQL query first calculates the average price of sustainable items using a subquery. It then filters the inventory table to only include sustainable items with a price above that average." 
+}, { + "id": "2450", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers of each gender and size are there, displayed as a percentage of the total number of customers?", + "sql_context": "CREATE TABLE Customers (id INT, gender VARCHAR(10), age INT, size VARCHAR(20)); INSERT INTO Customers (id, gender, age, size) VALUES (1, \u0027Female\u0027, 25, \u0027S\u0027), (2, \u0027Male\u0027, 35, \u0027L\u0027), (3, \u0027Female\u0027, 45, \u0027XL\u0027), (4, \u0027Male\u0027, 55, \u0027XXL\u0027), (5, \u0027Non-binary\u0027, 30, \u0027M\u0027), (6, \u0027Female\u0027, 33, \u0027M\u0027), (7, \u0027Male\u0027, 40, \u0027S\u0027), (8, \u0027Female\u0027, 50, \u0027L\u0027);", + "sql": "SELECT gender, size, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Customers) AS percentage FROM Customers GROUP BY gender, size;", + "sql_explanation": "Calculate the percentage of customers for each combination of gender and size by dividing the count of customers for each combination by the total number of customers." 
+}, { + "id": "809", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of cases won by attorneys who have a last name starting with the letter \u0027S\u0027?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, name VARCHAR(50), last_name VARCHAR(20)); INSERT INTO attorneys (attorney_id, name, last_name) VALUES (1, \u0027Jane Smith\u0027, \u0027Smith\u0027), (2, \u0027Michael Johnson\u0027, \u0027Johnson\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, case_outcome VARCHAR(10)); INSERT INTO cases (case_id, attorney_id, case_outcome) VALUES (1, 1, \u0027Won\u0027), (2, 1, \u0027Won\u0027), (3, 2, \u0027Lost\u0027);", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM cases) FROM cases JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id WHERE attorneys.last_name LIKE \u0027S%\u0027 AND cases.case_outcome \u003d \u0027Won\u0027;", + "sql_explanation": "This query joins the attorneys and cases tables on the attorney_id column. It then filters for rows where the attorney\u0027s last name starts with the letter \u0027S\u0027 and the case outcome is \u0027Won\u0027. To calculate the percentage of cases won, it uses a subquery to count the total number of cases. Finally, it calculates the number of won cases as a percentage of the total number of cases." 
+}, { + "id": "1991", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration of cases handled by attorney Jane Smith?", + "sql_context": "CREATE TABLE Cases (CaseID INT, AttorneyID INT, CloseDate DATE); INSERT INTO Cases (CaseID, AttorneyID, CloseDate) VALUES (1, 2, \u00272022-03-01\u0027), (2, 2, \u00272022-06-15\u0027);", + "sql": "SELECT AVG(DATEDIFF(day, OpenDate, CloseDate)) FROM Cases WHERE AttorneyID \u003d (SELECT AttorneyID FROM Attorneys WHERE Name \u003d \u0027Jane Smith\u0027);", + "sql_explanation": "This query calculates the average duration of cases handled by attorney Jane Smith by finding the difference in days between the OpenDate and CloseDate for each of her cases, and then averaging those differences." 
+}, { + "id": "3677", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of all attorneys who have not opened any cases.", + "sql_context": "CREATE TABLE cases (case_id INT, attorney_id INT, case_open_date DATE); CREATE TABLE attorney (attorney_id INT, attorney_name VARCHAR(30));", + "sql": "SELECT attorney_name FROM attorney WHERE attorney_id NOT IN (SELECT attorney_id FROM cases);", + "sql_explanation": "This query selects the \u0027attorney_name\u0027 column for all attorneys whose \u0027attorney_id\u0027 does not exist in the \u0027cases\u0027 table. This represents attorneys who have not opened any cases." 
+}, { + "id": "600", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all marine protected areas in the Pacific region, grouped by country?", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), depth FLOAT, region VARCHAR(255)); INSERT INTO marine_protected_areas (id, name, depth, region) VALUES (1, \u0027Galapagos Islands\u0027, 2000, \u0027Pacific\u0027); INSERT INTO marine_protected_areas (id, name, depth, region) VALUES (2, \u0027Palau National Marine Sanctuary\u0027, 5000, \u0027Pacific\u0027);", + "sql": "SELECT region, country, AVG(depth) as avg_depth FROM (SELECT region, SUBSTRING(name, 1, (INSTR(name, \u0027 \u0027) - 1)) as country, depth FROM marine_protected_areas WHERE region \u003d \u0027Pacific\u0027) GROUP BY region, country;", + "sql_explanation": "This SQL query first extracts the country name from the name of each marine protected area in the Pacific region, then calculates the average depth of these marine protected areas, grouped by country." 
+}, { + "id": "1701", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the longest coastlines in their respective continents?", + "sql_context": "CREATE TABLE countries (id INT, name TEXT, continent TEXT, coastline_km FLOAT); INSERT INTO countries (id, name, continent, coastline_km) VALUES (1, \u0027Canada\u0027, \u0027North America\u0027, 202089), (2, \u0027Norway\u0027, \u0027Europe\u0027, 25322), (3, \u0027South Africa\u0027, \u0027Africa\u0027, 2798), (4, \u0027Australia\u0027, \u0027Australia\u0027, 25760), (5, \u0027Chile\u0027, \u0027South America\u0027, 6435);", + "sql": "SELECT name, continent FROM countries WHERE coastline_km \u003d (SELECT MAX(coastline_km) FROM countries WHERE countries.continent \u003d countries.continent);", + "sql_explanation": "Retrieve the name and continent of countries with the maximum coastline_km value in their respective continents." 
+}, { + "id": "1941", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine species that are threatened in the Antarctic Ocean?", + "sql_context": "CREATE TABLE marine_species (name TEXT, threatened BOOLEAN, ocean TEXT); CREATE TABLE ocean_regions (name TEXT, area FLOAT);", + "sql": "SELECT COUNT(*) FROM marine_species WHERE threatened \u003d TRUE AND ocean \u003d (SELECT name FROM ocean_regions WHERE area \u003d \u0027Antarctic Ocean\u0027);", + "sql_explanation": "The query calculates the total number of marine species that are threatened in the Antarctic Ocean by using a subquery to find the name of the Antarctic Ocean from the ocean_regions table, and then using that result to filter the marine_species table to only include those in the Antarctic Ocean and those that are threatened. The count of these species is then calculated using the COUNT function." 
+}, { + "id": "572", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique users who have streamed songs from artists in both \u0027Rock\u0027 and \u0027Jazz\u0027 genres?", + "sql_context": "CREATE TABLE Streams (StreamID INT, UserID INT, ArtistID INT); INSERT INTO Streams (StreamID, UserID, ArtistID) VALUES (1, 101, 1), (2, 101, 2), (3, 102, 3), (4, 102, 4), (5, 103, 1), (6, 103, 3);", + "sql": "SELECT COUNT(DISTINCT UserID) AS UniqueUsers FROM (SELECT UserID FROM Streams JOIN Artists ON Streams.ArtistID \u003d Artists.ArtistID WHERE Genre IN (\u0027Rock\u0027, \u0027Jazz\u0027) GROUP BY UserID HAVING COUNT(DISTINCT Genre) \u003d 2);", + "sql_explanation": "Join \u0027Streams\u0027 and \u0027Artists\u0027 tables, filter rows with \u0027Rock\u0027 and \u0027Jazz\u0027 genres, group by UserID, count unique users who have streamed songs from artists in both genres." 
+}, { + "id": "834", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the ticket sales revenue for each artist\u0027s first concert in their career.", + "sql_context": "CREATE TABLE ticket_sales (sale_id INT, artist_name VARCHAR(100), concert_location VARCHAR(100), num_tickets INT, ticket_price INT, sale_date DATE); INSERT INTO ticket_sales (sale_id, artist_name, concert_location, num_tickets, ticket_price, sale_date) VALUES (1, \u0027Taylor Swift\u0027, \u0027Nashville, USA\u0027, 5000, 50, \u00272006-06-01\u0027); INSERT INTO ticket_sales (sale_id, artist_name, concert_location, num_tickets, ticket_price, sale_date) VALUES (2, \u0027BTS\u0027, \u0027Seoul, South Korea\u0027, 10000, 30, \u00272013-06-01\u0027);", + "sql": "SELECT artist_name, num_tickets * ticket_price as first_concert_revenue FROM ticket_sales WHERE sale_id \u003d (SELECT MIN(sale_id) FROM ticket_sales WHERE artist_name \u003d ticket_sales.artist_name);", + "sql_explanation": "This query determines the ticket sales revenue for each artist\u0027s first concert in their career. It does this by performing a subquery to find the sale_id of each artist\u0027s first concert and then joining this back to the ticket_sales table to calculate the revenue for each concert." 
+}, { + "id": "1388", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 streaming songs by female artists in 2021.", + "sql_context": "CREATE TABLE songs (id INT, artist_id INT, title VARCHAR(255), release_year INT, streams INT); INSERT INTO songs (id, artist_id, title, release_year, streams) VALUES (1, 1001, \u0027Song1\u0027, 2021, 100000); INSERT INTO songs (id, artist_id, title, release_year, streams) VALUES (2, 1002, \u0027Song2\u0027, 2020, 120000); INSERT INTO songs (id, artist_id, title, release_year, streams) VALUES (3, 1003, \u0027Song3\u0027, 2021, 150000); INSERT INTO songs (id, artist_id, title, release_year, streams) VALUES (4, 1004, \u0027Song4\u0027, 2019, 80000); INSERT INTO songs (id, artist_id, title, release_year, streams) VALUES (5, 1001, \u0027Song5\u0027, 2021, 110000);", + "sql": "SELECT title, streams FROM songs WHERE release_year \u003d 2021 AND artist_id IN (SELECT artist_id FROM artists WHERE gender \u003d \u0027female\u0027) ORDER BY streams DESC LIMIT 3;", + "sql_explanation": "This SQL query finds the top 3 streaming songs by female artists in 2021 by selecting the title and streams columns from the songs table where the release year is 2021 and the artist\u0027s gender is female, and then ordering the results by the streams column in descending order and limiting the results to the top 3." 
+}, { + "id": "1615", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the median age of country artists who have sold more than 1000 tickets?", + "sql_context": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100), Age INT, Genre VARCHAR(50), TicketsSold INT); INSERT INTO Artists (ArtistID, ArtistName, Age, Genre, TicketsSold) VALUES (1, \u0027Artist A\u0027, 35, \u0027Country\u0027, 3000), (2, \u0027Artist B\u0027, 45, \u0027Jazz\u0027, 4000), (3, \u0027Artist C\u0027, 28, \u0027Pop\u0027, 5000), (4, \u0027Artist D\u0027, 50, \u0027Country\u0027, 2500), (5, \u0027Artist E\u0027, 42, \u0027Country\u0027, 1500), (6, \u0027Artist F\u0027, 48, \u0027Jazz\u0027, 6000);", + "sql": "SELECT AVG(Age) FROM (SELECT ArtistName, Age FROM Artists WHERE Genre \u003d \u0027Country\u0027 AND TicketsSold \u003e 1000 ORDER BY Age) AS Subquery ORDER BY Age LIMIT 1;", + "sql_explanation": "This query calculates the median age of country artists who have sold more than 1000 tickets by selecting the average age from a subquery that orders the rows in the \u0027Artists\u0027 table where the \u0027Genre\u0027 column is equal to \u0027Country\u0027 and the \u0027TicketsSold\u0027 column is greater than 1000 by the \u0027Age\u0027 column. The subquery is then ordered by the \u0027Age\u0027 column and the top row is selected, which will be the median age." 
+}, { + "id": "4955", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all concerts that have sold out.", + "sql_context": "CREATE TABLE concerts (id INT, country VARCHAR(255), city VARCHAR(255), artist_name VARCHAR(255), tier VARCHAR(255), price DECIMAL(10,2), num_tickets INT, num_sold INT); CREATE VIEW sold_out_concerts AS SELECT id FROM concerts WHERE num_sold \u003d num_tickets;", + "sql": "DELETE FROM concerts WHERE id IN (SELECT id FROM sold_out_concerts);", + "sql_explanation": "This query deletes all concerts that have sold out by first creating a view of sold out concerts, then deleting all concerts where the concert ID is in the sold out concerts view." 
+}, { + "id": "692", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of vessels and their maximum cargo capacity that have visited ports in Asia in 2019.", + "sql_context": "CREATE TABLE Vessels (id INT, name VARCHAR(255), capacity INT, last_port VARCHAR(255), last_visited DATETIME); INSERT INTO Vessels (id, name, capacity, last_port, last_visited) VALUES (1, \u0027Sea Titan\u0027, 15000, \u0027Hong Kong\u0027, \u00272019-03-05 14:30:00\u0027), (2, \u0027Ocean Wave\u0027, 12000, \u0027Tokyo\u0027, \u00272019-11-12 09:00:00\u0027); CREATE VIEW Ports AS SELECT DISTINCT port FROM (SELECT CASE WHEN LEFT(last_port, 2) \u003d \u0027S\u0027 THEN CONCAT(SUBSTR(last_port, 3), \u0027ia\u0027) ELSE last_port END AS port FROM Vessels);", + "sql": "SELECT name, capacity FROM Vessels V JOIN Ports P ON V.last_port \u003d P.port WHERE YEAR(last_visited) \u003d 2019 AND FIND_IN_SET(last_port, (SELECT GROUP_CONCAT(port) FROM Ports WHERE port LIKE \u0027%Asia%\u0027)) \u003e 0;", + "sql_explanation": "This SQL query lists the names of vessels and their maximum cargo capacity that have visited ports in Asia in 2019 by joining the Vessels table with a view called Ports, filtering the records to the desired year and ports located in Asia, and selecting the name and capacity of the vessels." 
+}, { + "id": "765", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of vessels registered per month in the Mediterranean region?", + "sql_context": "CREATE TABLE vessels (vessel_id INT, registration_date DATE, region TEXT); INSERT INTO vessels VALUES (1, \u00272020-01-15\u0027, \u0027Mediterranean\u0027), (2, \u00272020-03-07\u0027, \u0027Mediterranean\u0027), (3, \u00272019-12-28\u0027, \u0027Mediterranean\u0027), (4, \u00272020-02-04\u0027, \u0027Mediterranean\u0027), (5, \u00272019-11-10\u0027, \u0027Mediterranean\u0027), (6, \u00272020-01-02\u0027, \u0027Mediterranean\u0027);", + "sql": "SELECT AVG(vessel_count_per_month) FROM (SELECT COUNT(*) AS vessel_count_per_month FROM vessels WHERE region \u003d \u0027Mediterranean\u0027 GROUP BY YEAR(registration_date), MONTH(registration_date)) subquery;", + "sql_explanation": "First, a subquery is used to count the number of vessels registered per month in the Mediterranean region. Then, the average number of vessels registered per month is calculated using the AVG function." 
+}, { + "id": "2522", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of vessels owned by company \u0027PQR Marine\u0027 that have a capacity greater than 7000?", + "sql_context": "CREATE TABLE companies (id INT, name VARCHAR(20)); INSERT INTO companies (id, name) VALUES (1, \u0027STU Shipping\u0027), (2, \u0027PQR Marine\u0027), (3, \u0027VWX Navigation\u0027); CREATE TABLE vessels (id INT, capacity INT, company_id INT); INSERT INTO vessels (id, capacity, company_id) VALUES (1, 5000, 1), (2, 8000, 2), (3, 9000, 3), (4, 6000, 2), (5, 7500, 2);", + "sql": "SELECT COUNT(*) FROM vessels WHERE capacity \u003e 7000 AND company_id \u003d (SELECT id FROM companies WHERE name \u003d \u0027PQR Marine\u0027);", + "sql_explanation": "This query calculates the number of vessels owned by company \u0027PQR Marine\u0027 that have a capacity greater than 7000 by first finding the id of the company with that name and then filtering the vessels table to only include rows with that company_id and capacity greater than 7000. Finally, it counts the number of rows in the resulting table." 
+}, { + "id": "3506", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the weight of all cargo loaded in the port of Valparaíso, Chile?", + "sql_context": "CREATE TABLE ports (id INT PRIMARY KEY, name VARCHAR(255)); CREATE TABLE cargo (id INT PRIMARY KEY, name VARCHAR(255), description TEXT, weight FLOAT, volume FLOAT, port_id INT, vessel_id INT, FOREIGN KEY (port_id) REFERENCES ports(id), FOREIGN KEY (vessel_id) REFERENCES vessels(id));", + "sql": "SELECT SUM(weight) FROM cargo WHERE port_id \u003d (SELECT id FROM ports WHERE name \u003d \u0027Valparaíso\u0027);", + "sql_explanation": "This query calculates the total weight of all cargo loaded in the port of Valparaíso, Chile by summing the weight column in the cargo table where port_id matches the id of the port named \u0027Valparaíso\u0027." 
+}, { + "id": "3561", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all cargo handling records for \u0027Rotterdam\u0027 port.", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(20)); INSERT INTO ports VALUES (1, \u0027Seattle\u0027), (2, \u0027Rotterdam\u0027); CREATE TABLE cargo (cargo_id INT, port_id INT, container_weight FLOAT, handling_date DATE); INSERT INTO cargo VALUES (1, 1, 2000.5, \u00272022-01-01\u0027), (2, 1, 3000.2, \u00272022-01-02\u0027), (3, 2, 1500.3, \u00272022-01-03\u0027);", + "sql": "SELECT * FROM cargo WHERE port_id \u003d (SELECT port_id FROM ports WHERE port_name \u003d \u0027Rotterdam\u0027);", + "sql_explanation": "The SQL query lists all cargo handling records for the \u0027Rotterdam\u0027 port by filtering the cargo table for the corresponding port_id." 
+}, { + "id": "4188", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all routes associated with vessels that have a license_number of a captain who is under 30 years old.", + "sql_context": "CREATE TABLE Vessel (id INT, name VARCHAR(50), type VARCHAR(50), length FLOAT); CREATE TABLE Captain (id INT, name VARCHAR(50), age INT, license_number VARCHAR(50), VesselId INT); CREATE TABLE Route (id INT, departure_port VARCHAR(50), arrival_port VARCHAR(50), distance FLOAT, VesselId INT);", + "sql": "DELETE FROM Route WHERE VesselId IN (SELECT VesselId FROM Captain WHERE age \u003c 30);", + "sql_explanation": "This query deletes all routes associated with vessels that have a license_number of a captain who is under 30 years old. It does so by first selecting the Vessel IDs of all captains with an age less than 30 and then deleting all routes with Vessel IDs present in that set." 
+}, { + "id": "497", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of autonomous vehicles in Germany and France, by manufacturer and year of production.", + "sql_context": "CREATE TABLE AutonomousVehicles (id INT, manufacturer VARCHAR(50), year INT, country VARCHAR(50)); INSERT INTO AutonomousVehicles (id, manufacturer, year, country) VALUES (1, \u0027ManufacturerA\u0027, 2018, \u0027Germany\u0027), (2, \u0027ManufacturerB\u0027, 2019, \u0027France\u0027), (3, \u0027ManufacturerC\u0027, 2020, \u0027Germany\u0027), (4, \u0027ManufacturerD\u0027, 2021, \u0027France\u0027);", + "sql": "SELECT context.manufacturer, context.country, COUNT(context.id) FROM (SELECT * FROM AutonomousVehicles WHERE AutonomousVehicles.country IN (\u0027Germany\u0027, \u0027France\u0027)) AS context GROUP BY context.manufacturer, context.country;", + "sql_explanation": "This query creates a table named \"AutonomousVehicles\" with columns \u0027id\u0027, \u0027manufacturer\u0027, \u0027year\u0027, and \u0027country\u0027. It then inserts 4 records for autonomous vehicles in Germany and France. The SQL query then lists the number of autonomous vehicles in each country by filtering the \u0027AutonomousVehicles\u0027 table to only include records where the country is Germany or France, then grouping the results by manufacturer and country." 
+}, { + "id": "1186", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum distance traveled by electric vehicles in China, grouped by manufacturer?", + "sql_context": "CREATE TABLE ElectricVehicles (id INT, manufacturer VARCHAR(50), distance FLOAT, country VARCHAR(50)); INSERT INTO ElectricVehicles (id, manufacturer, distance, country) VALUES (1, \u0027ManufacturerA\u0027, 250.3, \u0027China\u0027), (2, \u0027ManufacturerB\u0027, 300.5, \u0027China\u0027), (3, \u0027ManufacturerC\u0027, 350.7, \u0027China\u0027), (4, \u0027ManufacturerD\u0027, 400.8, \u0027China\u0027);", + "sql": "SELECT context.manufacturer, MAX(context.distance) FROM (SELECT * FROM ElectricVehicles WHERE ElectricVehicles.country \u003d \u0027China\u0027) AS context GROUP BY context.manufacturer;", + "sql_explanation": "This query creates a table named \"ElectricVehicles\" with columns \u0027id\u0027, \u0027manufacturer\u0027, \u0027distance\u0027, and \u0027country\u0027. It then inserts 4 records for electric vehicles in China. The SQL query then calculates the maximum distance traveled by electric vehicles in China by filtering the \u0027ElectricVehicles\u0027 table to only include records where the country is China, then grouping the results by manufacturer and calculating the maximum distance." 
+}, { + "id": "1439", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicles in Berlin?", + "sql_context": "CREATE TABLE electric_vehicles (id INT, city VARCHAR(50)); CREATE TABLE vehicles (id INT, electric BOOLEAN);", + "sql": "SELECT COUNT(*) FROM electric_vehicles WHERE city \u003d \u0027Berlin\u0027 AND (SELECT COUNT(*) FROM vehicles WHERE electric \u003d TRUE) / (SELECT COUNT(*) FROM vehicles) \u003e 0.5;", + "sql_explanation": "This SQL query calculates the total number of electric vehicles in Berlin. It does this by using the COUNT function, which returns the number of rows in a table, and subqueries to determine the proportion of electric vehicles in the vehicles table. The query filters the electric_vehicles table to only include rows where the city is \u0027Berlin\u0027, and then uses a subquery to count the number of rows in the vehicles table where the electric column is true. The query then uses another subquery to count the total number of rows in the vehicles table, and divides the first subquery by the second to determine the proportion of electric vehicles. The query then filters the results to only include rows where the proportion of electric vehicles is greater than 0.5." 
+}, { + "id": "2089", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cities in the \u0027transportation\u0027 schema have more than 10000 electric vehicles?", + "sql_context": "CREATE TABLE city_electric_vehicles (city_name VARCHAR(255), num_electric_vehicles INT); INSERT INTO city_electric_vehicles (city_name, num_electric_vehicles) VALUES (\u0027San Francisco\u0027, 15000), (\u0027Los Angeles\u0027, 20000), (\u0027New York\u0027, 30000);", + "sql": "SELECT COUNT(*) FROM (SELECT city_name FROM city_electric_vehicles WHERE num_electric_vehicles \u003e 10000 GROUP BY city_name) AS subquery;", + "sql_explanation": "This query calculates the number of cities with more than 10000 electric vehicles by selecting the count of the \u0027city_name\u0027 column from a subquery that groups the cities with more than 10000 electric vehicles and selects the city_name." 
+}, { + "id": "1424", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find communities with member count higher than the maximum in 2030", + "sql_context": "CREATE TABLE indigenous_communities (id INT PRIMARY KEY, community_name TEXT, members INT, year INT); CREATE VIEW community_members_max AS SELECT community_name, MAX(members) AS max_members FROM indigenous_communities WHERE year \u003d 2030 GROUP BY community_name;", + "sql": "SELECT community_name FROM indigenous_communities WHERE year \u003d 2030 AND members \u003e (SELECT max_members FROM community_members_max WHERE community_name \u003d \u0027Sami\u0027);", + "sql_explanation": "Find communities with more members than the maximum Sami community members in 2030 by finding community_name column values where members is greater than the max_members column value in community_members_max view, filtered for community_name \u0027Sami\u0027." 
+}, { + "id": "2006", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the date with the lowest temperature overall", + "sql_context": "temperature_readings", + "sql": "SELECT reading_date as lowest_temp_date FROM temperature_readings WHERE temperature \u003d (SELECT MIN(temperature) FROM temperature_readings);", + "sql_explanation": "The SQL query finds the minimum temperature from the \u0027temperature_readings\u0027 table and then selects the \u0027reading_date\u0027 corresponding to this minimum temperature." +}, { + "id": "296", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of sales revenue for cruelty-free products in H1 2022?", + "sql_context": "CREATE TABLE sales (product VARCHAR(255), sale_date DATE, revenue NUMERIC(10, 2), is_cruelty_free BOOLEAN); INSERT INTO sales (product, sale_date, revenue, is_cruelty_free) VALUES (\u0027Eyeliner\u0027, \u00272022-01-01\u0027, 500, true), (\u0027Lipstick\u0027, \u00272022-01-03\u0027, 300, false), (\u0027Moisturizer\u0027, \u00272022-01-05\u0027, 700, true), (\u0027Conditioner\u0027, \u00272022-01-07\u0027, 600, false), (\u0027Eyeshadow\u0027, \u00272022-02-01\u0027, 400, true);", + "sql": "SELECT 
(SUM(revenue) / (SELECT SUM(revenue) FROM sales WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-06-30\u0027 AND is_cruelty_free \u003d true) * 100) as percentage FROM sales WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-06-30\u0027 AND is_cruelty_free \u003d true;", + "sql_explanation": "The query calculates the percentage of sales revenue for cruelty-free products in H1 2022 (first half of 2022) by dividing the total revenue for cruelty-free products by the total revenue for H1 2022 and multiplying by 100. The query uses a subquery to calculate the total revenue for H1 2022 for cruelty-free products. The WHERE clause filters the sales table to only include rows with sale dates in H1 2022 and with the is_cruelty_free column set to true. The SELECT clause calculates the percentage of sales revenue for cruelty-free products in H1 2022." +}, { + "id": "582", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What percentage of hair care products are free from sulfates and parabens in the EU?", + "sql_context": "CREATE TABLE hair_care_products (product_id INT, name VARCHAR(255), is_sulfate_free BOOLEAN, is_paraben_free BOOLEAN, region VARCHAR(255));", + "sql": "SELECT (COUNT(product_id) * 100.0 / (SELECT COUNT(*) FROM hair_care_products WHERE region \u003d \u0027EU\u0027)) AS percentage FROM hair_care_products WHERE is_sulfate_free \u003d TRUE AND is_paraben_free \u003d TRUE AND region \u003d \u0027EU\u0027;", + "sql_explanation": "The SQL query calculates the percentage of hair care products that are free from sulfates and parabens in the EU by using 
a subquery to count the total number of hair care products in the EU, counting the number of products that meet the conditions, and dividing the two values to calculate the percentage." +}, { + "id": "2035", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all products with a rating higher than the average rating for their respective category, ordered by category in ascending order.", + "sql_context": "CREATE TABLE products (product_id INT, name VARCHAR(255), category VARCHAR(255), rating FLOAT);", + "sql": "SELECT * FROM products WHERE rating \u003e (SELECT AVG(rating) FROM products p2 WHERE p2.category \u003d products.category) ORDER BY category ASC;", + "sql_explanation": "This query lists all products with a rating higher than the average rating for their respective category, sorted in ascending order by category. It does so by using a subquery to calculate the average rating for each product\u0027s category. It then filters the products table to only include rows where the rating is higher than the average for the product\u0027s category. It then uses the ORDER BY clause to sort the results by category in ascending order." 
+}, { + "id": "3068", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the safety rating of all records related to shampoo products with the word \u0027sulfate\u0027 in their ingredient list to 3.", + "sql_context": "CREATE TABLE cosmetics_ingredients (product VARCHAR(255), ingredient VARCHAR(255), safety_rating INTEGER); CREATE TABLE cosmetics (product VARCHAR(255), product_category VARCHAR(255)); CREATE TABLE ingredients (ingredient VARCHAR(255), chemical_class VARCHAR(255)); CREATE VIEW sulfate_shampoo AS SELECT * FROM cosmetics_ingredients JOIN cosmetics ON cosmetics_ingredients.product \u003d cosmetics.product JOIN ingredients ON cosmetics_ingredients.ingredient \u003d ingredients.ingredient WHERE ingredients.chemical_class \u003d \u0027Sulfates\u0027 AND cosmetics.product_category \u003d \u0027Shampoos\u0027;", + "sql": "UPDATE cosmetics_ingredients SET safety_rating \u003d 3 WHERE product IN (SELECT product FROM sulfate_shampoo);", + "sql_explanation": "This query updates the safety rating of all records related to shampoo products with the word \u0027sulfate\u0027 in their ingredient list to 3 by using a subquery to get the list of sulfate shampoo products. It then filters the data where the ingredient is a sulfate and the product category is \u0027Shampoos\u0027 and updates the safety rating to 3." 
+}, { + "id": "563", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of sustainable material orders in each country?", + "sql_context": "CREATE TABLE Deliveries (order_id INT, delivery_date DATE, material_sustainable BOOLEAN); CREATE TABLE Orders (order_id INT, order_date DATE, country VARCHAR(50));", + "sql": "SELECT O.country, (COUNT(D.order_id) * 100.0 / (SELECT COUNT(*) FROM Orders) ) as percentage FROM Deliveries D INNER JOIN Orders O ON D.order_id \u003d O.order_id WHERE D.material_sustainable \u003d TRUE GROUP BY O.country;", + "sql_explanation": "Calculate the percentage of sustainable material orders in each country by joining the Deliveries and Orders tables on order_id and filtering for sustainable materials, then dividing by the total number of orders." 
+}, { + "id": "1073", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest and lowest production costs for ethical garments?", + "sql_context": "CREATE TABLE country_costs (id INT, country VARCHAR(255), garment_type VARCHAR(255), production_cost DECIMAL(10,2));", + "sql": "SELECT country, production_cost FROM country_costs WHERE garment_type IN (SELECT garment_type FROM ethical_materials) ORDER BY production_cost ASC, production_cost DESC LIMIT 1;", + "sql_explanation": "This query finds the countries with the highest and lowest production costs for ethical garments. It does this by using the IN operator to check if the garment type is in the \u0027ethical_materials\u0027 table, and the ORDER BY clause to order the results first by ascending production cost (to find the lowest), and then by descending production cost (to find the highest). The LIMIT clause is used to limit the results to 1." 
+}, { + "id": "1590", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption per item for the top 3 clothing brands using the least water?", + "sql_context": "CREATE TABLE water_consumption (id INT PRIMARY KEY, brand VARCHAR(50), items_produced INT, water_consumption FLOAT); INSERT INTO water_consumption (id, brand, items_produced, water_consumption) VALUES (1, \u0027Brand A\u0027, 100000, 1000.00), (2, \u0027Brand B\u0027, 150000, 1200.00), (3, \u0027Brand C\u0027, 80000, 800.00), (4, \u0027Brand D\u0027, 120000, 1500.00), (5, \u0027Brand E\u0027, 200000, 2000.00);", + "sql": "SELECT AVG(water_consumption) FROM (SELECT brand, water_consumption FROM water_consumption ORDER BY water_consumption ASC LIMIT 3) as lowest_water_users;", + "sql_explanation": "This query calculates the average water consumption per item for the top 3 clothing brands using the least water by selecting the water_consumption column. The subquery (SELECT brand, water_consumption FROM water_consumption ORDER BY water_consumption ASC LIMIT 3) retrieves the top 3 brands with the lowest water consumption, and the outer query calculates the average water consumption using the AVG() aggregate function." 
+}, { + "id": "1768", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption for producing the top 2 sustainable materials?", + "sql_context": "CREATE TABLE water_consumption(material VARCHAR(20), water_consumption DECIMAL(5,2)); INSERT INTO water_consumption(material, water_consumption) VALUES(\u0027organic cotton\u0027, 20.00), (\u0027recycled polyester\u0027, 15.00), (\u0027hemp\u0027, 10.00);", + "sql": "SELECT AVG(water_consumption) FROM water_consumption WHERE material IN (SELECT material FROM water_consumption ORDER BY water_consumption LIMIT 2);", + "sql_explanation": "This query calculates the average water consumption for producing the top 2 sustainable materials (\u0027organic cotton\u0027 and \u0027recycled polyester\u0027) with the highest water consumption in the water_consumption table by using a subquery and the IN operator." 
+}, { + "id": "1936", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of sales by ethical material?", + "sql_context": "CREATE TABLE SalesByMaterial (SaleID INT, Material VARCHAR(50), Sales DECIMAL(5,2)); INSERT INTO SalesByMaterial (SaleID, Material, Sales) VALUES (1, \u0027Organic Cotton\u0027, 1200.50), (2, \u0027Hemp\u0027, 752.20), (3, \u0027Recycled Polyester\u0027, 986.60), (4, \u0027Tencel\u0027, 310.10);", + "sql": "SELECT Material, ROUND(SUM(Sales) / (SELECT SUM(Sales) FROM SalesByMaterial) * 100, 2) AS Percentage FROM SalesByMaterial GROUP BY Material;", + "sql_explanation": "The SQL query calculates the percentage of sales by ethical material by dividing the total sales for each material by the total sales from the SalesByMaterial table. The resulting percentage is then rounded to two decimal places using the ROUND function." 
+}, { + "id": "2119", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable materials have a higher CO2 emissions than cotton in the \u0027sustainable_materials\u0027 table?", + "sql_context": "CREATE TABLE sustainable_materials (material_id INT, material TEXT, co2_emissions FLOAT);", + "sql": "SELECT * FROM sustainable_materials WHERE co2_emissions \u003e (SELECT co2_emissions FROM sustainable_materials WHERE material \u003d \u0027cotton\u0027);", + "sql_explanation": "1. Select all columns from the \u0027sustainable_materials\u0027 table where the CO2 emissions are greater than the CO2 emissions for cotton." 
+}, { + "id": "2376", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the annual sales figures for garments made with organic cotton?", + "sql_context": "CREATE TABLE Sales (saleID INT, garmentID INT, year INT, revenue DECIMAL(5,2)); INSERT INTO Sales (saleID, garmentID, year, revenue) VALUES (1, 1, 2020, 25000.00), (2, 2, 2020, 30000.00), (3, 1, 2019, 22000.00);", + "sql": "SELECT SUM(revenue) FROM Sales WHERE garmentID IN (SELECT garmentID FROM GarmentProduction WHERE material \u003d \u0027Organic Cotton\u0027);", + "sql_explanation": "This query calculates the total annual sales figures for garments made with organic cotton by summing the revenue for garments made with organic cotton in the \u0027Sales\u0027 table. The subquery selects the garmentIDs of garments made with organic cotton from the \u0027GarmentProduction\u0027 table." 
+}, { + "id": "667", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 games by the number of players who have played them.", + "sql_context": "CREATE TABLE Games (GameID int, GameName varchar(50), PlayersCount int); INSERT INTO Games (GameID, GameName, PlayersCount) VALUES (1, \u0027GameA\u0027, 500), (2, \u0027GameB\u0027, 300), (3, \u0027GameC\u0027, 700), (4, \u0027GameD\u0027, 400);", + "sql": "SELECT g.GameName, g.PlayersCount FROM Games g JOIN (SELECT GameID, MAX(PlayersCount) as MaxPlayersCount FROM Games GROUP BY GameID LIMIT 3) as top3 ON g.GameID \u003d top3.GameID ORDER BY g.PlayersCount DESC;", + "sql_explanation": "This SQL query identifies the top 3 games by the number of players who have played them by first calculating the maximum number of players for each game using a subquery and grouping by GameID. Then, it joins the Games table with the subquery using the GameID and orders the results by the PlayersCount in descending order." 
+}, { + "id": "1003", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new records in the \u0027player_achievements\u0027 table for a player named \u0027Jordan\u0027 with the achievement \u0027Master of Games\u0027", + "sql_context": "CREATE TABLE player_achievements (achievement_id INT, player_id INT, achievement VARCHAR(100), date DATE);", + "sql": "INSERT INTO player_achievements (achievement_id, player_id, achievement, date) VALUES (1, (SELECT player_id FROM player_stats WHERE name \u003d \u0027Jordan\u0027), \u0027Master of Games\u0027, CURDATE());", + "sql_explanation": "This SQL query inserts new records in the \u0027player_achievements\u0027 table for a player named \u0027Jordan\u0027 with the achievement \u0027Master of Games\u0027. It uses the INSERT INTO statement, specifies the table name, the columns to insert, and the values to insert in those columns. The player_id is selected from the \u0027player_stats\u0027 table where the name is \u0027Jordan\u0027. The current date is used for the date column." 
+}, { + "id": "1476", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of players who have played \u0027Fortnite\u0027 but not \u0027CS:GO\u0027.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(50)); INSERT INTO Players (PlayerID, Name) VALUES (1, \u0027John Doe\u0027); INSERT INTO Players (PlayerID, Name) VALUES (2, \u0027Jane Smith\u0027); CREATE TABLE Fortnite_Players (PlayerID INT); INSERT INTO Fortnite_Players (PlayerID) VALUES (1); INSERT INTO Fortnite_Players (PlayerID) VALUES (3); CREATE TABLE CSGO_Players (PlayerID INT); INSERT INTO CSGO_Players (PlayerID) VALUES (2); INSERT INTO CSGO_Players (PlayerID) VALUES (3);", + "sql": "SELECT COUNT(*) FROM Players p WHERE p.PlayerID IN (SELECT f.PlayerID FROM Fortnite_Players f) AND p.PlayerID NOT IN (SELECT c.PlayerID FROM CSGO_Players c);", + "sql_explanation": "The SQL query uses a subquery to find the PlayerID of players who have played \u0027Fortnite\u0027 and another subquery to find the PlayerID of players who have not played \u0027CS:GO\u0027. The main query then counts the number of players who meet both conditions." 
+}, { + "id": "2422", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete players with a score below the average score in the \u0027Simulation\u0027 game category.", + "sql_context": "CREATE TABLE SimulationScores (PlayerID int, PlayerName varchar(50), Game varchar(50), Score int); INSERT INTO SimulationScores (PlayerID, PlayerName, Game, Score) VALUES (1, \u0027Player1\u0027, \u0027Game4\u0027, 1000), (2, \u0027Player2\u0027, \u0027Game4\u0027, 1200), (3, \u0027Player3\u0027, \u0027Game4\u0027, 800), (4, \u0027Player4\u0027, \u0027Game4\u0027, 1400);", + "sql": "DELETE FROM SimulationScores WHERE Game \u003d \u0027Game4\u0027 AND Score \u003c (SELECT AVG(Score) FROM SimulationScores WHERE Game \u003d \u0027Game4\u0027);", + "sql_explanation": "The SQL query deletes players with a score below the average score in the \u0027Simulation\u0027 game category by using a subquery to find the average score for the \u0027Game4\u0027 game in the SimulationScores table, and then deleting records with a score lower than that average score." 
+}, { + "id": "302", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the retention rate of employees who have completed diversity and inclusion training?", + "sql_context": "CREATE TABLE EmployeeTraining (EmployeeID INT, TrainingType VARCHAR(50), TrainingCompletionDate DATE, EmploymentEndDate DATE); INSERT INTO EmployeeTraining (EmployeeID, TrainingType, TrainingCompletionDate, EmploymentEndDate) VALUES (1, \u0027Diversity and Inclusion\u0027, \u00272022-01-01\u0027, \u00272023-01-01\u0027), (2, NULL, NULL, \u00272022-01-01\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM EmployeeTraining WHERE TrainingType \u003d \u0027Diversity and Inclusion\u0027 AND EmploymentEndDate IS NULL)) FROM EmployeeTraining WHERE TrainingType \u003d \u0027Diversity and Inclusion\u0027 AND EmploymentEndDate IS NOT NULL;", + "sql_explanation": "This SQL query calculates the retention rate of employees who have completed diversity and inclusion training. It does this by using a subquery to get the total number of employees who have completed the training and are still employed, and then calculating the percentage of those employees who have an employment end date." 
+}, { + "id": "331", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of data analysts in the Analytics department, grouped by race?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50), Salary FLOAT, Race VARCHAR(50));", + "sql": "SELECT e.Race, AVG(e.Salary) FROM Employees e INNER JOIN (SELECT EmployeeID, \u0027Data Analyst\u0027 AS Position FROM TalentAcquisition WHERE Position \u003d \u0027Data Analyst\u0027) ta ON e.EmployeeID \u003d ta.EmployeeID WHERE e.Department \u003d \u0027Analytics\u0027 GROUP BY e.Race;", + "sql_explanation": "This query joins the Employees table with a subquery of the TalentAcquisition table to get the EmployeeID and position of all data analysts. It then filters for Analytics department employees, groups the results by race, and calculates the average salary for each race." 
+}, { + "id": "1119", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which employees have the same job title as those in the \u0027Marketing\u0027 department but work in a different department?", + "sql_context": "CREATE TABLE Employees (Employee_ID INT, First_Name VARCHAR(50), Last_Name VARCHAR(50), Department VARCHAR(50), Job_Title VARCHAR(50)); INSERT INTO Employees (Employee_ID, First_Name, Last_Name, Department, Job_Title) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027HR\u0027, \u0027Analyst\u0027), (2, \u0027Jane\u0027, \u0027Smith\u0027, \u0027Marketing\u0027, \u0027Specialist\u0027), (3, \u0027Mike\u0027, \u0027Jameson\u0027, \u0027IT\u0027, \u0027Engineer\u0027), (4, \u0027Lucy\u0027, \u0027Brown\u0027, \u0027Finance\u0027, \u0027Analyst\u0027);", + "sql": "SELECT e1.* FROM Employees e1 INNER JOIN (SELECT Job_Title FROM Employees WHERE Department \u003d \u0027Marketing\u0027) e2 ON e1.Job_Title \u003d e2.Job_Title WHERE e1.Department !\u003d \u0027Marketing\u0027", + "sql_explanation": "First, we find all job titles in the \u0027Marketing\u0027 department. Then, we join this subquery with the Employees table to find employees who have the same job title but work in a different department." 
+}, { + "id": "1636", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of employees who identify as LGBTQ+ in the company?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(10), SexualOrientation VARCHAR(20)); INSERT INTO Employees (EmployeeID, Gender, SexualOrientation) VALUES (1, \u0027Female\u0027, \u0027Heterosexual\u0027), (2, \u0027Male\u0027, \u0027Gay\u0027), (3, \u0027Non-binary\u0027, \u0027Queer\u0027), (4, \u0027Male\u0027, \u0027Bisexual\u0027), (5, \u0027Female\u0027, \u0027Heterosexual\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Employees WHERE SexualOrientation IS NOT NULL)) FROM Employees WHERE SexualOrientation LIKE \u0027%LGBTQ%\u0027;", + "sql_explanation": "This SQL query calculates the percentage of employees who identify as LGBTQ+ in the company by filtering the Employees table for rows where SexualOrientation contains the string \u0027LGBTQ\u0027, and then dividing the count of these rows by the total number of employees with a non-null SexualOrientation value. The result is multiplied by 100.0 to convert the result to a percentage." 
+}, { + "id": "2390", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees have a higher salary than the average salary in the Marketing department?", + "sql_context": "CREATE TABLE employees (employee_id INT, department VARCHAR(255), salary INT); INSERT INTO employees (employee_id, department, salary) VALUES (1, \u0027Marketing\u0027, 50000), (2, \u0027HR\u0027, 60000), (3, \u0027Marketing\u0027, 55000), (4, \u0027Finance\u0027, 70000);", + "sql": "SELECT COUNT(*) FROM employees e WHERE e.salary \u003e (SELECT AVG(e2.salary) FROM employees e2 WHERE e2.department \u003d \u0027Marketing\u0027);", + "sql_explanation": "We use a subquery to calculate the average salary in the Marketing department and then select the count of employees who have a higher salary than the calculated average salary." 
+}, { + "id": "2491", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent on technical skills training for employees in the \u0027Finance\u0027 department?", + "sql_context": "CREATE TABLE Training (Employee_ID INT, Training_Type VARCHAR(50), Hours_Spent DECIMAL(5,2)); INSERT INTO Training (Employee_ID, Training_Type, Hours_Spent) VALUES (3, \u0027Technical Skills\u0027, 8.00), (4, \u0027Technical Skills\u0027, 10.00), (5, \u0027Technical Skills\u0027, 6.00), (9, \u0027Technical Skills\u0027, 9.00), (10, \u0027Technical Skills\u0027, 7.00);", + "sql": "SELECT SUM(Hours_Spent) FROM Training WHERE Employee_ID IN (SELECT Employee_ID FROM Employee WHERE Department \u003d \u0027Finance\u0027);", + "sql_explanation": "Calculate the total number of hours spent on technical skills training for employees in the \u0027Finance\u0027 department by filtering the Training table by the Employee_ID column and finding the sum of the Hours_Spent column." 
+}, { + "id": "3236", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the employees who have a higher salary than the average salary in the Sales department.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Salary DECIMAL(10,2), Department VARCHAR(50)); INSERT INTO Employees (EmployeeID, Salary, Department) VALUES (1, 90000.00, \u0027Sales\u0027), (2, 80000.00, \u0027Sales\u0027);", + "sql": "SELECT * FROM Employees WHERE Salary \u003e (SELECT AVG(Salary) FROM Employees WHERE Department \u003d \u0027Sales\u0027);", + "sql_explanation": "The SQL query retrieves the employees with a higher salary than the average salary in the Sales department by using a subquery to calculate the average salary and filtering the \u0027Salary\u0027 column." 
+}, { + "id": "24", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total reclamation cost and number of employees for mines in the South America region with more than 300 employees?", + "sql_context": "CREATE TABLE production_data (id INT PRIMARY KEY, mine_id INT, year INT, monthly_production INT);CREATE TABLE reclamation_data (id INT PRIMARY KEY, mine_id INT, year INT, reclamation_cost INT);CREATE TABLE mine_employees (id INT PRIMARY KEY, mine_id INT, employee_id INT, employment_start_date DATE, employment_end_date DATE);CREATE TABLE employee_demographics (id INT PRIMARY KEY, employee_id INT, gender VARCHAR(255), ethnicity VARCHAR(255));CREATE VIEW employee_stats AS SELECT mine_id, COUNT(employee_id) as employee_count FROM mine_employees GROUP BY mine_id;CREATE VIEW operation_duration AS SELECT mine_id, COUNT(DISTINCT year) as operation_years FROM production_data GROUP BY mine_id;", + "sql": "SELECT r.mine_id, SUM(r.reclamation_cost) as total_reclamation_cost, e.employee_count FROM reclamation_data r JOIN employee_stats e ON r.mine_id \u003d e.mine_id WHERE r.mine_id IN (SELECT mine_id FROM employee_stats WHERE employee_count \u003e 300) AND e.mine_id IN (SELECT mine_id FROM employee_stats WHERE employee_count \u003e 300) AND r.mine_id IN (SELECT mine_id FROM operation_duration WHERE operation_years \u003e 5) GROUP BY r.mine_id;", + "sql_explanation": "This query calculates the total reclamation cost and number of employees for each mine in the South America region with more than 300 employees and more than 5 years of operation." 
+}, { + "id": "25", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total reclamation cost and number of employees for mines in the Asia-Pacific region with more than 500 employees?", + "sql_context": "CREATE TABLE production_data (id INT PRIMARY KEY, mine_id INT, year INT, monthly_production INT);CREATE TABLE reclamation_data (id INT PRIMARY KEY, mine_id INT, year INT, reclamation_cost INT);CREATE TABLE mine_employees (id INT PRIMARY KEY, mine_id INT, employee_id INT, employment_start_date DATE, employment_end_date DATE);CREATE TABLE employee_demographics (id INT PRIMARY KEY, employee_id INT, gender VARCHAR(255), ethnicity VARCHAR(255));CREATE VIEW employee_stats AS SELECT mine_id, COUNT(employee_id) as employee_count FROM mine_employees GROUP BY mine_id;CREATE VIEW operation_duration AS SELECT mine_id, COUNT(DISTINCT year) as operation_years FROM production_data GROUP BY mine_id;", + "sql": "SELECT r.mine_id, SUM(r.reclamation_cost) as total_reclamation_cost, e.employee_count FROM reclamation_data r JOIN employee_stats e ON r.mine_id \u003d e.mine_id WHERE r.mine_id IN (SELECT mine_id FROM employee_stats WHERE employee_count \u003e 500) AND e.mine_id IN (SELECT mine_id FROM employee_stats WHERE employee_count \u003e 500) AND r.mine_id IN (SELECT mine_id FROM operation_duration WHERE operation_years \u003e 5) GROUP BY r.mine_id;", + "sql_explanation": "This query calculates the total reclamation cost and number of employees for each mine in the Asia-Pacific region with more than 500 employees and more than 5 years of operation." 
+}, { + "id": "137", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the annual diversity report of the workforce by job category?", + "sql_context": "CREATE TABLE Workforce (ID INT, Category VARCHAR(255), Gender VARCHAR(255), HireDate DATE); INSERT INTO Workforce (ID, Category, Gender, HireDate) VALUES (1, \u0027Mining\u0027, \u0027Male\u0027, \u00272021-01-01\u0027), (2, \u0027Mining\u0027, \u0027Female\u0027, \u00272021-02-01\u0027), (3, \u0027Mining\u0027, \u0027Non-binary\u0027, \u00272021-03-01\u0027), (4, \u0027Maintenance\u0027, \u0027Male\u0027, \u00272021-01-01\u0027), (5, \u0027Maintenance\u0027, \u0027Female\u0027, \u00272021-02-01\u0027), (6, \u0027Environment\u0027, \u0027Male\u0027, \u00272021-01-01\u0027), (7, \u0027Environment\u0027, \u0027Female\u0027, \u00272021-02-01\u0027), (8, \u0027Environment\u0027, \u0027Non-binary\u0027, \u00272021-03-01\u0027), (9, \u0027Safety\u0027, \u0027Male\u0027, \u00272021-01-01\u0027), (10, \u0027Safety\u0027, \u0027Female\u0027, \u00272021-02-01\u0027), (11, \u0027Safety\u0027, \u0027Non-binary\u0027, \u00272021-03-01\u0027);", + "sql": "SELECT Category, Gender, COUNT(*) as Number_of_Employees, PERCENTAGE FROM (SELECT Category, Gender, COUNT(*) as Number_of_Employees, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Workforce) as PERCENTAGE FROM Workforce GROUP BY Category, Gender) AS Diversity ORDER BY Category, PERCENTAGE DESC;", + "sql_explanation": "This query calculates the diversity report of the workforce by job category by using the COUNT function with the GROUP BY clause, partitioned by the Category and Gender columns. 
The subquery calculates the percentage of each gender in each category. The results are ordered by the Category and PERCENTAGE columns." +}, { + "id": "476", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mining operations have a higher than average monthly CO2 emission?", + "sql_context": "CREATE TABLE co2_emissions (mine_id INT, emission_date DATE, co2_amount INT); INSERT INTO co2_emissions (mine_id, emission_date, co2_amount) VALUES (1, \u00272021-01-01\u0027, 30000), (1, \u00272021-02-01\u0027, 32000), (1, \u00272021-03-01\u0027, 35000), (2, \u00272021-01-01\u0027, 28000), (2, \u00272021-02-01\u0027, 30000), (2, \u00272021-03-01\u0027, 33000), (3, \u00272021-01-01\u0027, 25000), (3, \u00272021-02-01\u0027, 27000), (3, \u00272021-03-01\u0027, 29000); CREATE TABLE mine_info (mine_id INT, mine_name TEXT); INSERT INTO mine_info (mine_id, mine_name) VALUES (1, \u0027Golden Mine\u0027), (2, \u0027Silver Mine\u0027), (3, \u0027Bronze Mine\u0027);", + "sql": "SELECT mine_name, AVG(co2_amount) AS avg_monthly_emission FROM co2_emissions JOIN mine_info ON co2_emissions.mine_id \u003d mine_info.mine_id GROUP BY mine_id HAVING AVG(co2_amount) \u003e (SELECT AVG(co2_amount) FROM co2_emissions);", + "sql_explanation": "The SQL query first calculates the average monthly CO2 emission for each mine_id. It then joins the co2_emissions table with the mine_info table to get the mine names. The query then filters the results to only show the mining operations with an average monthly CO2 emission higher than the overall average." 
+}, { + "id": "882", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mining sites have a higher than average carbon emissions score, for a specific country?", + "sql_context": "CREATE TABLE mining_sites (id INT, name VARCHAR(255), country VARCHAR(255), carbon_emissions_score INT); INSERT INTO mining_sites (id, name, country, carbon_emissions_score) VALUES (1, \u0027Site A\u0027, \u0027Canada\u0027, 75), (2, \u0027Site B\u0027, \u0027Mexico\u0027, 85), (3, \u0027Site C\u0027, \u0027Brazil\u0027, 95);", + "sql": "SELECT name, carbon_emissions_score FROM mining_sites WHERE country \u003d \u0027Canada\u0027 AND carbon_emissions_score \u003e (SELECT AVG(carbon_emissions_score) FROM mining_sites WHERE country \u003d \u0027Canada\u0027);", + "sql_explanation": "This query performs a subquery to calculate the average carbon_emissions_score for mining sites in Canada, and then filters the results in the main query to include only those sites in Canada with a carbon_emissions_score higher than the average." 
+}, { + "id": "4433", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "What are the water usage statistics for mining companies in Canada?", + "sql_context": "CREATE TABLE CanadianMines (Company VARCHAR(50), Location VARCHAR(10)); INSERT INTO CanadianMines (Company, Location) VALUES (\u0027MNO Inc\u0027, \u0027Canada\u0027), (\u0027PQR Ltd\u0027, \u0027Canada\u0027); CREATE TABLE WaterUsage (Company VARCHAR(50), Water_usage INT); INSERT INTO WaterUsage (Company, Water_usage) VALUES (\u0027STU Industries\u0027, 700), (\u0027VWX Mining\u0027, 800);", + "sql": "SELECT * FROM WaterUsage WHERE Company IN (SELECT Company FROM CanadianMines)", + "sql_explanation": "This query uses a subquery to first select the companies operating in Canada from the CanadianMines table, and then combines the results from the WaterUsage table to show the water usage statistics for those companies." 
+}, { + "id": "528", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales revenue for the top 2 regions?", + "sql_context": "CREATE TABLE sales_data (sales_id INTEGER, region TEXT, sales_revenue INTEGER); INSERT INTO sales_data (sales_id, region, sales_revenue) VALUES (1, \u0027North\u0027, 5000000), (2, \u0027South\u0027, 7000000), (3, \u0027East\u0027, 6000000), (4, \u0027West\u0027, 8000000), (5, \u0027Central\u0027, 9000000);", + "sql": "SELECT region, SUM(sales_revenue) FROM sales_data WHERE region IN (SELECT region FROM sales_data WHERE sales_revenue \u003d (SELECT MAX(sales_revenue) FROM sales_data) ORDER BY sales_revenue DESC LIMIT 2) GROUP BY region;", + "sql_explanation": "This query calculates the total sales revenue for the top 2 regions by filtering sales data by region and summing the sales_revenue column values for these regions." 
+}, { + "id": "1217", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total R\u0026D expenditure for the top 2 countries in 2022?", + "sql_context": "CREATE TABLE rd_expenditures (country VARCHAR(255), amount FLOAT, year INT); INSERT INTO rd_expenditures (country, amount, year) VALUES (\u0027USA\u0027, 60000, 2022), (\u0027Germany\u0027, 32000, 2022), (\u0027Japan\u0027, 45000, 2022), (\u0027India\u0027, 20000, 2022), (\u0027Brazil\u0027, 25000, 2022);", + "sql": "SELECT SUM(amount) as total_expenditure FROM (SELECT country, SUM(amount) as amount FROM rd_expenditures WHERE year \u003d 2022 GROUP BY country ORDER BY amount DESC LIMIT 2);", + "sql_explanation": "The SQL query calculates the total R\u0026D expenditure for the top 2 countries in 2022 by grouping the records based on the country and summing up the amount. It then orders the result set in descending order of the total expenditure and limits the result set to the top 2 records. Finally, it calculates the total expenditure for these top 2 countries." 
+}, { + "id": "1554", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many clinical trials have been conducted in \u0027CountryA\u0027 but not in \u0027CountryB\u0027?", + "sql_context": "CREATE TABLE clinical_trials (trial_name TEXT, country TEXT); INSERT INTO clinical_trials (trial_name, country) VALUES (\u0027Trial1\u0027, \u0027CountryA\u0027), (\u0027Trial2\u0027, \u0027CountryB\u0027), (\u0027Trial3\u0027, \u0027CountryA\u0027), (\u0027Trial4\u0027, \u0027CountryC\u0027);", + "sql": "SELECT COUNT(*) FROM clinical_trials WHERE country \u003d \u0027CountryA\u0027 AND trial_name NOT IN (SELECT trial_name FROM clinical_trials WHERE country \u003d \u0027CountryB\u0027);", + "sql_explanation": "This query counts the number of clinical trials conducted in \u0027CountryA\u0027 but not in \u0027CountryB\u0027 by selecting all rows with \u0027country\u0027 \u003d \u0027CountryA\u0027 and excluding those that also appear in the subquery with \u0027country\u0027 \u003d \u0027CountryB\u0027." 
+}, { + "id": "2787", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average revenue per drug in 2021?", + "sql_context": "CREATE TABLE drugs (drug_id INT, drug_name VARCHAR(100), revenue FLOAT, year INT); INSERT INTO drugs (drug_id, drug_name, revenue, year) VALUES (1, \u0027DrugA\u0027, 1500000, 2021), (2, \u0027DrugB\u0027, 2000000, 2021), (3, \u0027DrugC\u0027, 1200000, 2021);", + "sql": "SELECT AVG(revenue) FROM drugs WHERE year \u003d 2021 AND drug_name IN (SELECT drug_name FROM drugs WHERE year \u003d 2021);", + "sql_explanation": "The SQL query calculates the average revenue per drug in 2021 by first selecting all drug names from the table for the year 2021 and then finding the average revenue of those drugs." 
+}, { + "id": "256", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total water consumption for the 10 largest cities in the United States?", + "sql_context": "CREATE TABLE City_Water_Usage (ID INT, City VARCHAR(50), State VARCHAR(20), Usage FLOAT);", + "sql": "SELECT City, SUM(Usage) FROM (SELECT City, Usage FROM City_Water_Usage WHERE City IN (\u0027New York\u0027, \u0027Los Angeles\u0027, \u0027Chicago\u0027, \u0027Houston\u0027, \u0027Phoenix\u0027, \u0027Philadelphia\u0027, \u0027San Antonio\u0027, \u0027San Diego\u0027, \u0027Dallas\u0027, \u0027San Jose\u0027) ORDER BY Usage DESC LIMIT 10) t GROUP BY City;", + "sql_explanation": "This query calculates the total water consumption for the 10 largest cities in the United States by filtering the City_Water_Usage table for rows where the City column is one of the 10 largest cities and then grouping the result by city and computing the sum of the Usage column." 
+}, { + "id": "381", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which conservation initiatives were implemented in regions with increasing contaminant levels in 2019?", + "sql_context": "CREATE TABLE water_quality (region VARCHAR(255), year INT, contaminant_level INT); INSERT INTO water_quality (region, year, contaminant_level) VALUES (\u0027North\u0027, 2018, 10), (\u0027North\u0027, 2019, 12), (\u0027North\u0027, 2020, 15), (\u0027South\u0027, 2018, 15), (\u0027South\u0027, 2019, 18), (\u0027South\u0027, 2020, 20); CREATE TABLE conservation_initiatives (region VARCHAR(255), year INT, initiative VARCHAR(255)); INSERT INTO conservation_initiatives (region, year, initiative) VALUES (\u0027North\u0027, 2018, \u0027Rainwater harvesting\u0027), (\u0027North\u0027, 2019, \u0027Greywater reuse\u0027), (\u0027North\u0027, 2020, \u0027Smart toilets\u0027), (\u0027South\u0027, 2018, \u0027Permeable pavements\u0027), (\u0027South\u0027, 2019, \u0027Smart irrigation\u0027), (\u0027South\u0027, 2020, \u0027Green roofs\u0027);", + "sql": "SELECT c.initiative FROM conservation_initiatives c JOIN water_quality w ON c.region \u003d w.region WHERE c.year \u003d w.year AND w.contaminant_level \u003e (SELECT contaminant_level FROM water_quality WHERE region \u003d w.region AND year \u003d w.year - 1);", + "sql_explanation": "Join the conservation_initiatives and water_quality tables on the region and year columns, filter rows with a higher contaminant_level than the previous year, then select the initiative." 
+}, { + "id": "447", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily water consumption per capita in India for the past 6 months?", + "sql_context": "CREATE TABLE water_consumption (country VARCHAR(255), consumption FLOAT, date DATE); INSERT INTO water_consumption (country, consumption, date) VALUES (\u0027India\u0027, 15, \u00272022-01-01\u0027); INSERT INTO water_consumption (country, consumption, date) VALUES (\u0027India\u0027, 16, \u00272022-01-02\u0027);", + "sql": "SELECT AVG(consumption) FROM (SELECT consumption, DATE_TRUNC(\u0027day\u0027, date) AS day FROM water_consumption WHERE country \u003d \u0027India\u0027 AND date \u003e\u003d \u00272021-07-01\u0027 AND date \u003c \u00272022-01-01\u0027 GROUP BY day, consumption ORDER BY day) subquery;", + "sql_explanation": "Calculate the average daily water consumption per capita in India for the past 6 months. Group the water_consumption table by day and consumption, order by day, and then calculate the average consumption in the subquery." 
+}, { + "id": "635", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the states with increasing water consumption in the last 6 months.", + "sql_context": "CREATE TABLE monthly_usage (state TEXT, month DATE, consumption INTEGER); INSERT INTO monthly_usage (state, month, consumption) VALUES (\u0027California\u0027, \u00272022-01-01\u0027, 1200000), (\u0027California\u0027, \u00272022-02-01\u0027, 1300000), (\u0027California\u0027, \u00272022-03-01\u0027, 1400000), (\u0027California\u0027, \u00272022-04-01\u0027, 1500000), (\u0027Texas\u0027, \u00272022-01-01\u0027, 1800000), (\u0027Texas\u0027, \u00272022-02-01\u0027, 1900000), (\u0027Texas\u0027, \u00272022-03-01\u0027, 2000000), (\u0027Texas\u0027, \u00272022-04-01\u0027, 2100000), (\u0027Florida\u0027, \u00272022-01-01\u0027, 1500000), (\u0027Florida\u0027, \u00272022-02-01\u0027, 1600000), (\u0027Florida\u0027, \u00272022-03-01\u0027, 1550000), (\u0027Florida\u0027, \u00272022-04-01\u0027, 1650000);", + "sql": "SELECT state FROM monthly_usage WHERE consumption \u003e (SELECT consumption FROM monthly_usage WHERE state \u003d monthly_usage.state AND month \u003d DATE_SUB(month, INTERVAL 1 MONTH)) GROUP BY state HAVING COUNT(*) \u003d 6;", + "sql_explanation": "The query filters the \u0027monthly_usage\u0027 table for rows where the \u0027consumption\u0027 value is greater than the \u0027consumption\u0027 value of the same state in the previous month. 
It then groups the results by \u0027state\u0027 and filters for groups with 6 rows (the full date range), implying the state had increasing water consumption in the last 6 months." +}, { + "id": "864", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water usage for all customers in the top 5 most populous counties in California?", + "sql_context": "CREATE TABLE customers (customer_id INT, county VARCHAR(50), water_usage FLOAT); INSERT INTO customers (customer_id, county, water_usage) VALUES (1, \u0027Los_Angeles\u0027, 5000), (2, \u0027San_Diego\u0027, 4000), (3, \u0027Orange\u0027, 6000), (4, \u0027Riverside\u0027, 7000), (5, \u0027San_Bernardino\u0027, 8000), (6, \u0027Ventura\u0027, 9000), (7, \u0027Santa_Barbara\u0027, 10000), (8, \u0027San_Luis_Obispo\u0027, 11000), (9, \u0027Monterey\u0027, 12000), (10, \u0027Alameda\u0027, 13000); CREATE TABLE counties (county VARCHAR(50), PRIMARY KEY (county)); INSERT INTO counties (county) VALUES (\u0027Los_Angeles\u0027), (\u0027San_Diego\u0027), (\u0027Orange\u0027), (\u0027Riverside\u0027), (\u0027San_Bernardino\u0027), (\u0027Ventura\u0027), (\u0027Santa_Barbara\u0027), (\u0027San_Luis_Obispo\u0027), (\u0027Monterey\u0027), (\u0027Alameda\u0027);", + "sql": "SELECT SUM(customers.water_usage) FROM customers JOIN (SELECT county FROM counties GROUP BY county ORDER BY COUNT(*) DESC LIMIT 5) AS top_counties ON customers.county \u003d top_counties.county;", + "sql_explanation": "This query calculates the total water usage for all customers in the top 5 most populous counties in California. 
It does this by using a subquery to select the top 5 counties with the highest population (assuming population is represented by the number of rows in the counties table). It then joins the customers table with the subquery on the county column and calculates the sum of the water_usage column." +}, { + "id": "1239", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption for each neighborhood in Mexico City in the month with the highest consumption?", + "sql_context": "CREATE TABLE mexico_city_water_consumption (id INT, date DATE, neighborhood VARCHAR(20), water_consumption FLOAT); INSERT INTO mexico_city_water_consumption (id, date, neighborhood, water_consumption) VALUES (1, \u00272021-01-01\u0027, \u0027Polanco\u0027, 600.0), (2, \u00272021-01-02\u0027, \u0027Coyoacan\u0027, 700.0);", + "sql": "SELECT neighborhood, SUM(water_consumption) FROM mexico_city_water_consumption WHERE date \u003d (SELECT MAX(date) FROM mexico_city_water_consumption) GROUP BY neighborhood;", + "sql_explanation": "Finds the month with the highest water consumption across all neighborhoods in Mexico City and calculates the total water consumption for each neighborhood in that month." 
+}, { + "id": "2769", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the water usage for customers in \u0027City E\u0027?", + "sql_context": "CREATE TABLE Water_Meters (id INT, customer_id INT, meter_reading FLOAT, read_date DATE); INSERT INTO Water_Meters (id, customer_id, meter_reading, read_date) VALUES (1, 2001, 80, \u00272021-01-01\u0027), (2, 2002, 90, \u00272021-01-01\u0027), (3, 2003, 70, \u00272021-01-01\u0027);", + "sql": "SELECT SUM(meter_reading) FROM Water_Meters WHERE customer_id IN (SELECT id FROM Customers WHERE city \u003d \u0027City E\u0027);", + "sql_explanation": "Sum of the meter_reading column for customer_id that exists in Customers table with city \u0027City E\u0027." 
+}, { + "id": "2866", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the wastewater_treatment record with the lowest treatment_cost value.", + "sql_context": "CREATE TABLE wastewater_treatment (treatment_id INT, treatment_cost FLOAT, treatment_date DATE); INSERT INTO wastewater_treatment (treatment_id, treatment_cost, treatment_date) VALUES (1, 50.2, \u00272022-01-01\u0027), (2, 60.3, \u00272022-01-02\u0027), (3, 70.4, \u00272022-01-03\u0027);", + "sql": "DELETE FROM wastewater_treatment WHERE treatment_cost \u003d (SELECT MIN(treatment_cost) FROM wastewater_treatment);", + "sql_explanation": "This query deletes the record with the lowest treatment_cost value in the wastewater_treatment table by using a subquery to find the minimum treatment_cost and then deleting the record with that treatment_cost value." 
+}, { + "id": "3730", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "Find the water usage data with the highest usage amount in the water_usage table", + "sql_context": "CREATE TABLE water_usage ( date DATE, usage_category VARCHAR(20), region VARCHAR(20), usage_amount INT ); INSERT INTO water_usage (date, usage_category, region, usage_amount) VALUES ( \u00272022-07-01\u0027, \u0027Residential\u0027, \u0027Northeast\u0027, 15000), (\u00272022-07-02\u0027, \u0027Industrial\u0027, \u0027Midwest\u0027, 200000), (\u00272022-07-03\u0027, \u0027Agricultural\u0027, \u0027West\u0027, 800000);", + "sql": "SELECT * FROM water_usage WHERE usage_amount \u003d (SELECT MAX(usage_amount) FROM water_usage);", + "sql_explanation": "This query finds the water usage data with the highest usage amount in the water_usage table. The subquery calculates the maximum usage_amount value, and the outer query filters the records based on this condition using the WHERE clause. The SELECT statement retrieves all columns for these records." 
+}, { + "id": "153", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount by new members in Q4 2022?", + "sql_context": "CREATE TABLE Members (MemberID INT, JoinDate DATE, Region VARCHAR(50)); INSERT INTO Members (MemberID, JoinDate, Region) VALUES (1, \u00272022-10-01\u0027, \u0027Northeast\u0027), (2, \u00272022-11-14\u0027, \u0027Southeast\u0027), (3, \u00272022-12-03\u0027, \u0027Southwest\u0027); CREATE TABLE Donations (DonationID INT, MemberID INT, DonationDate DATE, Amount DECIMAL(10,2)); INSERT INTO Donations (DonationID, MemberID, DonationDate, Amount) VALUES (1, 1, \u00272022-10-05\u0027, 50.00), (2, 2, \u00272022-11-15\u0027, 100.00), (3, 3, \u00272022-12-07\u0027, 25.00);", + "sql": "SELECT AVG(Amount) FROM Donations INNER JOIN Members ON Donations.MemberID \u003d Members.MemberID WHERE YEAR(DonationDate) \u003d 2022 AND Members.MemberID NOT IN (SELECT Members.MemberID FROM Members GROUP BY Members.MemberID HAVING COUNT(Members.MemberID) \u003c 2) AND QUARTER(DonationDate) \u003d 4;", + "sql_explanation": "Joins the Donations and Members tables, then calculates the average donation amount in Q4 2022 by new members." 
+}, { + "id": "216", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of total funding received by each program type in the past year", + "sql_context": "CREATE TABLE art_programs (id INT, program_type VARCHAR(20), start_date DATE, end_date DATE); CREATE TABLE funding_received (id INT, program_id INT, amount INT);", + "sql": "SELECT p.program_type, 100.0 * SUM(r.amount) / (SELECT SUM(amount) FROM funding_received) AS percentage FROM art_programs p INNER JOIN funding_received r ON p.id \u003d r.program_id WHERE p.start_date \u003c\u003d \u00272022-12-31\u0027 AND p.end_date \u003e\u003d \u00272022-01-01\u0027 GROUP BY p.program_type;", + "sql_explanation": "This query calculates the percentage of total funding received by each program type in the past year. It uses the program_type, start_date, and end_date columns from the art_programs table, and the amount column from the funding_received table. The query filters for records where the start_date is within the past year and the end_date is within the past year. The query then groups the results by program_type and calculates the percentage of the total funding received for each group." 
+}, { + "id": "1375", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many attendees were from California for the \u0027Dance\u0027 program?", + "sql_context": "CREATE TABLE programs (program_id INT, program_name VARCHAR(255)); INSERT INTO programs (program_id, program_name) VALUES (1, \u0027Artistic Expression\u0027), (2, \u0027Dance\u0027); CREATE TABLE attendee_demographics (attendee_id INT, program_id INT, state VARCHAR(2)); INSERT INTO attendee_demographics (attendee_id, program_id, state) VALUES (1, 1, \u0027NY\u0027), (2, 1, \u0027CA\u0027), (3, 1, \u0027NY\u0027), (4, 2, \u0027CA\u0027), (5, 2, \u0027CA\u0027), (6, 2, \u0027TX\u0027);", + "sql": "SELECT COUNT(*) as num_ca_attendees FROM attendee_demographics WHERE state \u003d \u0027CA\u0027 AND program_id \u003d (SELECT program_id FROM programs WHERE program_name \u003d \u0027Dance\u0027);", + "sql_explanation": "This query calculates the number of attendees from California for the \u0027Dance\u0027 program by counting the number of rows in the \u0027attendee_demographics\u0027 table where the \u0027state\u0027 is \u0027CA\u0027 and the \u0027program_id\u0027 matches the \u0027program_id\u0027 for the \u0027Dance\u0027 program." 
+}, { + "id": "1930", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of dance workshops attended by participants from Japan and South Korea?", + "sql_context": "CREATE TABLE DanceWorkshops (id INT, participant_name VARCHAR(50), country VARCHAR(50), workshop_date DATE); INSERT INTO DanceWorkshops (id, participant_name, country, workshop_date) VALUES (1, \u0027Hana\u0027, \u0027Japan\u0027, \u00272022-09-01\u0027), (2, \u0027Bomi\u0027, \u0027South Korea\u0027, \u00272022-09-05\u0027), (3, \u0027Kenta\u0027, \u0027Japan\u0027, \u00272022-09-03\u0027), (4, \u0027Minji\u0027, \u0027South Korea\u0027, \u00272022-09-07\u0027);", + "sql": "SELECT SUM(total) FROM (SELECT COUNT(*) AS total FROM DanceWorkshops WHERE country IN (\u0027Japan\u0027, \u0027South Korea\u0027) GROUP BY country) AS subquery;", + "sql_explanation": "This SQL query first groups the records by country and counts the number of workshops attended by participants from each country (Japan and South Korea). Then, it sums the counts to find the total number of workshops attended by participants from both countries." 
+}, { + "id": "1960", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique funding sources supporting visual arts programs and music events.", + "sql_context": "CREATE TABLE programs (id INT, type VARCHAR(20)); INSERT INTO programs (id, type) VALUES (1, \u0027Painting\u0027), (2, \u0027Sculpture\u0027), (3, \u0027Music\u0027); CREATE TABLE funding (id INT, program_id INT, source VARCHAR(25)); INSERT INTO funding (id, program_id, source) VALUES (1, 1, \u0027Grant 1\u0027), (2, 1, \u0027Grant 2\u0027), (3, 2, \u0027Donation\u0027), (4, 3, \u0027Sponsorship\u0027), (5, 3, \u0027Crowdfunding\u0027);", + "sql": "SELECT COUNT(DISTINCT f.source) FROM funding f WHERE f.program_id IN (SELECT p.id FROM programs p WHERE p.type IN (\u0027Visual Arts\u0027, \u0027Music\u0027));", + "sql_explanation": "First, select all sources from the funding table where the program ID is in the subquery that returns IDs of visual arts programs and music events. Then, count the number of unique sources." 
+}, { + "id": "2050", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record into the \"programs\" table for a program named \"Community Art Workshop\" with 15 participants", + "sql_context": "CREATE TABLE programs (program_id INT PRIMARY KEY, name VARCHAR(100), participants INT);", + "sql": "INSERT INTO programs (program_id, name, participants) VALUES ((SELECT MAX(program_id) FROM programs) + 1, \u0027Community Art Workshop\u0027, 15);", + "sql_explanation": "This query finds the maximum program_id from the programs table, adds 1 to create a new id, and then inserts a new record with the given values for name and participants." +}, { + "id": "4398", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which events have the same number of attendees as the average number of attendees for all events?", + "sql_context": "CREATE TABLE events (id INT, name VARCHAR(255), attendees INT); INSERT INTO events (id, name, attendees) VALUES (1, \u0027Concert\u0027, 500), (2, \u0027Play\u0027, 300), (3, \u0027Exhibit\u0027, 200);", + "sql": "SELECT name FROM events WHERE attendees \u003d (SELECT AVG(attendees) FROM events);", + "sql_explanation": "The SQL query uses a subquery to find the average number of attendees for all events, and then selects the names of the events with 
the same number of attendees." +}, { + "id": "145", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military personnel, by rank, and the percentage of total personnel each rank represents, for the past year?", + "sql_context": "CREATE TABLE military_personnel(id INT, rank VARCHAR(255), status VARCHAR(255), date DATE);", + "sql": "SELECT rank, COUNT(*) as count, ROUND(100 * COUNT(*) / (SELECT COUNT(*) FROM military_personnel WHERE date \u003e DATE_SUB(NOW(), INTERVAL 1 YEAR) AND status \u003d \u0027active\u0027), 2) as percent FROM military_personnel WHERE date \u003e DATE_SUB(NOW(), INTERVAL 1 YEAR) AND status \u003d \u0027active\u0027 GROUP BY rank;", + "sql_explanation": "This query groups the military personnel by rank, filters for the past year and active personnel, and returns the count of personnel and the percentage of total personnel each rank represents." 
+}, { + "id": "1786", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of defense contracts signed by a single company in the United States?", + "sql_context": "CREATE TABLE defense_contracts (dc_id INT, dc_company VARCHAR(50), dc_country VARCHAR(50)); INSERT INTO defense_contracts (dc_id, dc_company, dc_country) VALUES (1, \u0027Company A\u0027, \u0027United States\u0027), (2, \u0027Company B\u0027, \u0027United States\u0027), (3, \u0027Company C\u0027, \u0027Canada\u0027);", + "sql": "SELECT MAX(dc_count) FROM (SELECT COUNT(*) AS dc_count FROM defense_contracts WHERE dc_country \u003d \u0027United States\u0027 GROUP BY dc_company) AS subquery;", + "sql_explanation": "This query calculates the maximum number of defense contracts (MAX(dc_count)) signed by a single company in the United States. It does this by first grouping the defense_contracts table by company and counting the number of contracts for each company. It then calculates the maximum of these counts using a subquery." 
+}, { + "id": "2689", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the vendors that supplied equipment not present in high-value contracts?", + "sql_context": "CREATE TABLE Procurement (id INT, vendor VARCHAR(255), item VARCHAR(255), date DATE); INSERT INTO Procurement (id, vendor, item, date) VALUES (1, \u0027XYZ Corp\u0027, \u0027Helicopter\u0027, \u00272022-04-01\u0027), (2, \u0027LMN Inc\u0027, \u0027Tank\u0027, \u00272022-05-01\u0027);", + "sql": "SELECT vendor FROM Procurement WHERE item NOT IN (SELECT item FROM Contracts WHERE value \u003e 100000) AND item \u003d \u0027Tank\u0027;", + "sql_explanation": "This query selects the vendor from the Procurement table who supplied equipment (Tank) not present in high-value contracts (value \u003e 100000)." 
+}, { + "id": "162", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records for new members who joined in March 2022 with no workouts yet into the \u0027Workouts\u0027 table", + "sql_context": "CREATE TABLE Members (MemberID INT, MemberName VARCHAR(50), JoinDate DATETIME);", + "sql": "INSERT INTO Workouts (WorkoutID, MemberID, Duration, MembershipType) SELECT NULL, m.MemberID, 0, \u0027Standard\u0027 FROM (SELECT MemberID FROM Members WHERE MONTH(JoinDate) \u003d 3 AND YEAR(JoinDate) \u003d 2022 LIMIT 3) m WHERE NOT EXISTS (SELECT 1 FROM Workouts w WHERE w.MemberID \u003d m.MemberID);", + "sql_explanation": "1. Subquery identifies new members who joined in March 2022 and do not have any records in the \u0027Workouts\u0027 table. 2. New records are inserted into the \u0027Workouts\u0027 table with WorkoutID, MemberID, Duration, and MembershipType for those members." 
+}, { + "id": "165", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert records for new members who joined in April 2022 with no workouts yet into the \u0027Workouts\u0027 table", + "sql_context": "CREATE TABLE Members (MemberID INT, MemberName VARCHAR(50), JoinDate DATETIME);", + "sql": "INSERT INTO Workouts (WorkoutID, MemberID, Duration, MembershipType) SELECT NULL, m.MemberID, 0, \u0027Premium\u0027 FROM (SELECT MemberID FROM Members WHERE MONTH(JoinDate) \u003d 4 AND YEAR(JoinDate) \u003d 2022 LIMIT 4) m WHERE NOT EXISTS (SELECT 1 FROM Workouts w WHERE w.MemberID \u003d m.MemberID);", + "sql_explanation": "1. Subquery identifies new members who joined in April 2022 and do not have any records in the \u0027Workouts\u0027 table. 2. New records are inserted into the \u0027Workouts\u0027 table with WorkoutID, MemberID, Duration, and MembershipType for those members." 
+}, { + "id": "312", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum heart rate recorded for each user in the past month, and how many times was this maximum heart rate achieved?", + "sql_context": "CREATE TABLE HeartRate (UserID INT, HeartRate INT, Date DATE);", + "sql": "SELECT UserID, MAX(HeartRate), COUNT(*) FROM HeartRate WHERE Date \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY UserID HAVING HeartRate \u003d (SELECT MAX(HeartRate) FROM HeartRate WHERE UserID \u003d HeartRate.UserID AND Date \u003e\u003d DATEADD(month, -1, GETDATE()));", + "sql_explanation": "The SQL query calculates the maximum heart rate recorded for each user in the past month and the number of times this maximum heart rate was achieved. It groups the data by UserID and filters the results using the HAVING clause to only include records where the heart rate is the maximum for that user in the past month." 
+}, { + "id": "1497", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Add a new record for a member with id 27, name \u0027Fatima Ahmed\u0027 and email \u0027fatimaahmed@example.com\u0027 into the members table if no such member already exists", + "sql_context": "CREATE TABLE members (id INT, name VARCHAR(50), email VARCHAR(50));", + "sql": "INSERT INTO members (id, name, email) SELECT 27, \u0027Fatima Ahmed\u0027, \u0027fatimaahmed@example.com\u0027 FROM dual WHERE NOT EXISTS (SELECT * FROM members WHERE id \u003d 27);", + "sql_explanation": "This query inserts a new record into the members table with the specified id, name, and email values if no such record already exists." 
+}, { + "id": "2379", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of users who achieved over 10,000 steps per day on average in 2021.", + "sql_context": "CREATE TABLE DailySteps (user_id INT, steps INT, activity_date DATE); INSERT INTO DailySteps (user_id, steps, activity_date) VALUES (1, 12000, \u00272021-01-01\u0027), (2, 8000, \u00272021-01-02\u0027), (3, 15000, \u00272021-12-31\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT user_id, AVG(steps) avg_steps FROM DailySteps GROUP BY user_id) subquery WHERE avg_steps \u003e 10000;", + "sql_explanation": "The SQL query calculates the average steps per day for each user and then filters the subquery result set to count the number of users with an average step count greater than 10,000 for the entire year of 2021." 
+}, { + "id": "2731", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average duration of meditation sessions for members who also use wearable technology?", + "sql_context": "CREATE TABLE Members (ID INT, UsesWearableTech BOOLEAN); CREATE TABLE Meditation (MemberID INT, Duration INT); INSERT INTO Members (ID, UsesWearableTech) VALUES (1, true), (2, false); INSERT INTO Meditation (MemberID, Duration) VALUES (1, 30), (1, 45), (2, 60);", + "sql": "SELECT AVG(Duration) FROM Meditation WHERE MemberID IN (SELECT MemberID FROM Members WHERE UsesWearableTech \u003d true);", + "sql_explanation": "The SQL query calculates the average duration by using the AVG function on the Duration column, filtering for members who also use wearable technology." 
+}, { + "id": "194", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of publications in the top 3 departments with the highest publication count in the past year.", + "sql_context": "CREATE TABLE faculty (faculty_id INT, faculty_name VARCHAR(255), faculty_gender VARCHAR(10), faculty_department VARCHAR(255)); CREATE TABLE publications (publication_id INT, faculty_id INT, publication_title VARCHAR(255), publication_date DATE);", + "sql": "SELECT SUM(cnt) FROM (SELECT f.faculty_department, COUNT(*) AS cnt FROM faculty f INNER JOIN publications p ON f.faculty_id \u003d p.faculty_id WHERE p.publication_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY f.faculty_department ORDER BY cnt DESC LIMIT 3) subquery;", + "sql_explanation": "This query finds the total number of publications in the top 3 departments with the highest publication count in the past year. It uses a subquery to find the top 3 departments based on their publication count in the past year. The outer query then sums the publication count for those 3 departments." 
+}, { + "id": "201", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of research grants awarded to female faculty members in the College of Social Sciences in the last 5 years?", + "sql_context": "CREATE TABLE if not exists FACULTY(id INT, name TEXT, gender TEXT, department TEXT, position TEXT, salary INT);CREATE TABLE if not exists GRANTS(id INT, faculty_id INT, grant_name TEXT, grant_amount INT, grant_date DATE, college TEXT);", + "sql": "SELECT AVG(grant_count) FROM (SELECT faculty_id, COUNT(*) as grant_count FROM GRANTS WHERE college \u003d \u0027College of Social Sciences\u0027 AND grant_date \u003e\u003d DATE(\u0027now\u0027,\u0027-5 year\u0027) AND faculty_id IN (SELECT id FROM FACULTY WHERE gender \u003d \u0027female\u0027) GROUP BY faculty_id) as subquery;", + "sql_explanation": "The SQL query calculates the average number (AVG(grant_count)) of records in the subquery for the GRANTS table where college is \u0027College of Social Sciences\u0027, grant_date is within the last 5 years, and faculty_id exists in the FACULTY table with gender \u0027female\u0027." 
+}, { + "id": "255", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of underrepresented minority faculty members in each department in the past decade?", + "sql_context": "CREATE TABLE departments (id INT, name VARCHAR(50)); INSERT INTO departments (id, name) VALUES (1, \u0027Computer Science\u0027); INSERT INTO departments (id, name) VALUES (2, \u0027Mathematics\u0027); CREATE TABLE faculty (id INT, name VARCHAR(50), department VARCHAR(50), race VARCHAR(50)); INSERT INTO faculty (id, name, department, race) VALUES (1, \u0027Alice\u0027, \u0027Computer Science\u0027, \u0027Asian\u0027); INSERT INTO faculty (id, name, department, race) VALUES (2, \u0027Bob\u0027, \u0027Mathematics\u0027, \u0027African American\u0027);", + "sql": "SELECT d.name, (COUNT(f.id) / (SELECT COUNT(id) FROM faculty WHERE department \u003d d.name)) * 100 AS percentage FROM faculty f RIGHT JOIN departments d ON f.department \u003d d.name WHERE f.race IN (\u0027African American\u0027, \u0027Hispanic\u0027, \u0027Native American\u0027) GROUP BY d.name;", + "sql_explanation": "This query calculates the percentage of underrepresented minority faculty members in each department in the past decade. It first performs a right join between the faculty and departments tables based on the department column. Then, it filters the results by race using the WHERE clause and the IN operator. Next, it calculates the percentage of underrepresented minority faculty members using a subquery and the COUNT and GROUP BY clauses. Finally, it multiplies the result by 100 to obtain a percentage." 
+}, { + "id": "588", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of research grants awarded to female researchers in the Mathematics department?", + "sql_context": "CREATE TABLE grant (id INT, researcher VARCHAR(50), department VARCHAR(30), amount FLOAT, date DATE); INSERT INTO grant (id, researcher, department, amount, date) VALUES (1, \u0027Nia\u0027, \u0027Mathematics\u0027, 100000.00, \u00272021-01-01\u0027), (2, \u0027Oliver\u0027, \u0027Mathematics\u0027, 120000.00, \u00272020-07-14\u0027);", + "sql": "SELECT AVG(amount) as avg_grant_amount FROM (SELECT researcher, amount FROM grant INNER JOIN researcher ON grant.researcher \u003d researcher.name WHERE department \u003d \u0027Mathematics\u0027 AND gender \u003d \u0027Female\u0027) AS subquery;", + "sql_explanation": "This query calculates the average number of research grants awarded to female researchers in the Mathematics department by joining the \u0027grant\u0027 and \u0027researcher\u0027 tables on the \u0027researcher\u0027 column, filtering rows where the \u0027department\u0027 is \u0027Mathematics\u0027 and \u0027gender\u0027 is \u0027Female\u0027, and then calculating the average grant amount for the filtered rows." 
+}, { + "id": "672", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of graduate students who have published in journals with the highest impact factors?", + "sql_context": "CREATE TABLE student (id INT, name VARCHAR(50), program VARCHAR(50)); CREATE TABLE publication (id INT, title VARCHAR(100), journal_name VARCHAR(50), impact_factor DECIMAL(3,1));", + "sql": "SELECT s.name FROM student s JOIN publication p ON s.id IN (SELECT student_id FROM grant WHERE title IN (SELECT title FROM publication WHERE impact_factor \u003d (SELECT MAX(impact_factor) FROM publication)));", + "sql_explanation": "Join the student and subquery (SELECT student_id FROM grant WHERE title IN (SELECT title FROM publication WHERE impact_factor \u003d (SELECT MAX(impact_factor) FROM publication))) on the id and student_id columns, respectively, then select the name column from the student table." 
+}, { + "id": "690", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many graduate students in the Mathematics department have published more than 3 papers in the past year?", + "sql_context": "CREATE TABLE graduate_students (id INT, name VARCHAR(50), department VARCHAR(50), enrollment_date DATE); CREATE TABLE publications (id INT, student_id INT, title VARCHAR(100), publication_date DATE);", + "sql": "SELECT COUNT(*) FROM graduate_students gs WHERE department \u003d \u0027Mathematics\u0027 AND (SELECT COUNT(*) FROM publications p WHERE p.student_id \u003d gs.id AND YEAR(publication_date) \u003d YEAR(CURRENT_DATE()) - 1) \u003e 3;", + "sql_explanation": "This query counts the number of graduate students in the Mathematics department who have published more than 3 papers in the past year. It does so by first selecting the graduate students from the graduate_students table who meet the specified criteria, and then using a subquery to count the number of publications each student has in the past year." 
+}, { + "id": "729", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the publications with at least two authors from underrepresented communities.", + "sql_context": "CREATE TABLE publication (id INT, title VARCHAR(100), num_authors INT); CREATE TABLE author (id INT, publication_id INT, community VARCHAR(50));", + "sql": "SELECT p.title FROM publication p JOIN (SELECT publication_id FROM author WHERE community IN (\u0027Community A\u0027, \u0027Community B\u0027) GROUP BY publication_id HAVING COUNT(*) \u003e\u003d 2) a ON p.id \u003d a.publication_id;", + "sql_explanation": "Join the publication and subquery a on the id and publication_id columns, respectively, then select the title column from the publication table." 
+}, { + "id": "1323", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total grant amount awarded to graduate students in the Physics department in 2019?", + "sql_context": "CREATE TABLE research_grants (grant_id INT, title VARCHAR(50), amount DECIMAL(10, 2), year INT, student_id INT, department VARCHAR(50)); INSERT INTO research_grants VALUES (1, \u0027Grant1\u0027, 50000, 2019, 456, \u0027Physics\u0027);", + "sql": "SELECT SUM(amount) FROM research_grants WHERE department \u003d \u0027Physics\u0027 AND year \u003d 2019 AND student_id IN (SELECT student_id FROM students WHERE program \u003d \u0027Graduate\u0027);", + "sql_explanation": "We perform a subquery to find the student IDs enrolled in the graduate program and filter the research_grants table by the Physics department, year 2019, and student IDs in the subquery result. Finally, we calculate the total grant amount for the filtered dataset using the SUM function." 
+}, { + "id": "1652", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of tenured faculty who are female?", + "sql_context": "CREATE TABLE faculty (id INT, tenured BOOLEAN, gender VARCHAR(6)); INSERT INTO faculty (id, tenured, gender) VALUES (1, true, \u0027Female\u0027), (2, false, \u0027Male\u0027), (3, true, \u0027Female\u0027), (4, true, \u0027Male\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM faculty WHERE tenured \u003d true)) AS percentage FROM faculty WHERE gender \u003d \u0027Female\u0027 AND tenured \u003d true;", + "sql_explanation": "This SQL query calculates the percentage of tenured faculty who are female. It does this by using a subquery to count the total number of tenured faculty, then dividing the number of female tenured faculty by that total and multiplying by 100 to get a percentage." 
+}, { + "id": "1732", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of research grants awarded per department in the \u0027research_grants\u0027 table, excluding departments with less than 3 grants?", + "sql_context": "CREATE TABLE research_grants (id INT, department VARCHAR(255), amount FLOAT); INSERT INTO research_grants (id, department, amount) VALUES (1, \u0027Physics\u0027, 100000), (2, \u0027Physics\u0027, 200000), (3, \u0027Mathematics\u0027, 150000), (4, \u0027Chemistry\u0027, 250000), (5, \u0027Chemistry\u0027, 300000), (6, \u0027Biology\u0027, 50000);", + "sql": "SELECT AVG(grant_count) FROM (SELECT department, COUNT(*) AS grant_count FROM research_grants GROUP BY department HAVING COUNT(*) \u003e\u003d 3) AS subquery;", + "sql_explanation": "The subquery calculates the count of grants for each department and filters out departments with less than 3 grants. The outer query then calculates the average of these counts." 
+}, { + "id": "2386", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of a research grant awarded to the Mathematics department?", + "sql_context": "CREATE TABLE department (id INT, name TEXT);CREATE TABLE research_grant (id INT, department_id INT, amount INT);", + "sql": "SELECT MAX(rg.amount) FROM research_grant rg WHERE rg.department_id IN (SELECT id FROM department WHERE name \u003d \u0027Mathematics\u0027);", + "sql_explanation": "1. Select the maximum amount from the research_grant table where the department_id is in the id column of the department table with a name of \u0027Mathematics\u0027." 
+}, { + "id": "2402", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of publications per year for graduate students in the Physics department?", + "sql_context": "CREATE TABLE department (name VARCHAR(255), id INT);CREATE TABLE graduate_student (name VARCHAR(255), department_id INT, publication_year INT);", + "sql": "SELECT MAX(publication_year) FROM graduate_student WHERE department_id IN (SELECT id FROM department WHERE name \u003d \u0027Physics\u0027);", + "sql_explanation": "This SQL query determines the maximum number of publications per year for graduate students in the Physics department. It filters the records in the graduate_student table to include only those with a department ID corresponding to the Physics department. Then, it calculates the maximum publication year using the MAX function." 
+}, { + "id": "2425", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of publications by graduate students in the year 2019?", + "sql_context": "CREATE TABLE publications (id INT, author VARCHAR(50), year INT, journal VARCHAR(50)); INSERT INTO publications (id, author, year, journal) VALUES (1, \u0027Alice\u0027, 2019, \u0027Journal of Computer Science\u0027), (2, \u0027Bob\u0027, 2018, \u0027Journal of Physics\u0027), (3, \u0027Eve\u0027, 2019, \u0027Journal of Mathematics\u0027);", + "sql": "SELECT COUNT(*) FROM publications WHERE year \u003d 2019 AND author IN (SELECT name FROM students WHERE graduate_student \u003d \u0027Yes\u0027);", + "sql_explanation": "This query counts the total number of publications by graduate students in the year 2019 by selecting all records with the specified conditions and then counting the number of rows in the result. It uses a subquery to get the names of all graduate students from the students table." 
+}, { + "id": "1281", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of funding received by disaster relief organizations in the last 5 years?", + "sql_context": "CREATE TABLE funding (id INT, organization VARCHAR(255), year INT, amount DECIMAL(10,2));", + "sql": "SELECT AVG(amount) FROM funding WHERE organization LIKE \u0027%disaster relief%\u0027 AND year \u003e (SELECT MAX(year) FROM funding WHERE organization LIKE \u0027%disaster relief%\u0027) - 5;", + "sql_explanation": "This SQL query calculates the average amount of funding received by disaster relief organizations in the last 5 years. It does this by using the AVG() function on the \u0027amount\u0027 column, filtering the \u0027funding\u0027 table to only include rows where the \u0027organization\u0027 column contains the phrase \u0027disaster relief\u0027, and the \u0027year\u0027 column is greater than the maximum year of the previous 5 years." 
+}, { + "id": "3026", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of shelters in \u0027asia\u0027 region?", + "sql_context": "CREATE TABLE regions (region_id INT, region_name VARCHAR(20)); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027americas\u0027), (2, \u0027europe\u0027), (3, \u0027asia\u0027), (4, \u0027africa\u0027); CREATE TABLE shelters (shelter_id INT, shelter_name VARCHAR(30), region_id INT); INSERT INTO shelters (shelter_id, shelter_name, region_id) VALUES (1, \u0027Emergency Shelter 1\u0027, 3), (2, \u0027Temporary Home\u0027, 3), (3, \u0027Relief House\u0027, 1);", + "sql": "SELECT COUNT(*) FROM shelters WHERE region_id \u003d (SELECT region_id FROM regions WHERE region_name \u003d \u0027asia\u0027);", + "sql_explanation": "The SQL query first finds the region_id for \u0027asia\u0027 using a subquery, and then counts the number of shelters in that region." 
+}, { + "id": "1645", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the current landfill capacity in cubic meters for the city of Lagos?", + "sql_context": "CREATE TABLE landfill_capacities (city TEXT, capacity_cubic_meters INT, date DATE); INSERT INTO landfill_capacities (city, capacity_cubic_meters, date) VALUES (\u0027Lagos\u0027, 150000, \u00272023-02-16\u0027);", + "sql": "SELECT capacity_cubic_meters FROM landfill_capacities WHERE city \u003d \u0027Lagos\u0027 AND date \u003d (SELECT MAX(date) FROM landfill_capacities WHERE city \u003d \u0027Lagos\u0027);", + "sql_explanation": "This SQL query calculates the current landfill capacity in cubic meters for the city of Lagos by selecting the capacity_cubic_meters column in the landfill_capacities table where the city is \u0027Lagos\u0027 and the date is the most recent date for Lagos." 
+}, { + "id": "1194", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete records of transactions with a price per gram of zero or null in Washington dispensaries since August 2021", + "sql_context": "CREATE TABLE WAProducers (ProducerID INT, Name VARCHAR(100), State VARCHAR(100)); CREATE TABLE WAFlowerPrices (PriceID INT, ProducerID INT, PricePerGram DECIMAL(5,2), PriceDate DATE);", + "sql": "DELETE FROM WAFlowerPrices WHERE PricePerGram IN (0, NULL) AND ProducerID IN (SELECT ProducerID FROM WAProducers WHERE State \u003d \u0027Washington\u0027) AND PriceDate \u003e\u003d \u00272021-08-01\u0027;", + "sql_explanation": "The SQL query deletes records of transactions with a price per gram of zero or null in Washington dispensaries since August 2021, filtering for flower prices in Washington on or after August 1, 2021 and with a price per gram of zero or null." 
+}, { + "id": "1650", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which strains are not compliant with regulatory limits for THC content?", + "sql_context": "CREATE TABLE StrainRegulations (StrainName TEXT, MaximumTHCContent FLOAT); INSERT INTO StrainRegulations (StrainName, MaximumTHCContent) VALUES (\u0027Purple Haze\u0027, 20.0), (\u0027Blue Dream\u0027, 18.0), (\u0027Sour Diesel\u0027, 19.0); CREATE TABLE StrainTesting (StrainName TEXT, THCContent FLOAT); INSERT INTO StrainTesting (StrainName, THCContent) VALUES (\u0027Purple Haze\u0027, 22.0), (\u0027Blue Dream\u0027, 17.5), (\u0027Sour Diesel\u0027, 21.0);", + "sql": "SELECT StrainName FROM StrainTesting WHERE THCContent \u003e (SELECT MaximumTHCContent FROM StrainRegulations WHERE StrainName \u003d StrainTesting.StrainName);", + "sql_explanation": "The SQL query uses a subquery to select all StrainNames from the StrainTesting table that have a THCContent greater than the MaximumTHCContent for that strain in the StrainRegulations table." 
+}, { + "id": "1993", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete sales records from \u0027Herbal Haven\u0027 dispensary before 2022.", + "sql_context": "CREATE TABLE dispensaries (dispensary_id INT, name VARCHAR(255)); INSERT INTO dispensaries (dispensary_id, name) VALUES (4, \u0027Herbal Haven\u0027); CREATE TABLE sales (sale_id INT, dispensary_id INT, product_id INT, quantity INT, sale_date DATE); INSERT INTO sales (sale_id, dispensary_id, product_id, quantity, sale_date) VALUES (20, 4, 2, 2, \u00272021-12-31\u0027);", + "sql": "DELETE FROM sales WHERE dispensary_id \u003d (SELECT dispensary_id FROM dispensaries WHERE name \u003d \u0027Herbal Haven\u0027) AND sale_date \u003c \u00272022-01-01\u0027;", + "sql_explanation": "This query deletes sales records from \u0027Herbal Haven\u0027 dispensary before 2022. It uses a subquery to find the dispensary_id for \u0027Herbal Haven\u0027, then filters the sales table for that dispensary_id and sale_date before 2022. The DELETE statement then removes the matching rows." 
+}, { + "id": "2534", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which projects have a higher cost than any project in \u0027WaterSupply\u0027 category?", + "sql_context": "CREATE TABLE Infrastructure (id INT, category VARCHAR(20), cost FLOAT); INSERT INTO Infrastructure (id, category, cost) VALUES (1, \u0027Transportation\u0027, 5000000), (2, \u0027WaterSupply\u0027, 3000000), (3, \u0027Transportation\u0027, 7000000);", + "sql": "SELECT * FROM Infrastructure i1 WHERE cost \u003e (SELECT MAX(cost) FROM Infrastructure i2 WHERE i2.category \u003d \u0027WaterSupply\u0027);", + "sql_explanation": "This query uses a subquery to find the maximum cost of projects in the \u0027WaterSupply\u0027 category, and then returns all projects with a higher cost from the \u0027Infrastructure\u0027 table." 
+}, { + "id": "4466", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the roads and their lengths in the road network where the average road length is greater than 10 miles?", + "sql_context": "CREATE TABLE Roads (id INT, name TEXT, length REAL); INSERT INTO Roads (id, name, length) VALUES (1, \u0027I-5\u0027, 1381.5), (2, \u0027I-80\u0027, 2899.8), (3, \u0027I-90\u0027, 3020.5);", + "sql": "SELECT name, length FROM Roads WHERE length \u003e (SELECT AVG(length) FROM Roads)", + "sql_explanation": "This query calculates the average road length using a subquery and then filters the Roads table for entries with a length greater than the average length. It projects the name and length columns for the result set." 
+}, { + "id": "987", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the astronauts who have been on the most space missions, and how many missions have they been on?", + "sql_context": "CREATE TABLE Astronauts (AstronautID INT, NumberOfMissions INT);", + "sql": "SELECT AstronautID, NumberOfMissions FROM (SELECT AstronautID, COUNT(*) AS NumberOfMissions FROM SpaceMissions GROUP BY AstronautID) subquery ORDER BY NumberOfMissions DESC LIMIT 1;", + "sql_explanation": "This query retrieves the astronauts who have been on the most space missions, and how many missions they have been on. It does so by first creating a subquery that selects the AstronautID and counts the number of rows in the SpaceMissions table for each astronaut, grouped by AstronautID. Then, it selects the AstronautID and NumberOfMissions columns from this subquery, ordered by NumberOfMissions in descending order, and limits the results to 1 row." 
+}, { + "id": "1023", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of spacecraft manufactured by \u0027Stellar Systems\u0027 that are for Mars missions?", + "sql_context": "CREATE TABLE spacecraft(id INT, name VARCHAR(50), manufacturer VARCHAR(50), destination VARCHAR(50), mass FLOAT); INSERT INTO spacecraft VALUES(1, \u0027Perseverance Rover\u0027, \u0027Stellar Systems\u0027, \u0027Mars\u0027, 1050.), (2, \u0027Spirit Rover\u0027, \u0027Stellar Systems\u0027, \u0027Mars\u0027, 174.);", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM spacecraft WHERE manufacturer \u003d \u0027Stellar Systems\u0027) FROM spacecraft WHERE manufacturer \u003d \u0027Stellar Systems\u0027 AND destination \u003d \u0027Mars\u0027;", + "sql_explanation": "This query calculates the percentage of spacecraft manufactured by \u0027Stellar Systems\u0027 that are for Mars missions by using a subquery to get the total count of spacecraft manufactured by \u0027Stellar Systems\u0027 and then dividing the count of spacecraft for Mars by that total count." 
+}, { + "id": "1153", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average astronaut\u0027s age during their first mission?", + "sql_context": "CREATE TABLE Astronauts(ID INT, Name VARCHAR(50), Age INT, FirstMissionDate DATE);", + "sql": "SELECT AVG(Age) FROM Astronauts INNER JOIN (SELECT MIN(FirstMissionDate) AS FirstMission FROM Astronauts) AS Subquery ON Astronauts.FirstMissionDate \u003d Subquery.FirstMission;", + "sql_explanation": "This SQL query uses a subquery to find the earliest FirstMissionDate from the Astronauts table and then calculates the average Age of all astronauts on their first mission." 
+}, { + "id": "1274", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique spacecraft names manufactured by companies that have manufactured more than 5 spacecraft.", + "sql_context": "CREATE TABLE Spacecraft_Manufacturers_5 (Company VARCHAR(50), Spacecraft_Name VARCHAR(50), Manufacturing_Date DATE); INSERT INTO Spacecraft_Manufacturers_5 (Company, Spacecraft_Name, Manufacturing_Date) VALUES (\u0027SpaceX\u0027, \u0027Cargo Dragon\u0027, \u00272012-05-25\u0027); INSERT INTO Spacecraft_Manufacturers_5 (Company, Spacecraft_Name, Manufacturing_Date) VALUES (\u0027SpaceX\u0027, \u0027Crew Dragon\u0027, \u00272019-03-02\u0027); INSERT INTO Spacecraft_Manufacturers_5 (Company, Spacecraft_Name, Manufacturing_Date) VALUES (\u0027Boeing\u0027, \u0027Starliner\u0027, \u00272019-08-03\u0027);", + "sql": "SELECT DISTINCT Spacecraft_Name FROM Spacecraft_Manufacturers_5 WHERE Company IN (SELECT Company FROM Spacecraft_Manufacturers_5 GROUP BY Company HAVING COUNT(*) \u003e 5);", + "sql_explanation": "This SQL query retrieves the unique spacecraft names using a subquery that filters the data by companies that have manufactured more than 5 spacecraft." 
+}, { + "id": "1743", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the success rate of \u0027ISA\u0027 missions?", + "sql_context": "CREATE TABLE MissionStatus (id INT, mission VARCHAR(50), agency VARCHAR(50), status VARCHAR(10)); INSERT INTO MissionStatus (id, mission, agency, status) VALUES (1, \u0027Ares 1\u0027, \u0027NASA\u0027, \u0027failed\u0027), (2, \u0027Artemis 1\u0027, \u0027ISA\u0027, \u0027success\u0027), (3, \u0027Apollo 11\u0027, \u0027NASA\u0027, \u0027success\u0027), (4, \u0027Artemis 2\u0027, \u0027ISA\u0027, \u0027success\u0027);", + "sql": "SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM MissionStatus WHERE agency \u003d \u0027ISA\u0027) FROM MissionStatus WHERE agency \u003d \u0027ISA\u0027 AND status \u003d \u0027success\u0027;", + "sql_explanation": "This query calculates the success rate of \u0027ISA\u0027 missions by first counting the number of successful missions (where the status is \u0027success\u0027) and the total number of missions (without filtering by status) for \u0027ISA\u0027 using subqueries, and then dividing the count of successful missions by the total number of missions and multiplying by 100.0 to get a percentage." 
+}, { + "id": "2166", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all space missions that had an astronaut from a country in Asia.", + "sql_context": "CREATE TABLE SpaceMissions (id INT, name VARCHAR(255), launch_date DATE);CREATE TABLE Astronauts (id INT, name VARCHAR(255), country VARCHAR(10), mission_id INT);CREATE VIEW Asia_Astronauts AS SELECT * FROM Astronauts WHERE country IN (\u0027China\u0027, \u0027India\u0027, \u0027Japan\u0027, \u0027South Korea\u0027, \u0027North Korea\u0027, \u0027Indonesia\u0027, \u0027Vietnam\u0027, \u0027Malaysia\u0027, \u0027Thailand\u0027, \u0027Philippines\u0027, \u0027Pakistan\u0027, \u0027Bangladesh\u0027, \u0027Sri Lanka\u0027, \u0027Nepal\u0027, \u0027Bhutan\u0027, \u0027Afghanistan\u0027, \u0027Mongolia\u0027, \u0027Cambodia\u0027, \u0027Laos\u0027, \u0027Myanmar\u0027, \u0027Brunei\u0027, \u0027Singapore\u0027, \u0027Timor-Leste\u0027);", + "sql": "SELECT name FROM SpaceMissions sm JOIN Astronauts a ON sm.id \u003d a.mission_id WHERE a.country IN (SELECT country FROM Asia_Astronauts);", + "sql_explanation": "This query lists all space missions that had an astronaut from a country in Asia. It uses a join to combine the SpaceMissions and Astronauts tables based on the mission ID, and then filters the results based on the astronaut\u0027s country being in the Asia_Astronauts view." 
+}, { + "id": "2413", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify space missions launched in 2025 with the shortest duration.", + "sql_context": "CREATE TABLE space_missions(id INT, launch_year INT, duration INT, mission_name VARCHAR(50)); INSERT INTO space_missions(id, launch_year, duration, mission_name) VALUES (1, 2005, 120, \u0027Mars Explorer 1\u0027); INSERT INTO space_missions(id, launch_year, duration, mission_name) VALUES (2, 2025, 150, \u0027Astro-Travel 1\u0027);", + "sql": "SELECT mission_name FROM space_missions WHERE duration \u003d (SELECT MIN(duration) FROM space_missions WHERE launch_year \u003d 2025);", + "sql_explanation": "This query retrieves the names of space missions launched in 2025 with the shortest duration by selecting the \u0027mission_name\u0027 column value where the \u0027duration\u0027 column equals the minimum \u0027duration\u0027 value in the \u0027space_missions\u0027 table where the \u0027launch_year\u0027 column equals 2025." 
+}, { + "id": "2417", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of astronauts who have flown on SpaceX or NASA crafts?", + "sql_context": "CREATE TABLE astronauts (astronaut_id INT, name VARCHAR(100), age INT, craft VARCHAR(50)); INSERT INTO astronauts (astronaut_id, name, age, craft) VALUES (1, \u0027John\u0027, 45, \u0027Dragon\u0027), (2, \u0027Sarah\u0027, 36, \u0027Starship\u0027), (3, \u0027Mike\u0027, 50, \u0027Falcon\u0027), (4, \u0027Jane\u0027, 42, \u0027Apollo\u0027), (5, \u0027Emma\u0027, 34, \u0027Shuttle\u0027), (6, \u0027Bruce\u0027, 30, \u0027Shuttle\u0027); CREATE TABLE spacex_crafts (craft VARCHAR(50), manufacturer VARCHAR(50)); INSERT INTO spacex_crafts (craft, manufacturer) VALUES (\u0027Dragon\u0027, \u0027SpaceX\u0027), (\u0027Starship\u0027, \u0027SpaceX\u0027), (\u0027Falcon\u0027, \u0027SpaceX\u0027); CREATE TABLE nasa_crafts (craft VARCHAR(50), manufacturer VARCHAR(50)); INSERT INTO nasa_crafts (craft, manufacturer) VALUES (\u0027Apollo\u0027, \u0027NASA\u0027), (\u0027Shuttle\u0027, \u0027NASA\u0027);", + "sql": "SELECT MIN(age) FROM astronauts WHERE craft IN (SELECT craft FROM spacex_crafts) OR craft IN (SELECT craft FROM nasa_crafts);", + "sql_explanation": "This query retrieves the minimum age of astronauts who have flown on SpaceX or NASA crafts, by selecting the minimum age from the astronauts table where the craft is in the spacex_crafts or nasa_crafts table." 
+}, { + "id": "2915", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many space missions have had an astronaut from Japan as a leader?", + "sql_context": "CREATE TABLE Astronauts(id INT, name VARCHAR(50), nationality VARCHAR(50)); CREATE TABLE SpaceMissions(id INT, mission VARCHAR(50), leader_id INT, duration FLOAT); INSERT INTO Astronauts(id, name, nationality) VALUES (1, \u0027Tanikawa Shigeo\u0027, \u0027Japan\u0027), (2, \u0027Yamazaki Naoko\u0027, \u0027Japan\u0027), (3, \u0027Chang Diaz Franklin\u0027, \u0027Costa Rica\u0027); INSERT INTO SpaceMissions(id, mission, leader_id, duration) VALUES (1, \u0027Apollo 11\u0027, 1, 12), (2, \u0027Artemis I\u0027, 2, 15), (3, \u0027Ares III\u0027, 3, 18);", + "sql": "SELECT COUNT(*) FROM SpaceMissions WHERE leader_id IN (SELECT id FROM Astronauts WHERE nationality \u003d \u0027Japan\u0027);", + "sql_explanation": "This query counts the number of space missions that have had an astronaut from Japan as a leader by selecting the number of records in the SpaceMissions table where the leader_id is in the list of ids for astronauts with a nationality of \u0027Japan\u0027." 
+}, { + "id": "413", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify customers from Texas who have made more than 5 transactions in the last week.", + "sql_context": "CREATE TABLE transactions (id INT, account_id INT, transaction_date DATE); CREATE TABLE customers (id INT, name VARCHAR(100), age INT, gender VARCHAR(10), city VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT c.id, c.name FROM customers c JOIN (SELECT account_id FROM transactions t WHERE t.transaction_date \u003e\u003d DATEADD(day, -7, CURRENT_DATE) GROUP BY account_id HAVING COUNT(id) \u003e 5) t ON c.id \u003d t.account_id WHERE c.state \u003d \u0027Texas\u0027;", + "sql_explanation": "This query first filters transactions from the last week, groups them by account ID, and filters records with more than 5 transactions. It then joins the result with the customers table, filtering records where the state is Texas. This returns the IDs and names of customers from Texas who have made more than 5 transactions in the last week." 
+}, { + "id": "598", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of transactions performed by a single customer in a day?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_date DATE); INSERT INTO transactions (transaction_id, customer_id, transaction_date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 1, \u00272022-01-01\u0027), (3, 2, \u00272022-01-02\u0027), (4, 3, \u00272022-01-03\u0027), (5, 3, \u00272022-01-03\u0027), (6, 3, \u00272022-01-03\u0027);", + "sql": "SELECT customer_id, MAX(count_per_day) FROM (SELECT customer_id, transaction_date, COUNT(*) AS count_per_day FROM transactions GROUP BY customer_id, transaction_date) AS daily_transactions GROUP BY customer_id;", + "sql_explanation": "The SQL query first groups transactions by customer and date, then counts the number of transactions per customer per day. It then finds the maximum count for each customer." 
+}, { + "id": "1383", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the difference between the average transaction amount in the United Kingdom and Canada.", + "sql_context": "CREATE TABLE transactions (customer_id INT, transaction_amount DECIMAL(10,2), country VARCHAR(50)); INSERT INTO transactions (customer_id, transaction_amount, country) VALUES (1, 120.50, \u0027UK\u0027), (2, 75.30, \u0027UK\u0027), (3, 150.00, \u0027UK\u0027), (4, 50.00, \u0027UK\u0027), (5, 100.50, \u0027Canada\u0027), (6, 80.30, \u0027Canada\u0027), (7, 120.00, \u0027Canada\u0027), (8, 60.00, \u0027Canada\u0027);", + "sql": "SELECT AVG(transaction_amount) - (SELECT AVG(transaction_amount) FROM transactions WHERE country \u003d \u0027Canada\u0027) AS difference FROM transactions WHERE country \u003d \u0027UK\u0027;", + "sql_explanation": "This query calculates the difference between the average transaction amount in the United Kingdom and Canada from the transactions table. It uses a subquery to calculate the average transaction amount in Canada and subtracts it from the average transaction amount in the United Kingdom." 
+}, { + "id": "1434", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all transactions made by \u0027Jane Smith\u0027 between \u00272021-01-01\u0027 and \u00272021-12-31\u0027?", + "sql_context": "CREATE TABLE transactions (id INT, customer_id INT, transaction_date DATE, amount DECIMAL(10,2)); INSERT INTO transactions (id, customer_id, transaction_date, amount) VALUES (1, 1, \u00272021-02-03\u0027, 1000.00), (2, 2, \u00272021-04-05\u0027, 2000.00), (3, 2, \u00272021-11-20\u0027, 500.00);", + "sql": "SELECT * FROM transactions WHERE customer_id \u003d (SELECT id FROM customers WHERE name \u003d \u0027Jane Smith\u0027) AND transaction_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This SQL query lists all transactions made by \u0027Jane Smith\u0027 between \u00272021-01-01\u0027 and \u00272021-12-31\u0027 by selecting all records from the \u0027transactions\u0027 table where the \u0027customer_id\u0027 matches the id of \u0027Jane Smith\u0027 and the \u0027transaction_date\u0027 is between the specified dates." 
+}, { + "id": "1540", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total amount of transactions made in \u0027Los Angeles\u0027 in the month of March.", + "sql_context": "CREATE TABLE transactions (id INT PRIMARY KEY, account_id INT, type VARCHAR(255), amount DECIMAL(10,2), date DATE, client_id INT); INSERT INTO transactions (id, account_id, type, amount, date, client_id) VALUES (1, 1, \u0027Deposit\u0027, 2000.00, \u00272021-01-01\u0027, 1001), (2, 2, \u0027Withdrawal\u0027, 1500.00, \u00272021-02-10\u0027, 1002), (3, 3, \u0027Transfer\u0027, 500.00, \u00272021-03-20\u0027, 1003), (4, 1003, \u0027Withdrawal\u0027, 1000.00, \u00272021-04-01\u0027, 1005), (5, 1002, \u0027Withdrawal\u0027, 500.00, \u00272021-05-15\u0027, 1006), (6, 5, \u0027Deposit\u0027, 1000.00, \u00272021-06-01\u0027, 1004), (7, 4, \u0027Payment\u0027, 500.00, \u00272021-06-15\u0027, 1004);", + "sql": "SELECT SUM(amount) FROM transactions WHERE date BETWEEN \u00272021-03-01\u0027 AND \u00272021-03-31\u0027 AND client_id IN (SELECT id FROM clients WHERE city \u003d \u0027Los Angeles\u0027);", + "sql_explanation": "This query calculates the total amount of transactions made in \u0027Los Angeles\u0027 in the month of March by filtering the transactions table based on date and client_id, which is obtained by filtering the clients table based on city." 
+}, { + "id": "2567", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total transactions and their values for all customers from the United States and Canada?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027); CREATE TABLE transactions (id INT, customer_id INT, amount DECIMAL(10,2)); INSERT INTO transactions (id, customer_id, amount) VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 50.00);", + "sql": "SELECT SUM(amount) FROM transactions WHERE customer_id IN (SELECT id FROM customers WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027));", + "sql_explanation": "The SQL query first selects the customer IDs from the customers table where the country is either the USA or Canada. Then, it uses this result to select the sum of amounts from the transactions table where the customer ID is in the previously selected set of IDs." 
+}, { + "id": "2749", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers have made more than 5 transactions in the financial crimes table?", + "sql_context": "CREATE TABLE financial_crimes (customer_id INT, transaction_date DATE, transaction_value DECIMAL(10,2)); INSERT INTO financial_crimes (customer_id, transaction_date, transaction_value) VALUES (1, \u00272022-01-01\u0027, 5000.00), (1, \u00272022-01-02\u0027, 3000.00), (1, \u00272022-01-03\u0027, 1000.00), (2, \u00272022-01-01\u0027, 1000.00), (2, \u00272022-01-02\u0027, 2000.00), (3, \u00272022-01-01\u0027, 3000.00), (3, \u00272022-01-02\u0027, 2000.00), (3, \u00272022-01-03\u0027, 1000.00), (3, \u00272022-01-04\u0027, 500.00);", + "sql": "SELECT COUNT(*) FROM (SELECT customer_id, COUNT(*) FROM financial_crimes GROUP BY customer_id HAVING COUNT(*) \u003e 5);", + "sql_explanation": "This SQL query calculates the number of customers who have made more than 5 transactions in the financial crimes table. It does so by using a subquery to group the data by customer_id and count the number of transactions for each customer. The main query then filters the results to only include customers who have made more than 5 transactions, and calculates the total number of customers using the COUNT() function." 
+}, { + "id": "3358", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which products have a higher sales count than the average sales count?", + "sql_context": "CREATE TABLE product_sales (product VARCHAR(20), sales_count INT); INSERT INTO product_sales (product, sales_count) VALUES (\u0027Software\u0027, 10), (\u0027Hardware\u0027, 5), (\u0027Consulting\u0027, 15);", + "sql": "SELECT product FROM product_sales WHERE sales_count \u003e (SELECT AVG(sales_count) FROM product_sales);", + "sql_explanation": "This SQL query finds the products with a higher sales count than the average sales count. It calculates the average sales count for all products, and then selects the products with a sales count above this value." 
+}, { + "id": "579", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of green buildings in \u0027GreenBuilding\u0027 table, in each state, and the corresponding percentage of total green buildings?", + "sql_context": "CREATE TABLE GreenBuilding (building_id INT, state VARCHAR(50), is_green BOOLEAN);", + "sql": "SELECT state, COUNT(building_id) as total_green_buildings, (COUNT(building_id) / (SELECT COUNT(building_id) FROM GreenBuilding)) * 100 as percentage_of_total FROM GreenBuilding WHERE is_green \u003d 1 GROUP BY state;", + "sql_explanation": "The SQL query calculates the total number of green buildings in \u0027GreenBuilding\u0027 table, in each state, and the corresponding percentage of total green buildings. It uses the COUNT function to find the total number of green buildings, subquery to find the total number of buildings, and GROUP BY clause to group the data based on state. The percentage of total green buildings is calculated using the formula (total green buildings in the state / total buildings in all states) * 100." 
+}, { + "id": "2016", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many more solar panels have been installed in Spain compared to Portugal?", + "sql_context": "CREATE TABLE solar_panels (id INT, country VARCHAR(50), installed_date DATE); INSERT INTO solar_panels (id, country, installed_date) VALUES (1, \u0027Spain\u0027, \u00272020-01-01\u0027), (2, \u0027Spain\u0027, \u00272021-01-01\u0027), (3, \u0027Portugal\u0027, \u00272019-01-01\u0027);", + "sql": "SELECT COUNT(*) - (SELECT COUNT(*) FROM solar_panels WHERE country \u003d \u0027Portugal\u0027) as difference FROM solar_panels WHERE country \u003d \u0027Spain\u0027;", + "sql_explanation": "Calculates the number of solar panels installed in Spain and subtracts the number of solar panels installed in Portugal by using two separate subqueries, one for each country, and subtracting the count of the Portuguese subquery from the Spanish subquery." 
+}, { + "id": "1201", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of mobile customers using 5G in the European region?", + "sql_context": "CREATE TABLE mobile_customers (customer_id INT, network_type VARCHAR(10)); INSERT INTO mobile_customers (customer_id, network_type) VALUES (1, \u00275G\u0027), (2, \u00274G\u0027), (3, \u00275G\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mobile_customers)) FROM mobile_customers WHERE network_type \u003d \u00275G\u0027 AND country IN (\u0027Germany\u0027, \u0027France\u0027, \u0027United Kingdom\u0027);", + "sql_explanation": "Calculates the percentage of mobile customers using 5G in the European region by using a subquery to determine the total number of mobile customers, and a COUNT(*) query to determine the number of customers using 5G. The percentage is calculated by dividing the number of customers using 5G by the total number of mobile customers and multiplying the result by 100." 
+}, { + "id": "673", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the earliest accommodation date and the accommodation type for students with learning disabilities.", + "sql_context": "CREATE TABLE accommodation (student_id INT, accommodation_type TEXT, accommodation_date DATE); INSERT INTO accommodation (student_id, accommodation_type, accommodation_date) VALUES (1, \u0027Tutoring\u0027, \u00272022-01-01\u0027), (2, \u0027Quiet Space\u0027, \u00272022-02-01\u0027), (3, \u0027Extended Testing Time\u0027, \u00272022-03-01\u0027), (4, \u0027Tutoring\u0027, \u00272022-04-01\u0027);", + "sql": "SELECT accommodation_type, MIN(accommodation_date) as min_date FROM accommodation WHERE student_id IN (SELECT student_id FROM student WHERE disability \u003d \u0027Learning Disability\u0027) GROUP BY accommodation_type;", + "sql_explanation": "Find the earliest accommodation date and accommodation type for students with learning disabilities." 
+}, { + "id": "1601", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of accommodations per student who received accommodations in the MobilityImpairment category in the AssistiveTechnology table?", + "sql_context": "CREATE TABLE AssistiveTechnology (studentID INT, accommodationType VARCHAR(50), cost DECIMAL(5,2));", + "sql": "SELECT AVG(cost) FROM AssistiveTechnology WHERE studentID IN (SELECT studentID FROM AssistiveTechnology WHERE accommodationType \u003d \u0027MobilityImpairment\u0027);", + "sql_explanation": "This query calculates the average cost of accommodations per student who received accommodations in the MobilityImpairment category by summing up the cost column values for each studentID that appears in the AssistiveTechnology table with an accommodationType value of \u0027MobilityImpairment\u0027 and dividing by the number of rows in the table with a matching studentID." 
+}, { + "id": "109", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of hospitals and clinics in indigenous communities, and calculate the ratio.", + "sql_context": "CREATE TABLE areas (name text, type text, community text); INSERT INTO areas VALUES (\u0027Urban\u0027, \u0027CityA\u0027, \u0027\u0027), (\u0027Suburban\u0027, \u0027CityB\u0027, \u0027\u0027), (\u0027Rural\u0027, \u0027CityC\u0027, \u0027Indigenous\u0027), (\u0027Rural\u0027, \u0027CityD\u0027, \u0027Indigenous\u0027); CREATE TABLE hospitals (name text, area_type text); INSERT INTO hospitals VALUES (\u0027Hospital1\u0027, \u0027Urban\u0027), (\u0027Hospital2\u0027, \u0027Rural\u0027), (\u0027Hospital3\u0027, \u0027Suburban\u0027); CREATE TABLE clinics (name text, area_type text); INSERT INTO clinics VALUES (\u0027Clinic1\u0027, \u0027Urban\u0027), (\u0027Clinic2\u0027, \u0027Rural\u0027), (\u0027Clinic3\u0027, \u0027Suburban\u0027);", + "sql": "SELECT (SELECT COUNT(*) FROM hospitals WHERE area_type \u003d \u0027Rural\u0027 AND communities \u003d \u0027Indigenous\u0027) / COUNT(DISTINCT areas.type) AS indigenous_hospital_ratio, (SELECT COUNT(*) FROM clinics WHERE area_type \u003d \u0027Rural\u0027 AND communities \u003d \u0027Indigenous\u0027) / COUNT(DISTINCT areas.type) AS indigenous_clinic_ratio", + "sql_explanation": "The query calculates the ratio of hospitals and clinics in indigenous communities to the total number of areas." 
+}, { + "id": "1082", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of citizens who are satisfied with public services in urban areas?", + "sql_context": "CREATE TABLE feedback (id INT, year INT, service TEXT, sentiment TEXT, city_type TEXT); INSERT INTO feedback (id, year, service, sentiment, city_type) VALUES (1, 2021, \u0027Healthcare\u0027, \u0027Positive\u0027, \u0027Urban\u0027), (2, 2022, \u0027Healthcare\u0027, \u0027Neutral\u0027, \u0027Urban\u0027); CREATE TABLE cities (id INT, name TEXT, population INT, type TEXT); INSERT INTO cities (id, name, population, type) VALUES (1, \u0027City A\u0027, 120000, \u0027Urban\u0027), (2, \u0027City B\u0027, 200000, \u0027Rural\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM feedback WHERE city_type \u003d f.city_type)) AS percentage FROM feedback f WHERE f.sentiment \u003d \u0027Positive\u0027 AND f.city_type \u003d \u0027Urban\u0027;", + "sql_explanation": "This query calculates the percentage of citizens who are satisfied with public services in urban areas by using a subquery to count the total number of feedback entries for urban areas and then dividing the number of positive feedback entries for urban areas by the total number of feedback entries for urban areas." 
+}, { + "id": "2368", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated for education in urban areas?", + "sql_context": "CREATE TABLE district (id INT, name VARCHAR(50), type VARCHAR(50)); INSERT INTO district (id, name, type) VALUES (1, \u0027City A\u0027, \u0027urban\u0027), (2, \u0027Town B\u0027, \u0027urban\u0027), (3, \u0027Village C\u0027, \u0027rural\u0027); CREATE TABLE budget (district_id INT, category VARCHAR(50), amount INT); INSERT INTO budget (district_id, category, amount) VALUES (1, \u0027education\u0027, 500000), (1, \u0027healthcare\u0027, 300000), (2, \u0027education\u0027, 350000), (2, \u0027healthcare\u0027, 400000), (3, \u0027education\u0027, 200000), (3, \u0027healthcare\u0027, 500000);", + "sql": "SELECT AVG(amount) FROM budget WHERE category \u003d \u0027education\u0027 AND district_id IN (SELECT id FROM district WHERE type \u003d \u0027urban\u0027);", + "sql_explanation": "This query calculates the average amount of the \u0027amount\u0027 column in the \u0027budget\u0027 table where the \u0027category\u0027 is \u0027education\u0027 and \u0027district_id\u0027 is in the subquery that selects the ids of districts that are urban." 
+}, { + "id": "1866", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "number of fans per million people in the world; use a multiplier of 1 million", + "sql_context": "INSERT INTO music_genres_ext (id, genre, popularity) VALUES (2, \u0027Jazz\u0027, 1500000);", + "sql": "SELECT 2 AS id, \u0027Jazz\u0027 AS genre, 1500000 AS popularity, 1500000 / (SELECT SUM(popularity) FROM music_genres_ext) * 1000000 AS fans_per_million;", + "sql_explanation": "To calculate fans_per_million, we divide the genre\u0027s popularity by the sum of all popularities in the music_genres_ext table, then multiply the result by 1000000." 
+}, { + "id": "373", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete the production data for Gadolinium from the Australian mine in 2019.", + "sql_context": "CREATE TABLE mine (id INT, name TEXT, location TEXT, Gadolinium_monthly_production FLOAT, timestamp TIMESTAMP); INSERT INTO mine (id, name, location, Gadolinium_monthly_production, timestamp) VALUES (1, \u0027Australian Mine\u0027, \u0027Australia\u0027, 120.5, \u00272019-03-01\u0027), (2, \u0027Californian Mine\u0027, \u0027USA\u0027, 150.3, \u00272019-03-01\u0027), (3, \u0027Brazilian Mine\u0027, \u0027Brazil\u0027, 80.0, \u00272019-03-01\u0027);", + "sql": "DELETE FROM mine WHERE name \u003d \u0027Australian Mine\u0027 AND EXTRACT(YEAR FROM timestamp) \u003d 2019 AND EXISTS (SELECT * FROM mine WHERE name \u003d \u0027Australian Mine\u0027 AND Gadolinium_monthly_production IS NOT NULL AND EXTRACT(YEAR FROM timestamp) \u003d 2019);", + "sql_explanation": "This query deletes the production data for Gadolinium from the Australian mine in 2019. It does this by filtering the \u0027mine\u0027 table for rows where the name is \u0027Australian Mine\u0027 and the year of the timestamp is 2019 and there is production data for Gadolinium. It then deletes these rows." 
+}, { + "id": "1002", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of rare earth elements extracted by companies from Oceania in 2022?", + "sql_context": "CREATE TABLE rare_earth_elements_2 (element TEXT); INSERT INTO rare_earth_elements_2 VALUES (\u0027Terbium\u0027), (\u0027Holmium\u0027), (\u0027Erbium\u0027); CREATE TABLE extraction_data_2 (year INT, company_name TEXT, element TEXT, quantity INT); INSERT INTO extraction_data_2 (year, company_name, element, quantity) VALUES (2022, \u0027EFG Mining\u0027, \u0027Terbium\u0027, 1200), (2022, \u0027HIJ Mining\u0027, \u0027Holmium\u0027, 900), (2022, \u0027KLM Mining\u0027, \u0027Erbium\u0027, 1800), (2022, \u0027NOP Mining\u0027, \u0027Terbium\u0027, 1500); CREATE TABLE mining_locations_2 (company_name TEXT, region TEXT); INSERT INTO mining_locations_2 (company_name, region) VALUES (\u0027EFG Mining\u0027, \u0027Oceania\u0027), (\u0027HIJ Mining\u0027, \u0027Antarctica\u0027), (\u0027KLM Mining\u0027, \u0027Oceania\u0027), (\u0027NOP Mining\u0027, \u0027Oceania\u0027);", + "sql": "SELECT COUNT(DISTINCT element) as total_elements FROM extraction_data_2 WHERE year \u003d 2022 AND company_name IN (SELECT company_name FROM mining_locations_2 WHERE region \u003d \u0027Oceania\u0027);", + "sql_explanation": "This query calculates the total number of rare earth elements extracted by companies from Oceania in 2022. It first selects all records from the extraction_data_2 table for 2022, where the company_name is present in the mining_locations_2 table with the region \u0027Oceania\u0027. 
It then calculates the number of distinct elements for these records, giving the total number of rare earth elements extracted by companies from Oceania in 2022." +}, { + "id": "2053", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show producers with the highest land degradation impact", + "sql_context": "CREATE TABLE mining_impact (id INT PRIMARY KEY, location VARCHAR(255), water_usage INT, air_pollution INT, land_degradation INT); CREATE VIEW environmental_impact AS SELECT location, SUM(water_usage) AS total_water_usage, SUM(air_pollution) AS total_air_pollution, SUM(land_degradation) AS total_land_degradation FROM mining_impact GROUP BY location;", + "sql": "SELECT location FROM environmental_impact WHERE total_land_degradation \u003d (SELECT MAX(total_land_degradation) FROM environmental_impact);", + "sql_explanation": "This query identifies the producers with the highest land degradation impact in the mining_impact table." 
+}, { + "id": "5016", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List producers with a production volume greater than 1500", + "sql_context": "CREATE TABLE producers (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), production_volume INT);", + "sql": "SELECT name FROM producers WHERE production_volume \u003e (SELECT 1500);", + "sql_explanation": "This query selects the names of producers from the producers table where their production volume is greater than 1500." +}, { + "id": "28", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of male and female fans who participated in athlete wellbeing programs in the last 6 months, by age group?", + "sql_context": "CREATE TABLE wellbeing_participants (fan_id INT, gender VARCHAR(50), age INT, last_event_date DATE); INSERT INTO wellbeing_participants (fan_id, gender, age, last_event_date) VALUES (1, \u0027Male\u0027, 25, \u00272022-01-01\u0027), (2, \u0027Female\u0027, 35, \u00272022-02-01\u0027), (3, \u0027Male\u0027, 28, \u00272022-03-01\u0027), (4, \u0027Female\u0027, 45, \u00272022-04-01\u0027), (5, \u0027Male\u0027, 32, \u00272022-05-01\u0027);", + "sql": "SELECT age_group, gender, COUNT(*) * 100.0 
/ (SELECT COUNT(*) FROM wellbeing_participants WHERE gender \u003d age_group AND last_event_date \u003e\u003d CURDATE() - INTERVAL 6 MONTH) AS percentage FROM (SELECT CASE WHEN age \u003c 30 THEN \u002718-29\u0027 WHEN age \u003c 40 THEN \u002730-39\u0027 ELSE \u002740+\u0027 END AS age_group, gender FROM wellbeing_participants WHERE last_event_date \u003e\u003d CURDATE() - INTERVAL 6 MONTH) AS age_groups GROUP BY age_group, gender;", + "sql_explanation": "The query first filters the wellbeing participants data for the last 6 months and then calculates the percentage of male and female fans who participated in athlete wellbeing programs during this period by age group. To do this, the query uses a subquery to calculate the total number of fans who participated in wellbeing programs in the last 6 months. The outer query then calculates the percentage of fans in each age group and gender using the COUNT and CASE functions, and the result is divided by the total number of fans obtained from the subquery." 
+}, { + "id": "537", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fans attended baseball games in the Central region last season?", + "sql_context": "CREATE TABLE Stadiums(id INT, name TEXT, location TEXT, sport TEXT, capacity INT); CREATE VIEW BaseballStadiums AS SELECT * FROM Stadiums WHERE sport \u003d \u0027Baseball\u0027;", + "sql": "SELECT SUM(attendance) FROM (SELECT stadium_id, COUNT(ticket_id) AS attendance FROM TicketSales WHERE season \u003d \u0027Last\u0027 AND stadium_id IN (SELECT id FROM BaseballStadiums WHERE location \u003d \u0027Central\u0027)) AS GameAttendance", + "sql_explanation": "This query first filters the Stadiums table to get only baseball stadiums in the Central region. Then, it selects the stadium IDs to find the total attendance for baseball games in the Central region during the last season from the TicketSales table." 
+}, { + "id": "1024", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tickets were sold for the away games of the \u0027Bears\u0027 in the second half of the 2023 season?", + "sql_context": "CREATE TABLE TicketSales (TicketID INT, GameID INT, Team VARCHAR(20), SaleDate DATE, Quantity INT); INSERT INTO TicketSales (TicketID, GameID, Team, SaleDate, Quantity) VALUES (1, 1, \u0027Bears\u0027, \u00272023-07-01\u0027, 600);", + "sql": "SELECT SUM(Quantity) FROM TicketSales WHERE Team \u003d \u0027Bears\u0027 AND SaleDate BETWEEN \u00272023-07-01\u0027 AND \u00272023-12-31\u0027 AND GameID NOT IN (SELECT GameID FROM Game WHERE HomeTeam \u003d \u0027Bears\u0027);", + "sql_explanation": "The SQL query calculates the number of tickets sold for the away games of the Bears in the second half of the 2023 season by filtering the TicketSales table based on the Team column equal to \u0027Bears\u0027, the SaleDate between \u00272023-07-01\u0027 and \u00272023-12-31\u0027, and the GameID not in the subquery that returns the GameID of the home games of the Bears. Then it uses the SUM function to find the total number of tickets sold." 
+}, { + "id": "1152", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of football players in the Premier League who participated in the 2022 World Cup?", + "sql_context": "CREATE TABLE IF NOT EXISTS players (id INT, name VARCHAR(50), age INT, team VARCHAR(50), league VARCHAR(50));", + "sql": "SELECT AVG(age) FROM players WHERE team IN (SELECT name FROM teams WHERE league \u003d \u0027Premier League\u0027) AND name IN (SELECT team FROM games WHERE tournament \u003d \u00272022 World Cup\u0027);", + "sql_explanation": "This query filters for players in the Premier League who participated in the 2022 World Cup, and calculates the average age using the AVG() function." 
+}, { + "id": "2378", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of athletes who have participated in the Olympics but not in the Commonwealth Games.", + "sql_context": "CREATE TABLE athletes (id INT, name VARCHAR(50), age INT, sport VARCHAR(50), event VARCHAR(50)); INSERT INTO athletes (id, name, age, sport, event) VALUES (1, \u0027John Doe\u0027, 25, \u0027Athletics\u0027, \u0027Olympics\u0027), (2, \u0027Jane Smith\u0027, 30, \u0027Swimming\u0027, \u0027Commonwealth Games\u0027), (3, \u0027Richard Roe\u0027, 28, \u0027Athletics\u0027, \u0027Commonwealth Games\u0027), (4, \u0027Jessica Brown\u0027, 27, \u0027Athletics\u0027, \u0027Olympics\u0027), (5, \u0027Michael Green\u0027, 31, \u0027Swimming\u0027, \u0027Olympics\u0027);", + "sql": "SELECT name FROM athletes WHERE event \u003d \u0027Olympics\u0027 AND id NOT IN (SELECT id FROM athletes WHERE event \u003d \u0027Commonwealth Games\u0027);", + "sql_explanation": "First, we select the name column for the rows with the event \u0027Olympics\u0027 and apply the NOT IN clause to filter out the rows where the id is present in the subquery that selects the id column for the rows with the event \u0027Commonwealth Games\u0027. This gives us the names of athletes who have participated in the Olympics but not in the Commonwealth Games." 
+}, { + "id": "2467", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total revenue for the top 3 teams by revenue in the \u0027team_performances\u0027 table.", + "sql_context": "CREATE TABLE team_performances (team VARCHAR(20), sport VARCHAR(20), games_played INT, wins INT, losses INT, revenue DECIMAL(10,2));", + "sql": "SELECT team, SUM(revenue) FROM (SELECT team, revenue FROM team_performances ORDER BY revenue DESC LIMIT 3) t GROUP BY team;", + "sql_explanation": "This query lists the total revenue for the top 3 teams by revenue in the \u0027team_performances\u0027 table. It uses a subquery to select the top 3 teams by revenue and then calculates the total revenue for each team. The GROUP BY clause groups the results by team." 
+}, { + "id": "1463", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of community engagement events in each city, ordered by the percentage in descending order.", + "sql_context": "CREATE TABLE community_events (event_id INT, event_name TEXT, city TEXT, year INT); INSERT INTO community_events (event_id, event_name, city, year) VALUES (1, \u0027Cultural Festival\u0027, \u0027New York\u0027, 2020), (2, \u0027Traditional Music Concert\u0027, \u0027Los Angeles\u0027, 2019);", + "sql": "SELECT city, ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM community_events) , 2) as percentage FROM community_events GROUP BY city ORDER BY percentage DESC;", + "sql_explanation": "The SQL query calculates the percentage of community engagement events in each city. The query uses the COUNT() function to count the number of community engagement events in each city. The subquery calculates the total number of community engagement events. The ROUND() function is used to round the percentage to two decimal places. The GROUP BY clause groups the results by city, and the ORDER BY clause orders the results in descending order based on the percentage of community engagement events." 
+}, { + "id": "3783", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of heritage sites and traditional art pieces?", + "sql_context": "CREATE TABLE heritage_sites_2 (id INT, type VARCHAR(50), name VARCHAR(100)); INSERT INTO heritage_sites_2 (id, type, name) VALUES (1, \u0027Historic Site\u0027, \u0027Anasazi Ruins\u0027), (2, \u0027Museum\u0027, \u0027Metropolitan Museum of Art\u0027), (3, \u0027Historic Site\u0027, \u0027Alamo\u0027); CREATE TABLE traditional_art_3 (id INT, artist VARCHAR(50), title VARCHAR(100)); INSERT INTO traditional_art_3 (id, artist, title) VALUES (1, \u0027Picasso\u0027, \u0027Guernica\u0027), (2, \u0027Dali\u0027, \u0027Persistence of Memory\u0027), (3, \u0027Picasso\u0027, \u0027Three Musicians\u0027);", + "sql": "SELECT (SELECT COUNT(*) FROM heritage_sites_2) + (SELECT COUNT(*) FROM traditional_art_3);", + "sql_explanation": "This query uses subqueries to count the number of rows in both the heritage_sites_2 and traditional_art_3 tables and then adds those counts together." 
+}, { + "id": "44", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average temperature and humidity for the greenhouses with the highest energy consumption in the last month.", + "sql_context": "CREATE TABLE greenhouse (id INT, name VARCHAR(255)); CREATE TABLE sensor (id INT, greenhouse_id INT, temperature INT, humidity INT, energy_consumption INT, timestamp TIMESTAMP); INSERT INTO greenhouse VALUES (1, \u0027Greenhouse A\u0027), (2, \u0027Greenhouse B\u0027); INSERT INTO sensor VALUES (1, 1, 25, 60, 1000, \u00272022-04-01 10:00:00\u0027), (2, 2, 22, 70, 1200, \u00272022-04-01 10:00:00\u0027);", + "sql": "SELECT g.name, AVG(s.temperature) as avg_temperature, AVG(s.humidity) as avg_humidity FROM greenhouse g INNER JOIN sensor s ON g.id \u003d s.greenhouse_id WHERE s.energy_consumption \u003d (SELECT MAX(energy_consumption) FROM sensor WHERE timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 MONTH) AND NOW()) AND s.timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 MONTH) AND NOW() GROUP BY g.name;", + "sql_explanation": "The query calculates the average temperature and humidity for the greenhouses with the highest energy consumption in the last month." 
+}, { + "id": "89", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in average temperature between the hottest and coldest regions for each month, in a given year?", + "sql_context": "CREATE TABLE temperature_data (region VARCHAR(255), temperature INT, date DATE); INSERT INTO temperature_data (region, temperature, date) VALUES (\u0027North\u0027, 25, \u00272022-01-01\u0027), (\u0027South\u0027, 10, \u00272022-01-01\u0027), (\u0027East\u0027, 15, \u00272022-01-01\u0027), (\u0027West\u0027, 30, \u00272022-01-01\u0027);", + "sql": "SELECT hottest.region, hottest.max_temp - coldest.min_temp as temp_diff FROM (SELECT region, MAX(temperature) as max_temp FROM temperature_data GROUP BY region) hottest INNER JOIN (SELECT region, MIN(temperature) as min_temp FROM temperature_data GROUP BY region) coldest ON hottest.region \u003d coldest.region;", + "sql_explanation": "Calculate the difference in average temperature between the hottest and coldest regions for each month by finding the maximum and minimum temperature for each region, then subtracting the minimum from the maximum." 
+}, { + "id": "1166", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the percentage of IoT devices with firmware version 1.x.x in the \u0027Europe\u0027 region.", + "sql_context": "CREATE TABLE IoTDevices (region VARCHAR(255), device_id INT, firmware_version VARCHAR(255)); INSERT INTO IoTDevices (region, device_id, firmware_version) VALUES (\u0027Europe\u0027, 1001, \u00271.4.5\u0027), (\u0027Europe\u0027, 1002, \u00271.5.1\u0027), (\u0027Europe\u0027, 1003, \u00271.4.8\u0027), (\u0027Europe\u0027, 1004, \u00271.6.0\u0027), (\u0027Asia\u0027, 1005, \u00272.3.2\u0027), (\u0027Asia\u0027, 1006, \u00272.5.1\u0027);", + "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM IoTDevices WHERE region \u003d \u0027Europe\u0027)) AS Percentage FROM IoTDevices WHERE region \u003d \u0027Europe\u0027 AND firmware_version LIKE \u00271.%\u0027;", + "sql_explanation": "The SQL query calculates the percentage of IoT devices with firmware version 1.x.x in the Europe region by filtering the IoTDevices table and using a subquery to calculate the total number of devices in the Europe region. It then calculates the percentage based on the number of devices with a firmware version starting with 1." 
+}, { + "id": "1828", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all farms with a corn yield higher than the average corn yield across all farms.", + "sql_context": "CREATE TABLE crop_yield (id INT, farm_id INT, crop VARCHAR(20), yield INT, year INT); INSERT INTO crop_yield (id, farm_id, crop, yield, year) VALUES (1, 1, \u0027Corn\u0027, 120, 2018); INSERT INTO crop_yield (id, farm_id, crop, yield, year) VALUES (2, 1, \u0027Soybean\u0027, 50, 2018); INSERT INTO crop_yield (id, farm_id, crop, yield, year) VALUES (3, 2, \u0027Corn\u0027, 150, 2019);", + "sql": "SELECT farm_id FROM crop_yield WHERE crop \u003d \u0027Corn\u0027 GROUP BY farm_id HAVING AVG(yield) \u003e (SELECT AVG(yield) FROM crop_yield WHERE crop \u003d \u0027Corn\u0027);", + "sql_explanation": "Select farm_id where the average yield of corn is greater than the overall average yield of corn across all farms." 
+}, { + "id": "2136", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average yield of crops in the top 50% of farmland.", + "sql_context": "CREATE TABLE Farmers (id INT, name VARCHAR(50), age INT, location VARCHAR(50), acres INT); INSERT INTO Farmers (id, name, age, location, acres) VALUES (1, \u0027John Doe\u0027, 35, \u0027USA\u0027, 100); INSERT INTO Farmers (id, name, age, location, acres) VALUES (2, \u0027Jane Smith\u0027, 40, \u0027Canada\u0027, 200); CREATE TABLE Crops (id INT, farmer_id INT, crop_name VARCHAR(50), yield INT, sale_price DECIMAL(5,2)); INSERT INTO Crops (id, farmer_id, crop_name, yield, sale_price) VALUES (1, 1, \u0027Corn\u0027, 120, 2.35); INSERT INTO Crops (id, farmer_id, crop_name, yield, sale_price) VALUES (2, 2, \u0027Soybeans\u0027, 80, 1.98);", + "sql": "SELECT AVG(c.yield) as avg_yield FROM Crops c JOIN Farmers f ON c.farmer_id \u003d f.id WHERE f.acres \u003e\u003d (SELECT AVG(acres) FROM Farmers);", + "sql_explanation": "This SQL query finds the average yield of crops in the top 50% of farmland by performing a join between the Crops and Farmers tables on farmer_id. It then filters for rows where the acres value is greater than or equal to the average acres value across all farmers, and calculates the average yield using the AVG function." 
+}, { + "id": "637", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected from wheelchair-accessible vehicles in the last month?", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, vehicle_type VARCHAR(255)); INSERT INTO vehicles (vehicle_id, vehicle_type) VALUES (1, \u0027Wheelchair Accessible\u0027), (2, \u0027Standard\u0027); CREATE TABLE transactions (transaction_id INT, vehicle_id INT, fare_amount DECIMAL(5,2), transaction_date DATE); INSERT INTO transactions (transaction_id, vehicle_id, fare_amount, transaction_date) VALUES (1, 1, 5.00, \u00272022-03-01\u0027), (2, 2, 4.00, \u00272022-03-02\u0027), (3, 1, 5.00, \u00272022-03-03\u0027);", + "sql": "SELECT SUM(fare_amount) FROM transactions WHERE vehicle_id IN (SELECT vehicle_id FROM vehicles WHERE vehicle_type \u003d \u0027Wheelchair Accessible\u0027) AND transaction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "Summarizes the total fare collected from wheelchair-accessible vehicles in the last month by filtering transactions table based on vehicle_id and transaction_date." 
+}, { + "id": "747", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most frequently used boarding station for each route?", + "sql_context": "CREATE TABLE Stations (StationID INT, StationName VARCHAR(50), RouteID INT, IsStart VARCHAR(50)); INSERT INTO Stations (StationID, StationName, RouteID, IsStart) VALUES (1, \u0027StationA\u0027, 1, \u0027true\u0027), (2, \u0027StationB\u0027, 1, \u0027false\u0027), (3, \u0027StationC\u0027, 1, \u0027false\u0027), (4, \u0027StationD\u0027, 2, \u0027true\u0027), (5, \u0027StationE\u0027, 2, \u0027false\u0027), (6, \u0027StationF\u0027, 3, \u0027true\u0027), (7, \u0027StationG\u0027, 3, \u0027false\u0027), (8, \u0027StationH\u0027, 3, \u0027false\u0027);", + "sql": "SELECT RouteID, StationName FROM Stations S1 WHERE IsStart \u003d \u0027true\u0027 AND NOT EXISTS (SELECT * FROM Stations S2 WHERE S2.RouteID \u003d S1.RouteID AND S2.IsStart \u003d \u0027true\u0027 AND S2.StationID \u003e S1.StationID);", + "sql_explanation": "The SQL query returns the most frequently used boarding station for each route by using the NOT EXISTS clause to select the earliest start station for each route." 
+}, { + "id": "357", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the \u0027animal_name\u0027 and \u0027conservation_status\u0027 of animals in the \u0027endangered_species\u0027 table that weigh more than the average weight of their \u0027animal_type\u0027.", + "sql_context": "CREATE TABLE marine_mammals_new(id INT, animal_name VARCHAR(50), animal_type VARCHAR(50), weight FLOAT); INSERT INTO marine_mammals_new(id, animal_name, animal_type, weight) VALUES (1, \u0027Walrus\u0027, \u0027Pinniped\u0027, 1500), (2, \u0027Dolphin\u0027, \u0027Cetacean\u0027, 150), (3, \u0027Sea Otter\u0027, \u0027Pinniped\u0027, 33);", + "sql": "SELECT animal_name, conservation_status FROM endangered_species es JOIN (SELECT animal_type, AVG(weight) avg_weight FROM marine_mammals_new GROUP BY animal_type) mm ON es.conservation_status \u003d mm.animal_type WHERE es.weight \u003e mm.avg_weight;", + "sql_explanation": "The query joins the \u0027endangered_species\u0027 and subquery results and filters the results by weight." 
+}, { + "id": "1623", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of animals in the \"HabitatPreservation\" view that share the same habitat?", + "sql_context": "CREATE VIEW HabitatPreservation AS SELECT habitat_id, animal_id FROM AnimalHabitats; INSERT INTO AnimalHabitats (habitat_id, animal_id) VALUES (1, 1), (1, 2), (2, 3), (3, 4), (3, 5), (4, 6);", + "sql": "SELECT MAX(habitat_count) FROM (SELECT habitat_id, COUNT(DISTINCT animal_id) AS habitat_count FROM HabitatPreservation GROUP BY habitat_id) AS subquery;", + "sql_explanation": "This query finds the maximum number of animals that share the same habitat in the HabitatPreservation view. It does this by using a subquery to first count the number of distinct animal_ids for each habitat_id and then selecting the maximum count from that subquery." 
+}, { + "id": "444", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find hotels with below-average AI assistant usage and above-average booking prices", + "sql_context": "CREATE TABLE hotel_bookings (hotel_id INT, user_id INT, booking_date DATE, price DECIMAL(5,2)); INSERT INTO hotel_bookings (hotel_id, user_id, booking_date, price) VALUES (1, 23, \u00272022-01-01\u0027, 120.00), (2, 45, \u00272022-01-05\u0027, 250.00); CREATE TABLE hotel_ai_usage (hotel_id INT, ai_usage INT); INSERT INTO hotel_ai_usage (hotel_id, ai_usage) VALUES (1, 500), (2, 1500), (3, 700);", + "sql": "SELECT hotel_id FROM hotel_bookings INNER JOIN hotel_ai_usage ON hotel_bookings.hotel_id \u003d hotel_ai_usage.hotel_id WHERE ai_usage \u003c (SELECT AVG(ai_usage) FROM hotel_ai_usage) AND price \u003e (SELECT AVG(price) FROM hotel_bookings);", + "sql_explanation": "This query finds hotels with below-average AI assistant usage and above-average booking prices by using INNER JOIN to combine the hotel_bookings and hotel_ai_usage tables and then applying conditions on the ai_usage and price columns to filter the results." 
+}, { + "id": "753", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue per hotel for \u0027Resort\u0027 hotels?", + "sql_context": "CREATE TABLE hotels_types (hotel_id INT, type TEXT); CREATE TABLE bookings (booking_id INT, hotel_id INT, revenue FLOAT);", + "sql": "SELECT AVG(subquery.revenue) FROM (SELECT hotel_id, SUM(revenue) as revenue FROM bookings GROUP BY hotel_id) as subquery WHERE hotel_id IN (SELECT hotel_id FROM hotels_types WHERE type \u003d \u0027Resort\u0027);", + "sql_explanation": "This SQL query calculates the average revenue per hotel for Resort hotels. It does this by using a subquery to sum the revenue for each hotel, and then using another subquery to filter for Resort hotels. It then calculates the average revenue using the AVG function." 
+}, { + "id": "972", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the trend of AI adoption in the hotel industry in the last year?", + "sql_context": "CREATE TABLE ai_adoption (adoption_id INT, hotel_name VARCHAR(255), adoption_date DATE, adoption_level INT);", + "sql": "SELECT adoption_date, AVG(adoption_level) FROM ai_adoption WHERE hotel_name IN (SELECT hotel_name FROM hotels WHERE industry \u003d \u0027hotel\u0027) GROUP BY adoption_date ORDER BY adoption_date;", + "sql_explanation": "This query calculates the average level of AI adoption in the hotel industry over the last year. It filters the data to only include hotels in the hotel industry, and then groups the data by adoption date and calculates the average adoption level for each group. Finally, it orders the results by adoption date." 
+}, { + "id": "1376", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the \u0027hotel tech adoption\u0027 percentage for \u0027boutique hotels\u0027?", + "sql_context": "CREATE TABLE hotel_tech_adoption (id INT, type TEXT, adoption BOOLEAN); INSERT INTO hotel_tech_adoption (id, type, adoption) VALUES (1, \u0027Boutique\u0027, true), (2, \u0027Luxury\u0027, false), (3, \u0027Boutique\u0027, false);", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM hotel_tech_adoption WHERE type \u003d \u0027Boutique\u0027) FROM hotel_tech_adoption WHERE type \u003d \u0027Boutique\u0027 AND adoption \u003d true;", + "sql_explanation": "This query calculates the hotel tech adoption percentage for \u0027boutique hotels\u0027 by dividing the number of boutique hotels with tech adoption by the total number of boutique hotels." 
+}, { + "id": "1587", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of hotels using AI chatbot support in the EU?", + "sql_context": "CREATE TABLE hotel_ai (hotel_id INT, hotel_name TEXT, region TEXT, ai_chatbot BOOLEAN); INSERT INTO hotel_ai (hotel_id, hotel_name, region, ai_chatbot) VALUES (1, \u0027Hotel A\u0027, \u0027EU\u0027, true), (2, \u0027Hotel B\u0027, \u0027NA\u0027, false), (3, \u0027Hotel C\u0027, \u0027EU\u0027, true), (4, \u0027Hotel D\u0027, \u0027APAC\u0027, false);", + "sql": "SELECT MAX(hotel_count) FROM (SELECT COUNT(*) AS hotel_count FROM hotel_ai WHERE region \u003d \u0027EU\u0027 AND ai_chatbot \u003d true GROUP BY hotel_name) AS hotel_group;", + "sql_explanation": "This query finds the maximum number of hotels using AI chatbot support in the EU. It does this by using the MAX function on a subquery that groups hotels by name and filters the data where the region is EU and ai_chatbot is true." 
+}, { + "id": "1920", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by hotels in the top 2 countries with the highest revenue?", + "sql_context": "CREATE TABLE hotel_revenue (hotel_id INT, country TEXT, revenue FLOAT); INSERT INTO hotel_revenue (hotel_id, country, revenue) VALUES (1, \u0027USA\u0027, 100000), (2, \u0027USA\u0027, 150000), (3, \u0027Canada\u0027, 75000), (4, \u0027Germany\u0027, 200000), (5, \u0027France\u0027, 120000);", + "sql": "SELECT SUM(revenue) FROM (SELECT country, SUM(revenue) as revenue FROM hotel_revenue GROUP BY country ORDER BY revenue DESC LIMIT 2) as top2;", + "sql_explanation": "This SQL query calculates the total revenue generated by hotels in the top 2 countries with the highest revenue. It first calculates the total revenue per country using a subquery, and then filters the top 2 countries based on the revenue. It then calculates the sum of revenue using the SUM() function." 
+}, { + "id": "3673", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of hotels in the \u0027asia_hotels\u0027 view that have adopted AI technology?", + "sql_context": "CREATE VIEW asia_hotels AS SELECT * FROM hotels WHERE continent \u003d \u0027Asia\u0027; CREATE VIEW ai_adopters AS SELECT hotel_id FROM ai_tech WHERE adoption_date IS NOT NULL;", + "sql": "SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM asia_hotels) as percentage FROM ai_adopters;", + "sql_explanation": "The SQL query calculates the percentage of hotels in the \u0027asia_hotels\u0027 view that have adopted AI technology. It uses a subquery to get the total number of hotels in the \u0027asia_hotels\u0027 view and another subquery to get the number of hotels that have adopted AI technology. The main query then calculates the percentage by dividing the number of AI adopters by the total number of hotels." 
+}, { + "id": "161", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of algorithmic fairness assessments conducted per month in India in 2022?", + "sql_context": "CREATE TABLE fairness_assessments (assessment_id INT, assessment_date DATE, country TEXT); INSERT INTO fairness_assessments (assessment_id, assessment_date, country) VALUES (1, \u00272022-01-02\u0027, \u0027India\u0027), (2, \u00272022-02-15\u0027, \u0027India\u0027), (3, \u00272022-03-27\u0027, \u0027India\u0027);", + "sql": "SELECT AVG(num_assessments) as avg_assessments_per_month FROM (SELECT COUNT(*) as num_assessments, EXTRACT(MONTH FROM assessment_date) as month FROM fairness_assessments WHERE country \u003d \u0027India\u0027 AND assessment_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY month) as subquery;", + "sql_explanation": "This query calculates the average number of algorithmic fairness assessments conducted per month in India in 2022." 
+}, { + "id": "759", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the organizations that have not had any AI safety incidents, in alphabetical order.", + "sql_context": "CREATE TABLE ai_safety (incident_id INT, incident_date DATE, organization_name TEXT, incident_description TEXT); INSERT INTO ai_safety (incident_id, incident_date, organization_name, incident_description) VALUES (1, \u00272021-01-01\u0027, \u0027TechCo\u0027, \u0027AI system caused harm to a user\u0027); INSERT INTO ai_safety (incident_id, incident_date, organization_name, incident_description) VALUES (2, \u00272021-02-01\u0027, \u0027AI Lab\u0027, \u0027AI system made a biased decision\u0027);", + "sql": "SELECT DISTINCT organization_name FROM ai_safety WHERE organization_name NOT IN (SELECT organization_name FROM ai_safety GROUP BY organization_name HAVING COUNT(*) \u003e 0) ORDER BY organization_name;", + "sql_explanation": "This SQL query lists the organizations that have not had any AI safety incidents, in alphabetical order. It first selects the distinct organization_name values from the ai_safety table, then filters out the organizations that have had incidents by using a subquery to select the organization_name values that have a count greater than 0, and finally orders the results by the organization_name column in ascending order." 
+}, { + "id": "2774", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the AI explainability algorithms that have a higher transaction count than any AI safety algorithm.", + "sql_context": "CREATE TABLE ai_explainability_transactions (algorithm_id INT, id INT); INSERT INTO ai_explainability_transactions (algorithm_id, id) VALUES (1, 10); INSERT INTO ai_explainability_transactions (algorithm_id, id) VALUES (2, 20); INSERT INTO ai_explainability_transactions (algorithm_id, id) VALUES (3, 30);", + "sql": "SELECT algorithm_id, id FROM ai_explainability_transactions WHERE id \u003e (SELECT MAX(id) FROM ai_safety_algorithms);", + "sql_explanation": "The subquery (SELECT MAX(id) FROM ai_safety_algorithms)) gets the maximum transaction count for all AI safety algorithms. The main query then selects the algorithm_id and id from ai_explainability_transactions where the id is greater than the maximum transaction count for AI safety algorithms, giving the AI explainability algorithms with a higher transaction count." 
+}, { + "id": "2977", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the AI creative applications with transaction counts greater than or equal to the average number of transactions for all AI creative applications.", + "sql_context": "CREATE TABLE creative_apps_transactions (app_name VARCHAR(20), id INT); INSERT INTO creative_apps_transactions (app_name, id) VALUES (\u0027ArtBot\u0027, 1); INSERT INTO creative_apps_transactions (app_name, id) VALUES (\u0027MusicGen\u0027, 2); INSERT INTO creative_apps_transactions (app_name, id) VALUES (\u0027StoryGen\u0027, 3);", + "sql": "SELECT app_name FROM creative_apps_transactions WHERE id \u003e\u003d (SELECT AVG(id) FROM creative_apps_transactions);", + "sql_explanation": "The subquery (SELECT AVG(id) FROM creative_apps_transactions)) calculates the average transaction count for all AI creative applications. The main query selects the app_name from creative_apps_transactions where the id is greater than or equal to the average transaction count, giving the AI creative applications with transaction counts greater than or equal to the average." 
+}, { + "id": "932", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many funding rounds have startups founded by immigrants gone through?", + "sql_context": "CREATE TABLE startups(id INT, name TEXT, founders TEXT, founding_year INT); INSERT INTO startups VALUES (1, \u0027StartupA\u0027, \u0027Ahmed, Bob\u0027, 2010); INSERT INTO startups VALUES (2, \u0027StartupB\u0027, \u0027Eve\u0027, 2015); INSERT INTO startups VALUES (3, \u0027StartupC\u0027, \u0027Charlie\u0027, 2018); CREATE TABLE investments(startup_id INT, round INT, funding INT); INSERT INTO investments VALUES (1, 1, 1000000); INSERT INTO investments VALUES (1, 2, 2000000); INSERT INTO investments VALUES (2, 1, 3000000); INSERT INTO investments VALUES (3, 1, 4000000);", + "sql": "SELECT startup_id, COUNT(*) as num_rounds FROM investments GROUP BY startup_id HAVING startup_id IN (SELECT id FROM startups WHERE founders LIKE \u0027%Ahmed%\u0027 OR founders LIKE \u0027%Charlie%\u0027);", + "sql_explanation": "This query calculates the number of funding rounds for startups founded by at least one immigrant. It does so by grouping the investments table by startup_id and calculating the count of rows within each group. It then filters for startups founded by at least one immigrant (in this case, Ahmed or Charlie) using a subquery in the HAVING clause." 
+}, { + "id": "960", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum funding round size for companies founded in the last 5 years?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founding_year INT, founder_gender TEXT); INSERT INTO company (id, name, founding_year, founder_gender) VALUES (1, \u0027Acme Inc\u0027, 2015, \u0027female\u0027); INSERT INTO company (id, name, founding_year, founder_gender) VALUES (2, \u0027Beta Corp\u0027, 2018, \u0027male\u0027);", + "sql": "SELECT MIN(funding_round_size) FROM investment_rounds INNER JOIN company ON investment_rounds.company_id \u003d company.id WHERE company.founding_year \u003e\u003d (SELECT YEAR(CURRENT_DATE()) - 5);", + "sql_explanation": "This query returns the minimum funding round size for companies founded in the last 5 years. It does this by performing a subquery that gets the current year, subtracting 5 from it, and then filtering the company table to only include rows where the founding_year column is greater than or equal to the result of the subquery. Then it performs an inner join between the investment_rounds and company tables on the company_id and id columns, respectively. Finally it calculates the min of the funding_round_size column." 
+}, { + "id": "1357", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update records of diversity metrics for companies in the green energy sector with accurate and complete data.", + "sql_context": "CREATE TABLE diversity_metrics (id INT, company_id INT, year INT, gender_distribution TEXT, racial_distribution TEXT); INSERT INTO diversity_metrics (id, company_id, year, gender_distribution, racial_distribution) VALUES (1, 1, 2020, \u0027...\u0027, \u0027...\u0027); CREATE TABLE company (id INT, name TEXT, industry TEXT); INSERT INTO company (id, name, industry) VALUES (1, \u0027SunPower\u0027, \u0027Green Energy\u0027);", + "sql": "UPDATE diversity_metrics SET gender_distribution \u003d \u0027...\u0027, racial_distribution \u003d \u0027...\u0027 WHERE company_id IN (SELECT id FROM company WHERE industry \u003d \u0027Green Energy\u0027);", + "sql_explanation": "The SQL query updates records of diversity metrics for green energy companies with accurate and complete data for gender_distribution and racial_distribution." 
+}, { + "id": "179", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the Green building certification with the lowest certification authority count, along with the certification authority", + "sql_context": "CREATE TABLE green_buildings ( id INT PRIMARY KEY, building_name VARCHAR(255), certification VARCHAR(255), certification_authority VARCHAR(255) ); INSERT INTO green_buildings (id, building_name, certification, certification_authority) VALUES (1, \u0027EcoCampus\u0027, \u0027LEED\u0027, \u0027USGBC\u0027); INSERT INTO green_buildings (id, building_name, certification, certification_authority) VALUES (2, \u0027GreenApartments\u0027, \u0027BREEAM\u0027, \u0027BRE\u0027); INSERT INTO green_buildings (id, building_name, certification, certification_authority) VALUES (3, \u0027EcoOffice\u0027, \u0027Green Star\u0027, \u0027GBCA\u0027);", + "sql": "SELECT certification, certification_authority FROM green_buildings GROUP BY certification, certification_authority HAVING COUNT(*) \u003d (SELECT MIN(cert_count) FROM (SELECT certification_authority, COUNT(*) as cert_count FROM green_buildings GROUP BY certification_authority) t);", + "sql_explanation": "This SQL query identifies the Green building certification with the lowest certification authority count, along with the certification authority, by counting the rows for each certification and certification_authority pair and grouping them, and then selecting the rows with the minimum count using subquery and HAVING clause." 
+}, { + "id": "520", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Update the CO2 emissions of buildings in the \u0027smart_cities\u0027 schema with IDs 1, 3, and 5 to the new_emissions value.", + "sql_context": "CREATE TABLE smart_cities.buildings (id INT, city VARCHAR(255), co2_emissions INT); CREATE VIEW smart_cities.buildings_view AS SELECT id, city, co2_emissions FROM smart_cities.buildings;", + "sql": " UPDATE smart_cities.buildings SET co2_emissions \u003d new_emissions FROM ( SELECT id, 800 as new_emissions FROM generate_series(1, 5) as seq WHERE seq % 2 \u003d 1 ) AS subquery WHERE smart_cities.buildings.id \u003d subquery.id; ", + "sql_explanation": "The query uses a correlated subquery to update the CO2 emissions of buildings with IDs 1, 3, and 5 to the new_emissions value." 
+}, { + "id": "879", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption of green buildings in each city in the \u0027smart_cities\u0027 schema?", + "sql_context": "CREATE TABLE smart_cities.building_data (city VARCHAR(255), energy_consumption FLOAT);", + "sql": "SELECT city, SUM(energy_consumption) FROM smart_cities.building_data WHERE city IN (SELECT city FROM (SELECT DISTINCT city FROM smart_cities.building_data) AS unique_cities) GROUP BY city;", + "sql_explanation": "This query calculates the total energy consumption of green buildings for each city in the \u0027smart_cities\u0027 schema by summing up the \u0027energy_consumption\u0027 column for all rows in the \u0027building_data\u0027 table that have a city matching those in the distinct list of cities. It groups the result by city, providing the total energy consumption for each city." 
+}, { + "id": "1136", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of green building materials for projects in the \u0027smart_cities\u0027 schema?", + "sql_context": "CREATE TABLE green_materials (project_id INT, material_name TEXT, cost FLOAT); INSERT INTO green_materials (project_id, material_name, cost) VALUES (1, \u0027solar panels\u0027, 15000.0), (1, \u0027smart glass\u0027, 25000.0), (2, \u0027wind turbines\u0027, 30000.0), (2, \u0027geothermal systems\u0027, 40000.0);", + "sql": "SELECT AVG(cost) FROM green_materials WHERE project_id IN (SELECT project_id FROM projects WHERE schema_name \u003d \u0027smart_cities\u0027) AND material_name \u003d \u0027green building materials\u0027;", + "sql_explanation": "This query calculates the average cost of green building materials for projects in the \u0027smart_cities\u0027 schema. It uses a subquery to get the project IDs from the \u0027projects\u0027 table where the schema name is \u0027smart_cities\u0027. Then, the main query filters the \u0027green_materials\u0027 table to only include rows with those project IDs and where the material name is \u0027green building materials\u0027. Finally, it calculates the average cost of these materials." 
+}, { + "id": "1731", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total carbon offset for each smart city in the \u0027smart_cities\u0027 table", + "sql_context": "CREATE TABLE smart_cities (id INT, name VARCHAR(50), location VARCHAR(50), carbon_offset INT);", + "sql": "SELECT name, (SELECT SUM(carbon_offset) FROM smart_cities WHERE smart_cities.name \u003d cities.name) AS total_carbon_offset FROM smart_cities AS cities;", + "sql_explanation": "This query calculates the total carbon offset for each smart city in the \u0027smart_cities\u0027 table by using a correlated subquery that calculates the sum of carbon offsets for each city." 
+}, { + "id": "2907", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find cities with no renewable energy projects", + "sql_context": "CREATE TABLE city_renewable_projects (city VARCHAR(50), project_type VARCHAR(50), PRIMARY KEY (city, project_type));", + "sql": "SELECT city FROM city_renewable_projects WHERE city NOT IN (SELECT city FROM renewable_projects GROUP BY city);", + "sql_explanation": "This query finds cities with no renewable energy projects by first selecting distinct cities from the \u0027renewable_projects\u0027 table and then finding cities that are not present in the \u0027city_renewable_projects\u0027 table using the NOT IN operator." 
+}, { + "id": "4539", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "subqueries", + "sql_complexity_description": "subqueries, including correlated and nested subqueries", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the city with the largest population?", + "sql_context": "CREATE TABLE city (id INT, name VARCHAR(50), population INT); INSERT INTO city (id, name, population) VALUES (1, \u0027San Francisco\u0027, 884000); INSERT INTO city (id, name, population) VALUES (2, \u0027New York\u0027, 8601000); INSERT INTO city (id, name, population) VALUES (3, \u0027Los Angeles\u0027, 4000000);", + "sql": "SELECT name FROM city WHERE population \u003d (SELECT MAX(population) FROM city);", + "sql_explanation": "This SQL query selects the name of the city from the city table where the population is equal to the maximum population value in the table." 
+}, { + "id": "822", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of movies, produced in India or Nigeria, with a female lead actor and released between 2015 and 2020.", + "sql_context": "CREATE TABLE movies (id INT, title TEXT, release_year INT, lead_actor VARCHAR(255), production_country VARCHAR(255));", + "sql": "SELECT release_year, COUNT(*) as num_movies FROM movies WHERE production_country IN (\u0027India\u0027, \u0027Nigeria\u0027) AND lead_actor \u003d \u0027female\u0027 AND release_year BETWEEN 2015 AND 2020 GROUP BY release_year;", + "sql_explanation": "The SQL query finds the number of movies, produced in India or Nigeria, with a female lead actor and released between 2015 and 2020. It uses the GROUP BY clause to group the results by release year and the COUNT function to count the number of movies." 
+}, { + "id": "848", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of likes received by articles about climate change, published in Europe and South America, in the last quarter?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(50), likes INT, topic VARCHAR(50), region VARCHAR(50)); INSERT INTO articles (id, title, likes, topic, region) VALUES (1, \u0027Article1\u0027, 50, \u0027Climate Change\u0027, \u0027Europe\u0027), (2, \u0027Article2\u0027, 75, \u0027Politics\u0027, \u0027South America\u0027), (3, \u0027Article3\u0027, 100, \u0027Climate Change\u0027, \u0027Europe\u0027), (4, \u0027Article4\u0027, 80, \u0027Climate Change\u0027, \u0027South America\u0027);", + "sql": "SELECT region, MAX(likes) as max_likes FROM articles WHERE topic \u003d \u0027Climate Change\u0027 AND region IN (\u0027Europe\u0027, \u0027South America\u0027) AND publication_date \u003e\u003d NOW() - INTERVAL 90 DAY GROUP BY region;", + "sql_explanation": "This query calculates the maximum number of likes received by articles about climate change, published in Europe and South America, in the last quarter. It does this by filtering the rows in the articles table to only include rows where the topic is \u0027Climate Change\u0027 and the region is either \u0027Europe\u0027 or \u0027South America\u0027 and the publication date is within the last 90 days. It then uses the group by clause to group the results by region. Finally, it uses the max function to calculate the maximum number of likes received by articles about climate change in each region." 
+}, { + "id": "957", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles were published per month in 2021?", + "sql_context": "CREATE TABLE news (id INT, title VARCHAR(255), publication_date DATE); INSERT INTO news (id, title, publication_date) VALUES (1, \u0027Article1\u0027, \u00272021-01-01\u0027), (2, \u0027Article2\u0027, \u00272021-02-15\u0027), (3, \u0027Article3\u0027, \u00272021-03-05\u0027);", + "sql": "SELECT DATE_FORMAT(publication_date, \u0027%Y-%m\u0027) as month, COUNT(*) as articles_count FROM news WHERE publication_date \u003e\u003d \u00272021-01-01\u0027 AND publication_date \u003c \u00272022-01-01\u0027 GROUP BY month;", + "sql_explanation": "This query counts the number of news articles published each month in 2021. It extracts the year and month from the \u0027publication_date\u0027 column using the DATE_FORMAT function, filters the data for 2021, and then groups the data by the \u0027month\u0027 column to count the number of articles for each month." 
+}, { + "id": "1676", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of articles published per month in the \u0027articles\u0027 table for the year 2022.", + "sql_context": "CREATE TABLE articles (title VARCHAR(255), publication_date DATE);", + "sql": "SELECT EXTRACT(MONTH FROM publication_date) AS month, COUNT(*) AS count FROM articles WHERE EXTRACT(YEAR FROM publication_date) \u003d 2022 GROUP BY month;", + "sql_explanation": "This query extracts the month from the \u0027publication_date\u0027 column and groups the results by month. It then filters for articles published in the year 2022 and counts the number of articles per month." 
+}, { + "id": "1767", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of disinformation cases detected per month in South America since 2018?", + "sql_context": "CREATE TABLE disinformation (id INT, case_name VARCHAR(50), date DATE, location VARCHAR(50)); INSERT INTO disinformation (id, case_name, date, location) VALUES (1, \u0027Case1\u0027, \u00272018-01-01\u0027, \u0027South America\u0027), (2, \u0027Case2\u0027, \u00272019-03-15\u0027, \u0027North America\u0027), (3, \u0027Case3\u0027, \u00272020-12-31\u0027, \u0027Europe\u0027);", + "sql": "SELECT AVG(COUNT(*)) FROM disinformation WHERE location LIKE \u0027%South America%\u0027 AND date BETWEEN \u00272018-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY MONTH(date);", + "sql_explanation": "This query calculates the average number of disinformation cases detected per month in South America since 2018 by filtering the disinformation table based on the location and date, grouping the rows by month, and using the AVG and COUNT functions to calculate the average number of rows in each group." 
+}, { + "id": "2060", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 most common media types for content from the USA?", + "sql_context": "CREATE TABLE media_content (id INT PRIMARY KEY, title VARCHAR(255), description TEXT, country VARCHAR(64), media_type VARCHAR(64)); INSERT INTO media_content (id, title, description, country, media_type) VALUES (1, \u0027Movie A\u0027, \u0027Description A\u0027, \u0027USA\u0027, \u0027Movie\u0027), (2, \u0027Movie B\u0027, \u0027Description B\u0027, \u0027Canada\u0027, \u0027Movie\u0027), (3, \u0027Show C\u0027, \u0027Description C\u0027, \u0027Mexico\u0027, \u0027Show\u0027), (4, \u0027Podcast D\u0027, \u0027Description D\u0027, \u0027USA\u0027, \u0027Podcast\u0027);", + "sql": "SELECT media_type, COUNT(*) AS type_count FROM media_content WHERE country \u003d \u0027USA\u0027 GROUP BY media_type ORDER BY type_count DESC LIMIT 3;", + "sql_explanation": "This query filters the media content to only include items from the USA, then groups the remaining records by media type and counts the number of entries for each. By ordering in descending order and limiting the results to the top 3, we find the most common media types for content from the USA." 
+}, { + "id": "2773", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of movies produced in the US and released between 2010 and 2020, grouped by genre?", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(255), release_year INT, rating FLOAT, genre VARCHAR(255), country VARCHAR(255)); INSERT INTO movies (id, title, release_year, rating, genre, country) VALUES (1, \u0027Movie1\u0027, 2015, 7.8, \u0027Action\u0027, \u0027USA\u0027), (2, \u0027Movie2\u0027, 2018, 6.4, \u0027Drama\u0027, \u0027USA\u0027);", + "sql": "SELECT genre, AVG(rating) FROM movies WHERE release_year BETWEEN 2010 AND 2020 AND country \u003d \u0027USA\u0027 GROUP BY genre;", + "sql_explanation": "Calculate the average rating for movies released between 2010 and 2020 in the USA, grouped by genre." 
+}, { + "id": "2775", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the genres that are unique to movies produced in Spain and Portugal.", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(255), genre VARCHAR(100), country VARCHAR(50)); INSERT INTO movies VALUES (1, \u0027Movie A\u0027, \u0027Comedy\u0027, \u0027Spain\u0027); INSERT INTO movies VALUES (2, \u0027Movie B\u0027, \u0027Drama\u0027, \u0027Portugal\u0027); INSERT INTO movies VALUES (3, \u0027Movie C\u0027, \u0027Action\u0027, \u0027Spain\u0027); INSERT INTO movies VALUES (4, \u0027Movie D\u0027, \u0027Comedy\u0027, \u0027Portugal\u0027); INSERT INTO movies VALUES (5, \u0027Movie E\u0027, \u0027Horror\u0027, \u0027Spain\u0027);", + "sql": "SELECT genre FROM movies WHERE country IN (\u0027Spain\u0027, \u0027Portugal\u0027) GROUP BY genre HAVING COUNT(DISTINCT country) \u003d 1;", + "sql_explanation": "First, create a table movies with respective inserts. Then, find the genres that are unique to Spain or Portugal by using GROUP BY and HAVING clauses." 
+}, { + "id": "2801", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average disinformation score for content creators in the United Kingdom, grouped by gender?", + "sql_context": "CREATE TABLE content_creators (creator_id INT, gender VARCHAR(50), country VARCHAR(50), disinformation_score INT); INSERT INTO content_creators (creator_id, gender, country, disinformation_score) VALUES (1, \u0027Female\u0027, \u0027UK\u0027, 50), (2, \u0027Male\u0027, \u0027Canada\u0027, 45), (3, \u0027Female\u0027, \u0027UK\u0027, 55);", + "sql": "SELECT gender, AVG(disinformation_score) as avg_score FROM content_creators WHERE country \u003d \u0027UK\u0027 GROUP BY gender;", + "sql_explanation": "This query calculates the average disinformation score for content creators in the United Kingdom, grouped by gender. It filters the content_creators table to only include rows with \u0027UK\u0027 in the country column, then calculates the average disinformation score for each unique gender value in the filtered data." 
+}, { + "id": "3360", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the monthly trend of news article word counts for the \"politics\" section in 2022?", + "sql_context": "CREATE TABLE news_articles (id INT, title TEXT, publish_date DATE, word_count INT); CREATE VIEW news_summary AS SELECT id, title, publish_date, EXTRACT(MONTH FROM publish_date) as month, EXTRACT(YEAR FROM publish_date) as year, word_count FROM news_articles WHERE section \u003d \u0027politics\u0027;", + "sql": "SELECT month, AVG(word_count) as avg_word_count FROM news_summary WHERE year \u003d 2022 GROUP BY month;", + "sql_explanation": "This query calculates the average monthly word count for news articles in the \"politics\" section in the news_summary view during 2022." 
+}, { + "id": "3565", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total duration of news programs in minutes for each day of the week.", + "sql_context": "CREATE TABLE news_programs (title VARCHAR(255), duration INT, air_date DATE);", + "sql": "SELECT air_date, SUM(duration) FROM news_programs WHERE title LIKE \u0027%news%\u0027 GROUP BY air_date;", + "sql_explanation": "Filter the news_programs table where title is like \u0027%news%\u0027, apply the SUM function on duration column and group the result set by air_date to get the total duration of news programs in minutes for each day of the week." 
+}, { + "id": "3833", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which studios have produced more than 50 movies in the \"movies\" table?", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(100), release_year INT, studio_country VARCHAR(50)); INSERT INTO movies (id, title, release_year, studio_country) VALUES (1, \u0027Movie1\u0027, 2000, \u0027USA\u0027), (2, \u0027Movie2\u0027, 2005, \u0027Canada\u0027), (3, \u0027Movie3\u0027, 2010, \u0027USA\u0027), (4, \u0027Movie4\u0027, 2015, \u0027USA\u0027), (5, \u0027Movie5\u0027, 2020, \u0027France\u0027);", + "sql": "SELECT studio_country, COUNT(*) FROM movies GROUP BY studio_country HAVING COUNT(*) \u003e 50;", + "sql_explanation": "This SQL query performs a group by operation on the studio_country column from the movies table, and then filters the results to only include groups with a count greater than 50." 
+}, { + "id": "4361", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many movies were released per year?", + "sql_context": "CREATE TABLE movies (title VARCHAR(255), release_year INT); INSERT INTO movies (title, release_year) VALUES (\u0027Movie1\u0027, 2010), (\u0027Movie2\u0027, 2005), (\u0027Movie3\u0027, 2015), (\u0027Movie4\u0027, 2010), (\u0027Movie5\u0027, 2005), (\u0027Movie6\u0027, 2020);", + "sql": "SELECT release_year, COUNT(*) as movie_count FROM movies GROUP BY release_year;", + "sql_explanation": "Find the number of movies released per year by grouping the movies by release_year and applying the COUNT function to each group." 
+}, { + "id": "4627", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of news channels by genre in the \u0027news_channels\u0027 table?", + "sql_context": "CREATE TABLE news_channels (channel_name VARCHAR(50), country VARCHAR(50), genre VARCHAR(50)); INSERT INTO news_channels (channel_name, country, genre) VALUES (\u0027CNN\u0027, \u0027USA\u0027, \u0027News\u0027); INSERT INTO news_channels (channel_name, country, genre) VALUES (\u0027BBC\u0027, \u0027UK\u0027, \u0027News\u0027);", + "sql": "SELECT genre, COUNT(*) as channel_count FROM news_channels GROUP BY genre;", + "sql_explanation": "This SQL query uses the \u0027COUNT\u0027 function to count the number of news channels per genre and the \u0027GROUP BY\u0027 clause to group the results by genre." 
+}, { + "id": "4940", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique genres are associated with media published in each country?", + "sql_context": "CREATE TABLE media (id INT, title VARCHAR(50), location VARCHAR(50), genre VARCHAR(50)); INSERT INTO media (id, title, location, genre) VALUES (1, \u0027Article 1\u0027, \u0027USA\u0027, \u0027News\u0027), (2, \u0027Article 2\u0027, \u0027Canada\u0027, \u0027Entertainment\u0027), (3, \u0027News 1\u0027, \u0027USA\u0027, \u0027Politics\u0027), (4, \u0027News 2\u0027, \u0027Canada\u0027, \u0027Sports\u0027);", + "sql": "SELECT location, COUNT(DISTINCT genre) FROM media GROUP BY location;", + "sql_explanation": "We use the COUNT and GROUP BY functions to find the number of unique genres associated with media published in each country in the media table." 
+}, { + "id": "5188", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many shows were released in each genre, and what is the total runtime for each genre?", + "sql_context": "CREATE TABLE shows (id INT, title VARCHAR(100), genre VARCHAR(50), country VARCHAR(50), release_year INT, runtime INT);", + "sql": "SELECT genre, COUNT(*), SUM(runtime) FROM shows GROUP BY genre;", + "sql_explanation": "We use the COUNT and SUM functions to calculate the number of shows and total runtime for each genre, respectively." +}, { + "id": "5200", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average frequency of content for each genre in the media_content table?", + "sql_context": "CREATE TABLE media_content (id INT, genre VARCHAR(50), frequency INT); INSERT INTO media_content (id, genre, frequency) VALUES (1, \u0027Movie\u0027, 100), (2, \u0027TV Show\u0027, 30), (3, \u0027Documentary\u0027, 40);", + "sql": "SELECT genre, AVG(frequency) FROM media_content GROUP BY genre;", + "sql_explanation": "This query uses a GROUP BY clause to group results by genre and an AVG function to calculate the average frequency of content for each genre." 
+}, { + "id": "1510", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many new users registered on the music platform per week?", + "sql_context": "CREATE TABLE UserRegistrations (reg_id INT, reg_date DATE, user_id INT, user_info VARCHAR(255)); INSERT INTO UserRegistrations (reg_id, reg_date, user_id, user_info) VALUES (1, \u00272020-01-01\u0027, 1, \u0027John Doe\u0027), (2, \u00272020-01-07\u0027, 2, \u0027Jane Doe\u0027), (3, \u00272020-01-05\u0027, 3, \u0027Mike Johnson\u0027);", + "sql": "SELECT DATE_FORMAT(reg_date, \u0027%Y-%u\u0027) as registration_week, COUNT(DISTINCT user_id) as new_users_per_week FROM UserRegistrations GROUP BY registration_week;", + "sql_explanation": "This SQL query calculates the number of new users who registered on the music platform per week. It uses the DATE_FORMAT() function to format the reg_date column as a week number and then groups the data by this week number. The COUNT() function with the DISTINCT keyword is used to count the number of unique user_id values in each group, which represents the number of new users." 
+}, { + "id": "1658", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique users who streamed music during each month of 2021?", + "sql_context": "CREATE TABLE music_streaming (id INT, user_id INT, artist VARCHAR(50), song VARCHAR(50), genre VARCHAR(20), streamed_on DATE, revenue DECIMAL(10,2), streams INT); CREATE VIEW monthly_user_streams AS SELECT DATE_TRUNC(\u0027month\u0027, streamed_on) AS month, user_id FROM music_streaming GROUP BY month, user_id;", + "sql": "SELECT month, COUNT(DISTINCT user_id) FROM monthly_user_streams WHERE streamed_on BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY month ORDER BY month;", + "sql_explanation": "This query calculates the number of unique users who streamed music during each month of 2021. It creates a CTE named monthly_user_streams that selects the month and user_id columns from the music_streaming table, grouped by month and user_id. Then, it selects the month and count of distinct user_id values from the monthly_user_streams CTE where the streamed_on date is between \u00272021-01-01\u0027 and \u00272021-12-31\u0027, grouped by month and ordered by month." 
+}, { + "id": "2036", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration of songs in the hip-hop genre on Youtube and AppleMusic?", + "sql_context": "CREATE TABLE Song (Platform VARCHAR(20), Genre VARCHAR(10), Duration FLOAT); INSERT INTO Song (Platform, Genre, Duration) VALUES (\u0027Youtube\u0027, \u0027HipHop\u0027, 3.15), (\u0027Youtube\u0027, \u0027Pop\u0027, 4.23), (\u0027AppleMusic\u0027, \u0027HipHop\u0027, 2.87), (\u0027AppleMusic\u0027, \u0027Pop\u0027, 3.12);", + "sql": "SELECT Platform, AVG(Duration) FROM Song WHERE Genre \u003d \u0027HipHop\u0027 AND (Platform \u003d \u0027Youtube\u0027 OR Platform \u003d \u0027AppleMusic\u0027) GROUP BY Platform;", + "sql_explanation": "The SQL query calculates the average duration of songs in the hip-hop genre on Youtube and AppleMusic, groups the results by platform, and orders them by platform." 
+}, { + "id": "2356", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artists have released the most songs in the Jazz genre on Apple Music?", + "sql_context": "CREATE TABLE AppleMusicSongs (ArtistID INT, ArtistName VARCHAR(100), Genre VARCHAR(50), SongID INT); INSERT INTO AppleMusicSongs (ArtistID, ArtistName, Genre, SongID) VALUES (1, \u0027Miles Davis\u0027, \u0027Jazz\u0027, 1), (2, \u0027John Coltrane\u0027, \u0027Jazz\u0027, 2), (3, \u0027Miles Davis\u0027, \u0027Jazz\u0027, 3);", + "sql": "SELECT ArtistName, COUNT(*) as SongCount FROM AppleMusicSongs WHERE Genre \u003d \u0027Jazz\u0027 GROUP BY ArtistName ORDER BY SongCount DESC;", + "sql_explanation": "This query uses the COUNT() function to calculate the number of songs released in the Jazz genre on Apple Music by each artist, then uses the GROUP BY clause to group the results by artist. It then uses the ORDER BY clause to order the results by the number of songs released in descending order. The result is a list of artists with the most songs released in the Jazz genre on Apple Music." 
+}, { + "id": "4439", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many songs were released per year?", + "sql_context": "CREATE TABLE songs (id INT, title TEXT, release_year INT); INSERT INTO songs (id, title, release_year) VALUES (1, \u0027Song 1\u0027, 2020), (2, \u0027Song 2\u0027, 2019), (3, \u0027Song 3\u0027, 2021);", + "sql": "SELECT release_year, COUNT(id) as num_songs FROM songs GROUP BY release_year;", + "sql_explanation": "The SQL query groups the songs table by the release_year column and then calculates the count of songs for each year, providing the number of songs released per year." 
+}, { + "id": "4748", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique users who have streamed music in each genre?", + "sql_context": "CREATE TABLE genre_streams (stream_id INT, genre VARCHAR(255), user_id INT); CREATE TABLE user (user_id INT, user_name VARCHAR(255));", + "sql": "SELECT genre, COUNT(DISTINCT user_id) FROM genre_streams GROUP BY genre;", + "sql_explanation": "This query calculates the total number of unique users who have streamed music in each genre by counting the number of distinct user_id for each genre and grouping them by genre." 
+}, { + "id": "260", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total number of space missions and the success rate for each space agency?", + "sql_context": "CREATE TABLE space_missions (mission_id INT, agency VARCHAR(50), launch_year INT, mission_status VARCHAR(50)); INSERT INTO space_missions (mission_id, agency, launch_year, mission_status) VALUES (1, \u0027NASA\u0027, 2010, \u0027Success\u0027), (2, \u0027NASA\u0027, 2012, \u0027Failure\u0027), (3, \u0027ESA\u0027, 2014, \u0027Success\u0027), (4, \u0027ESA\u0027, 2016, \u0027Success\u0027), (5, \u0027ISRO\u0027, 2018, \u0027Failure\u0027), (6, \u0027ISRO\u0027, 2020, \u0027Success\u0027);", + "sql": "SELECT agency, COUNT(*) as total_missions, SUM(CASE WHEN mission_status \u003d \u0027Success\u0027 THEN 1 ELSE 0 END) as successful_missions, 100.0 * SUM(CASE WHEN mission_status \u003d \u0027Success\u0027 THEN 1 ELSE 0 END) / COUNT(*) as success_rate FROM space_missions GROUP BY agency;", + "sql_explanation": "The SQL query groups the space missions table by space agency and calculates the total number of missions and the number of successful missions for each agency. It then calculates the success rate as the percentage of successful missions out of the total number of missions. Finally, it returns the agency, total number of missions, number of successful missions, and success rate for each group." 
+}, { + "id": "1185", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of satellites launched by each country in the satellite_data table, grouped by launch decade?", + "sql_context": "CREATE TABLE satellite_data (satellite_id INT, name VARCHAR(100), launch_date DATE, country_of_origin VARCHAR(50), function VARCHAR(50), lifespan INT);", + "sql": "SELECT STRING_AGG(country_of_origin, \u0027,\u0027) AS country_of_origin, EXTRACT(YEAR FROM launch_date)/10*10 AS launch_decade, COUNT(*) FROM satellite_data GROUP BY launch_decade;", + "sql_explanation": "This SQL query counts the number of satellites launched by each country, grouped by launch decade. It uses the EXTRACT function to extract the year from the launch_date and divide it by 10 to get the launch decade. The query then uses the GROUP BY clause to group the satellites by their launch decade. The STRING_AGG function is used to concatenate the country_of_origin for all satellites launched in the same launch decade." 
+}, { + "id": "1524", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What were the top 3 countries with the most space missions launched between 2010 and 2020?", + "sql_context": "CREATE TABLE space_missions (mission_id INT, country VARCHAR(50), launch_year INT); INSERT INTO space_missions (mission_id, country, launch_year) VALUES (1, \u0027USA\u0027, 2010), (2, \u0027China\u0027, 2012), (3, \u0027Russia\u0027, 2015), (4, \u0027India\u0027, 2016), (5, \u0027Japan\u0027, 2017);", + "sql": "SELECT country, COUNT(*) as mission_count FROM space_missions WHERE launch_year BETWEEN 2010 AND 2020 GROUP BY country ORDER BY mission_count DESC LIMIT 3;", + "sql_explanation": "The SQL query groups the space missions by country and filters for the years between 2010 and 2020. It then counts the number of missions for each country and orders the results in descending order. Finally, it limits the output to the top 3 countries with the most space missions." 
+}, { + "id": "2153", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of space missions launched by each country in the 1990s", + "sql_context": "CREATE TABLE space_missions (launch_year INT, launch_country VARCHAR(50)); INSERT INTO space_missions (launch_year, launch_country) VALUES (1990, \u0027USA\u0027), (1991, \u0027Russia\u0027), (1992, \u0027USA\u0027), (1993, \u0027China\u0027), (1994, \u0027Russia\u0027), (1995, \u0027USA\u0027), (1996, \u0027Russia\u0027), (1997, \u0027USA\u0027), (1998, \u0027Russia\u0027), (1999, \u0027USA\u0027);", + "sql": "SELECT launch_country, COUNT(*) as mission_count FROM space_missions WHERE launch_year BETWEEN 1990 AND 1999 GROUP BY launch_country;", + "sql_explanation": "This query calculates the number of space missions launched by each country in the 1990s by counting the number of records for each launch_country and grouping by launch_country. It filters the data by launch_year between 1990 and 1999." 
+}, { + "id": "2461", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of satellites in the satellite_database table, grouped by their country, and order by the count in descending order, only showing the top 5 countries", + "sql_context": "CREATE TABLE satellite_database (id INT, name VARCHAR(50), type VARCHAR(50), orbit_type VARCHAR(50), country VARCHAR(50), launch_date DATE);", + "sql": "SELECT country, COUNT(*) as satellite_count FROM satellite_database GROUP BY country ORDER BY satellite_count DESC LIMIT 5;", + "sql_explanation": "This query groups the satellites by their country, orders the results by the number of satellites per country in descending order, and only shows the top 5 countries with the most satellites." 
+}, { + "id": "2476", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which space agency has launched the most satellites in descending order?", + "sql_context": "CREATE TABLE Space_Satellites (Satellite_ID INT, Satellite_Name VARCHAR(100), Launch_Date DATE, Country_Name VARCHAR(50), Agency_Name VARCHAR(50)); INSERT INTO Space_Satellites (Satellite_ID, Satellite_Name, Launch_Date, Country_Name, Agency_Name) VALUES (1, \u0027Sat1\u0027, \u00272000-01-01\u0027, \u0027USA\u0027, \u0027NASA\u0027), (2, \u0027Sat2\u0027, \u00272001-01-01\u0027, \u0027Russia\u0027, \u0027Roscosmos\u0027), (3, \u0027Sat3\u0027, \u00272002-01-01\u0027, \u0027China\u0027, \u0027CNSA\u0027), (4, \u0027Sat4\u0027, \u00272003-01-01\u0027, \u0027USA\u0027, \u0027NASA\u0027), (5, \u0027Sat5\u0027, \u00272004-01-01\u0027, \u0027India\u0027, \u0027ISRO\u0027);", + "sql": "SELECT Agency_Name, COUNT(*) as Total_Satellites FROM Space_Satellites GROUP BY Agency_Name ORDER BY Total_Satellites DESC;", + "sql_explanation": "This SQL query groups the Space_Satellites table by the Agency_Name column, calculates the count of satellites for each agency, and orders the results in descending order based on the Total_Satellites column." 
+}, { + "id": "2585", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have launched the most spacecraft?", + "sql_context": "CREATE TABLE spacecraft (id INT, name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE, max_speed FLOAT);", + "sql": "SELECT launch_country, COUNT(*) as num_spacecraft FROM spacecraft GROUP BY launch_country ORDER BY num_spacecraft DESC;", + "sql_explanation": "The SQL query calculates the number of spacecraft launched by each launch country and orders the results in descending order. It groups the spacecraft table by launch_country and then calculates the count of rows for each group." 
+}, { + "id": "2665", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many space missions were successfully launched by each country?", + "sql_context": "CREATE TABLE space_missions (id INT, mission_name VARCHAR(255), country VARCHAR(255), launch_status VARCHAR(10)); INSERT INTO space_missions (id, mission_name, country, launch_status) VALUES (1, \u0027Apollo 11\u0027, \u0027USA\u0027, \u0027Success\u0027), (2, \u0027Mars Orbiter Mission\u0027, \u0027India\u0027, \u0027Success\u0027), (3, \u0027Chandrayaan-1\u0027, \u0027India\u0027, \u0027Success\u0027), (4, \u0027Grail\u0027, \u0027USA\u0027, \u0027Success\u0027), (5, \u0027Mars Express\u0027, \u0027Europe\u0027, \u0027Success\u0027), (6, \u0027Venus Express\u0027, \u0027Europe\u0027, \u0027Failure\u0027), (7, \u0027Hayabusa\u0027, \u0027Japan\u0027, \u0027Success\u0027), (8, \u0027Akatsuki\u0027, \u0027Japan\u0027, \u0027Failure\u0027);", + "sql": "SELECT country, COUNT(*) as successful_launches FROM space_missions WHERE launch_status \u003d \u0027Success\u0027 GROUP BY country;", + "sql_explanation": "This SQL query groups the rows in the \u0027space_missions\u0027 table by the \u0027country\u0027 column and filters the rows to only include rows where the \u0027launch_status\u0027 column is \u0027Success\u0027, and then calculates the count of successful launches for each country, providing a count of successful space missions launched by each country." 
+}, { + "id": "3261", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count of satellites launched per country, ordered by launch count.", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(255), country VARCHAR(255), launch_date DATE, type VARCHAR(255)); INSERT INTO satellites (id, name, country, launch_date, type) VALUES (1, \u0027Sentinel-1A\u0027, \u0027European Union\u0027, \u00272014-04-03\u0027, \u0027Radar Imaging\u0027);", + "sql": "SELECT country, COUNT(*) as launch_count FROM satellites GROUP BY country ORDER BY launch_count DESC;", + "sql_explanation": "This SQL query groups the satellites by country, then counts the number of satellites for each group, ordering the results by launch count in descending order." 
+}, { + "id": "3454", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the stars hosting at least 3 habitable exoplanets.", + "sql_context": "CREATE TABLE exoplanets (id INT, name VARCHAR(50), discovery_date DATE, discovery_method VARCHAR(50), host_star VARCHAR(50), habitable BOOLEAN);", + "sql": "SELECT host_star FROM exoplanets WHERE habitable \u003d TRUE GROUP BY host_star HAVING COUNT(*) \u003e\u003d 3;", + "sql_explanation": "This query lists the stars hosting at least 3 habitable exoplanets by grouping the exoplanets table by host_star and filtering the groups having at least 3 habitable exoplanets with the HAVING clause." 
+}, { + "id": "3472", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of satellites in a specific orbit, based on the SatelliteOrbits table?", + "sql_context": "CREATE TABLE SatelliteOrbits (SatelliteID INT, OrbitType VARCHAR(50), OrbitHeight INT); INSERT INTO SatelliteOrbits (SatelliteID, OrbitType, OrbitHeight) VALUES (101, \u0027LEO\u0027, 500), (201, \u0027MEO\u0027, 8000), (301, \u0027GEO\u0027, 36000), (401, \u0027LEO\u0027, 600), (501, \u0027MEO\u0027, 10000);", + "sql": "SELECT OrbitType, COUNT(SatelliteID) AS TotalSatellites FROM SatelliteOrbits GROUP BY OrbitType;", + "sql_explanation": "This SQL query groups the SatelliteOrbits table by OrbitType and counts the number of SatelliteIDs for each OrbitType, providing the total number of satellites in a specific orbit." 
+}, { + "id": "3684", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of spacecraft launched by country?", + "sql_context": "CREATE TABLE spacecraft (id INT, name VARCHAR(255), launch_country VARCHAR(255), launch_date DATE, max_speed FLOAT);", + "sql": "SELECT launch_country, AVG(max_speed) as avg_speed FROM spacecraft GROUP BY launch_country;", + "sql_explanation": "The SQL query calculates the average speed of spacecraft for each launch country. It groups the spacecraft table by launch_country and then calculates the average of the max_speed column for each group." +}, { + "id": "4278", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have launched satellites more than once?", + "sql_context": "CREATE TABLE SpaceLaunchs (LaunchID INT, Country VARCHAR(50), SatelliteID INT); INSERT INTO SpaceLaunchs (LaunchID, Country, SatelliteID) VALUES (1, \u0027USA\u0027, 101), (2, \u0027Russia\u0027, 201), (3, \u0027China\u0027, 301), (4, \u0027India\u0027, 401), (5, \u0027Japan\u0027, 501);", + "sql": "SELECT Country FROM SpaceLaunchs GROUP BY Country HAVING COUNT(SatelliteID) \u003e 1;", + "sql_explanation": "This SQL query groups the SpaceLaunchs table by 
Country and filters countries with more than one satellite launch by using the HAVING clause, counting the number of SatelliteIDs for each Country." +}, { + "id": "4465", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of space missions conducted by each country in the SpaceMissions table?", + "sql_context": "CREATE TABLE SpaceMissions (id INT, mission VARCHAR(50), year INT, country VARCHAR(50)); INSERT INTO SpaceMissions (id, mission, year, country) VALUES (1, \u0027Apollo 11\u0027, 1969, \u0027USA\u0027), (2, \u0027Apollo 13\u0027, 1970, \u0027USA\u0027), (3, \u0027STS-1\u0027, 1981, \u0027USA\u0027), (4, \u0027Shenzhou 5\u0027, 2003, \u0027China\u0027);", + "sql": "SELECT country, COUNT(*) AS num_missions FROM SpaceMissions GROUP BY country;", + "sql_explanation": "This SQL query calculates the total number of space missions conducted by each country in the SpaceMissions table. It uses the COUNT() function to count the number of missions and the GROUP BY clause to group the results by country." 
+}, { + "id": "4929", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of space missions by agency?", + "sql_context": "CREATE TABLE space_agency_missions (id INT, agency VARCHAR(50), name VARCHAR(50), cost INT); INSERT INTO space_agency_missions (id, agency, name, cost) VALUES (1, \u0027NASA\u0027, \u0027Mars Rover 2001\u0027, 2500000), (2, \u0027NASA\u0027, \u0027ISS\u0027, 150000000), (3, \u0027ESA\u0027, \u0027Hubble Space Telescope\u0027, 1000000000);", + "sql": "SELECT agency, SUM(cost) FROM space_agency_missions GROUP BY agency;", + "sql_explanation": "The SQL query uses the GROUP BY clause to group the data by space agency. The SUM function is used to calculate the total cost of the missions for each agency. The result is a breakdown of total costs for each space agency." 
+}, { + "id": "5489", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which space agencies have launched missions to Mars?", + "sql_context": "CREATE TABLE space_mars_missions (agency VARCHAR(50), mission VARCHAR(50), year INTEGER); INSERT INTO space_mars_missions (agency, mission, year) VALUES (\u0027NASA\u0027, \u0027Mars Pathfinder\u0027, 1997), (\u0027NASA\u0027, \u0027Mars Global Surveyor\u0027, 1997), (\u0027ISAS\u0027, \u0027Nozomi\u0027, 1998), (\u0027NASA\u0027, \u0027Mars Climate Orbiter\u0027, 1999), (\u0027NASA\u0027, \u0027Mars Polar Lander\u0027, 1999), (\u0027NASA\u0027, \u0027Mars Odyssey\u0027, 2001), (\u0027ESA\u0027, \u0027Mars Express\u0027, 2003), (\u0027Beagle Consortium\u0027, \u0027Beagle 2\u0027, 2003), (\u0027NASA\u0027, \u0027Mars Reconnaissance Orbiter\u0027, 2006), (\u0027NASA\u0027, \u0027Phoenix\u0027, 2008), (\u0027NASA\u0027, \u0027Mars Science Laboratory\u0027, 2012), (\u0027NASA\u0027, \u0027Mars Atmosphere and Volatile Evolution\u0027, 2013), (\u0027NASA\u0027, \u0027MAVEN\u0027, 2014), (\u0027ESA\u0027, \u0027ExoMars Trace Gas Orbiter\u0027, 2016), (\u0027NASA\u0027, \u0027InSight\u0027, 2018), (\u0027UAE Space Agency\u0027, \u0027Hope Mars Mission\u0027, 2021), (\u0027CNSA\u0027, \u0027Tianwen-1\u0027, 2021);", + "sql": "SELECT agency FROM space_mars_missions GROUP BY agency;", + "sql_explanation": "This query retrieves the space agencies that have launched missions to Mars by selecting the \u0027agency\u0027 column from the \u0027space_mars_missions\u0027 table and using the GROUP BY clause to group rows by the \u0027agency\u0027 column." 
+}, { + "id": "5503", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total mass of space debris launched in each year?", + "sql_context": "CREATE TABLE space_debris (year INT, category TEXT, mass FLOAT); INSERT INTO space_debris (year, category, mass) VALUES (2015, \u0027Aluminum\u0027, 120.5), (2015, \u0027Titanium\u0027, 170.1), (2016, \u0027Aluminum\u0027, 150.2), (2016, \u0027Titanium\u0027, 180.1), (2017, \u0027Copper\u0027, 100.1), (2017, \u0027Steel\u0027, 250.7);", + "sql": "SELECT year, SUM(mass) FROM space_debris GROUP BY year;", + "sql_explanation": "The SQL query calculates the total mass of space debris launched in each year by grouping the \u0027space_debris\u0027 table by the \u0027year\u0027 column and summing up the \u0027mass\u0027 values for each group." 
+}, { + "id": "103", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total energy consumption in New York for the year 2020, segmented by renewable and non-renewable sources?", + "sql_context": "CREATE TABLE energy_consumption (state VARCHAR(20), year INT, energy_type VARCHAR(10), consumption FLOAT); INSERT INTO energy_consumption (state, year, energy_type, consumption) VALUES (\u0027New York\u0027, 2020, \u0027Renewable\u0027, 12000), (\u0027New York\u0027, 2020, \u0027Non-Renewable\u0027, 25000);", + "sql": "SELECT state, year, SUM(CASE WHEN energy_type \u003d \u0027Renewable\u0027 THEN consumption ELSE 0 END) AS renewable_consumption, SUM(CASE WHEN energy_type \u003d \u0027Non-Renewable\u0027 THEN consumption ELSE 0 END) AS non_renewable_consumption FROM energy_consumption WHERE state \u003d \u0027New York\u0027 AND year \u003d 2020 GROUP BY state, year;", + "sql_explanation": "The query calculates the total energy consumption in New York for the year 2020, by summing up the consumption for both renewable and non-renewable sources separately using conditional aggregation (CASE statement)." 
+}, { + "id": "278", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy generated from renewable energy sources in Kenya, Nigeria, and South Africa for Q1 2021?", + "sql_context": "CREATE TABLE energy_generation (country VARCHAR(20), source_type VARCHAR(20), generation FLOAT, generation_date DATETIME); INSERT INTO energy_generation (country, source_type, generation, generation_date) VALUES (\u0027Kenya\u0027, \u0027Solar\u0027, 1200.0, \u00272021-01-01\u0027), (\u0027Kenya\u0027, \u0027Wind\u0027, 1500.0, \u00272021-01-01\u0027), (\u0027Nigeria\u0027, \u0027Solar\u0027, 1800.0, \u00272021-01-01\u0027), (\u0027Nigeria\u0027, \u0027Wind\u0027, 2000.0, \u00272021-01-01\u0027), (\u0027South Africa\u0027, \u0027Solar\u0027, 2100.0, \u00272021-01-01\u0027), (\u0027South Africa\u0027, \u0027Wind\u0027, 2400.0, \u00272021-01-01\u0027);", + "sql": "SELECT country, SUM(generation) as total_generation FROM energy_generation WHERE country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027, \u0027South Africa\u0027) AND generation_date \u003e\u003d \u00272021-01-01\u0027 AND generation_date \u003c \u00272021-04-01\u0027 AND source_type IN (\u0027Solar\u0027, \u0027Wind\u0027) GROUP BY country;", + "sql_explanation": "This query calculates the total energy generated from renewable energy sources (solar and wind) in Kenya, Nigeria, and South Africa for Q1 2021." 
+}, { + "id": "338", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy patents were granted in the United States and China in 2019?", + "sql_context": "CREATE TABLE RenewablePatents (Country TEXT, Year INT, NumPatents INT); INSERT INTO RenewablePatents (Country, Year, NumPatents) VALUES (\u0027United States\u0027, 2019, 56000), (\u0027China\u0027, 2019, 83000), (\u0027Germany\u0027, 2019, 28000), (\u0027India\u0027, 2019, 15000); CREATE TABLE EnergyPatents (Country TEXT, Year INT, NumPatents INT); INSERT INTO EnergyPatents (Country, Year, NumPatents) VALUES (\u0027United States\u0027, 2018, 65000), (\u0027China\u0027, 2018, 102000), (\u0027Germany\u0027, 2018, 31000), (\u0027India\u0027, 2018, 18000);", + "sql": "SELECT RenewablePatents.Country, SUM(RenewablePatents.NumPatents) AS Total_Renewable_Patents FROM RenewablePatents WHERE RenewablePatents.Country IN (\u0027United States\u0027, \u0027China\u0027) AND RenewablePatents.Year \u003d 2019 GROUP BY RenewablePatents.Country;", + "sql_explanation": "Select the Country column and calculate the sum of the NumPatents column from the RenewablePatents table, filtering the records by Country and Year. Group the results by Country." 
+}, { + "id": "344", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average, minimum, and maximum daily carbon pricing in Germany, France, and Italy for January 2021?", + "sql_context": "CREATE TABLE carbon_pricing (country VARCHAR(20), date DATETIME, daily_price FLOAT); INSERT INTO carbon_pricing (country, date, daily_price) VALUES (\u0027Germany\u0027, \u00272021-01-01\u0027, 30.5), (\u0027Germany\u0027, \u00272021-01-02\u0027, 31.2), (\u0027France\u0027, \u00272021-01-01\u0027, 25.3), (\u0027France\u0027, \u00272021-01-02\u0027, 26.1), (\u0027Italy\u0027, \u00272021-01-01\u0027, 28.8), (\u0027Italy\u0027, \u00272021-01-02\u0027, 29.6);", + "sql": "SELECT country, AVG(daily_price) as avg_price, MIN(daily_price) as min_price, MAX(daily_price) as max_price FROM carbon_pricing WHERE country IN (\u0027Germany\u0027, \u0027France\u0027, \u0027Italy\u0027) AND date \u003e\u003d \u00272021-01-01\u0027 AND date \u003c \u00272021-02-01\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the average, minimum, and maximum daily carbon pricing for Germany, France, and Italy in January 2021." 
+}, { + "id": "526", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy production by renewable source in Canada for the month of January 2022?", + "sql_context": "CREATE TABLE energy_production (id INT, country VARCHAR(50), source VARCHAR(50), production FLOAT, timestamp TIMESTAMP); INSERT INTO energy_production (id, country, source, production, timestamp) VALUES (1, \u0027Canada\u0027, \u0027Wind\u0027, 500.2, \u00272022-01-01 10:00:00\u0027), (2, \u0027Canada\u0027, \u0027Solar\u0027, 700.3, \u00272022-01-02 15:00:00\u0027);", + "sql": "SELECT source, SUM(production) as total_production FROM energy_production WHERE country \u003d \u0027Canada\u0027 AND timestamp BETWEEN \u00272022-01-01 00:00:00\u0027 AND \u00272022-01-31 23:59:59\u0027 AND source IN (\u0027Wind\u0027, \u0027Solar\u0027) GROUP BY source;", + "sql_explanation": "This query calculates the total energy production for each renewable source (wind and solar) in the \u0027energy_production\u0027 table, for Canada during January 2022. It groups the data by source and calculates the total production using the SUM function." 
+}, { + "id": "1042", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of wind energy transactions for each buyer in 2023.", + "sql_context": "CREATE TABLE renewable_energy_transactions (id INT, buyer_id INT, seller_id INT, type VARCHAR(15), amount INT, date DATE); INSERT INTO renewable_energy_transactions (id, buyer_id, seller_id, type, amount, date) VALUES (1, 1001, 2001, \u0027Wind Energy\u0027, 200, \u00272023-01-01\u0027);", + "sql": "SELECT buyer_id, COUNT(*) as num_wind_transactions FROM renewable_energy_transactions WHERE type \u003d \u0027Wind Energy\u0027 AND date BETWEEN \u00272023-01-01\u0027 AND \u00272023-12-31\u0027 GROUP BY buyer_id;", + "sql_explanation": "Count the number of wind energy transactions for each buyer in 2023." 
+}, { + "id": "1206", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of renewable energy patents issued per year for the top 3 countries?", + "sql_context": "CREATE TABLE Patent (Year INT, Country VARCHAR(50), Type VARCHAR(50)); INSERT INTO Patent (Year, Country, Type) VALUES (2018, \u0027Country1\u0027, \u0027Renewable\u0027), (2018, \u0027Country2\u0027, \u0027Renewable\u0027), (2018, \u0027Country3\u0027, \u0027Renewable\u0027), (2019, \u0027Country1\u0027, \u0027Renewable\u0027), (2019, \u0027Country2\u0027, \u0027Renewable\u0027), (2019, \u0027Country3\u0027, \u0027Renewable\u0027);", + "sql": "SELECT Year, Country, COUNT(*) AS RenewableEnergyPatents FROM Patent WHERE Type \u003d \u0027Renewable\u0027 GROUP BY Year, Country ORDER BY Year, COUNT(*) DESC FETCH FIRST 3 ROWS ONLY;", + "sql_explanation": "The SQL query calculates the number of renewable energy patents issued per year for the top 3 countries by filtering the records based on Type, grouping them based on Year and Country, sorting the result set in descending order based on the count of renewable energy patents, and finally returning the top 3 rows." 
+}, { + "id": "1735", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 CO2 emission states from the energy sector?", + "sql_context": "CREATE TABLE co2_emissions (state VARCHAR(20), sector VARCHAR(20), co2_emissions FLOAT); INSERT INTO co2_emissions (state, sector, co2_emissions) VALUES (\u0027Texas\u0027, \u0027Energy\u0027, 256.12), (\u0027California\u0027, \u0027Energy\u0027, 176.54), (\u0027Pennsylvania\u0027, \u0027Energy\u0027, 134.65), (\u0027Florida\u0027, \u0027Energy\u0027, 121.98);", + "sql": "SELECT state, SUM(co2_emissions) as total_emissions FROM co2_emissions WHERE sector \u003d \u0027Energy\u0027 GROUP BY state ORDER BY total_emissions DESC LIMIT 3;", + "sql_explanation": "The SQL query lists the top 3 CO2 emission states from the energy sector by grouping the data by state, applying the SUM function to the co2_emissions column, filtering the data where sector equals \u0027Energy\u0027, and ordering the results in descending order based on the total emissions." 
+}, { + "id": "1746", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of carbon capture and storage facilities in the United States, Germany, and Saudi Arabia, as of 2020.", + "sql_context": "CREATE TABLE carbon_storage (country VARCHAR(50), operational BOOLEAN, year INT); INSERT INTO carbon_storage (country, operational, year) VALUES (\u0027United States\u0027, true, 2020), (\u0027Germany\u0027, true, 2020), (\u0027Saudi Arabia\u0027, true, 2020), (\u0027Norway\u0027, false, 2020);", + "sql": "SELECT country, COUNT(*) FROM carbon_storage WHERE country IN (\u0027United States\u0027, \u0027Germany\u0027, \u0027Saudi Arabia\u0027) AND operational \u003d true GROUP BY country;", + "sql_explanation": "Filter the data for the carbon capture and storage facilities in the specified countries and operational status, and then count the number of rows for each country." 
+}, { + "id": "2580", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects are there in the \u0027renewables\u0027 schema, grouped by project type and the continent of their location, ordered by the count in descending order?", + "sql_context": "CREATE SCHEMA renewables; CREATE TABLE renewable_projects (id INT, type VARCHAR(50), location VARCHAR(50)); INSERT INTO renewable_projects (id, type, location) VALUES (1, \u0027Solar\u0027, \u0027North America\u0027), (2, \u0027Wind\u0027, \u0027Europe\u0027), (3, \u0027Hydro\u0027, \u0027South America\u0027), (4, \u0027Geothermal\u0027, \u0027Africa\u0027), (5, \u0027Biomass\u0027, \u0027Asia\u0027), (6, \u0027Solar\u0027, \u0027Australia\u0027), (7, \u0027Wind\u0027, \u0027Antarctica\u0027);", + "sql": "SELECT type, location, COUNT(*) as count FROM renewables.renewable_projects GROUP BY type, location ORDER BY count DESC;", + "sql_explanation": "The SQL query counts the number of renewable energy projects in the \u0027renewables\u0027 schema, grouped by project type and the continent of their location, and orders the results by count in descending order. It uses the COUNT function to calculate the number of projects." 
+}, { + "id": "2777", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total energy storage capacity for each energy storage technology in the year 2020.", + "sql_context": "CREATE TABLE Storage (Year INT, Technology VARCHAR(50), Capacity INT); INSERT INTO Storage (Year, Technology, Capacity) VALUES (2020, \u0027Battery\u0027, 100), (2020, \u0027Flywheel\u0027, 150), (2020, \u0027Pumped Hydro\u0027, 200), (2021, \u0027Battery\u0027, 120), (2021, \u0027Flywheel\u0027, 180), (2021, \u0027Pumped Hydro\u0027, 250);", + "sql": "SELECT Technology, SUM(Capacity) AS TotalEnergyStorageCapacity FROM Storage WHERE Year \u003d 2020 GROUP BY Technology;", + "sql_explanation": "The SQL query calculates the total energy storage capacity for each energy storage technology in the year 2020 by filtering the records based on Year, grouping them based on Technology, and then finding the sum of Capacity for each group." 
+}, { + "id": "2825", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the difference between the maximum and minimum energy storage usage in 2022 for each region", + "sql_context": "CREATE TABLE energy_storage (date DATE, region VARCHAR(255), capacity INT, usage INT); INSERT INTO energy_storage (date, region, capacity, usage) VALUES (\u00272022-01-01\u0027, \u0027West Coast\u0027, 1000, 600), (\u00272022-01-01\u0027, \u0027East Coast\u0027, 800, 400);", + "sql": "SELECT region, MAX(usage) - MIN(usage) FROM energy_storage WHERE EXTRACT(YEAR FROM date) \u003d 2022 GROUP BY region;", + "sql_explanation": "Calculate the difference between the maximum and minimum energy storage usage in 2022 for each region." 
+}, { + "id": "3088", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which state had the highest renewable energy generation capacity in Q1 2021?", + "sql_context": "CREATE TABLE state_renewable_energy (state VARCHAR(255), quarter INT, technology VARCHAR(255), capacity FLOAT); INSERT INTO state_renewable_energy (state, quarter, technology, capacity) VALUES (\u0027California\u0027, 1, \u0027Solar\u0027, 5000), (\u0027California\u0027, 1, \u0027Wind\u0027, 3000), (\u0027Texas\u0027, 1, \u0027Solar\u0027, 4000), (\u0027Texas\u0027, 1, \u0027Wind\u0027, 4500), (\u0027New_York\u0027, 1, \u0027Solar\u0027, 2500), (\u0027New_York\u0027, 1, \u0027Wind\u0027, 3500);", + "sql": "SELECT state, MAX(capacity) as max_capacity FROM state_renewable_energy WHERE quarter \u003d 1 GROUP BY state;", + "sql_explanation": "The SQL query filters the state_renewable_energy table to only include records from Q1 2021, then groups the data by state and calculates the maximum renewable energy generation capacity for each state." 
+}, { + "id": "3123", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of solar power plants (in GW) in Japan, grouped by prefecture?", + "sql_context": "CREATE TABLE SolarPowerPlants (id INT, prefecture VARCHAR(50), capacity FLOAT); INSERT INTO SolarPowerPlants (id, prefecture, capacity) VALUES (1, \u0027Hokkaido\u0027, 1.2), (2, \u0027Tokyo\u0027, 2.5), (3, \u0027Hokkaido\u0027, 1.8), (4, \u0027Kyoto\u0027, 0.9);", + "sql": "SELECT prefecture, SUM(capacity) FROM SolarPowerPlants WHERE prefecture \u003d \u0027Hokkaido\u0027 GROUP BY prefecture;", + "sql_explanation": "This query calculates the total installed capacity of solar power plants (in GW) in Japan, grouped by prefecture by filtering the records based on the prefecture, grouping the records by prefecture, and then calculating the sum of capacity values for each group." 
+}, { + "id": "3185", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy installations are there in each region in Australia?", + "sql_context": "CREATE TABLE renewable_energy (id INT, region VARCHAR(255), installation INT); INSERT INTO renewable_energy (id, region, installation) VALUES (1, \u0027New South Wales\u0027, 1000), (2, \u0027Victoria\u0027, 1500), (3, \u0027New South Wales\u0027, 1200), (4, \u0027Victoria\u0027, 1800);", + "sql": "SELECT region, COUNT(DISTINCT installation) as num_installations FROM renewable_energy GROUP BY region;", + "sql_explanation": "This SQL query calculates the number of renewable energy installations in each region in Australia. It groups the data by region and calculates the number of distinct installations for each group." 
+}, { + "id": "3375", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of power plants in \u0027USA\u0027 and \u0027France\u0027 from the \u0027power_plants\u0027 table?", + "sql_context": "CREATE TABLE power_plants (id INT, name VARCHAR(255), type VARCHAR(255), capacity INT, location VARCHAR(255)); INSERT INTO power_plants (id, name, type, capacity, location) VALUES (1, \u0027La Grande-1\u0027, \u0027Hydro\u0027, 2730, \u0027Canada\u0027); INSERT INTO power_plants (id, name, type, capacity, location) VALUES (2, \u0027Three Gorges\u0027, \u0027Hydro\u0027, 22500, \u0027China\u0027); INSERT INTO power_plants (id, name, type, capacity, location) VALUES (3, \u0027Itaipu\u0027, \u0027Hydro\u0027, 14000, \u0027Brazil\u0027); INSERT INTO power_plants (id, name, type, capacity, location) VALUES (4, \u0027Bonneville\u0027, \u0027Hydro\u0027, 5203, \u0027USA\u0027); INSERT INTO power_plants (id, name, type, capacity, location) VALUES (5, \u0027Cattenom\u0027, \u0027Nuclear\u0027, 5496, \u0027France\u0027);", + "sql": "SELECT location, COUNT(*) FROM power_plants WHERE location IN (\u0027USA\u0027, \u0027France\u0027) GROUP BY location;", + "sql_explanation": "Calculate the number of power plants in \u0027USA\u0027 and \u0027France\u0027." 
+}, { + "id": "3420", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption in France, and how does it break down by sector?", + "sql_context": "CREATE TABLE energy_consumption (id INT, country VARCHAR(255), sector VARCHAR(255), consumption FLOAT);", + "sql": "SELECT sector, SUM(consumption) FROM energy_consumption WHERE country \u003d \u0027France\u0027 GROUP BY sector;", + "sql_explanation": "This SQL query calculates the total energy consumption in France and breaks it down by sector by summing up the consumption values in the energy_consumption table where the country is France, and grouping the results by sector." 
+}, { + "id": "3525", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of energy storage in each location in the energy_storage and energy_transactions tables?", + "sql_context": "CREATE TABLE energy_storage (id INT, type VARCHAR(255), capacity FLOAT, location VARCHAR(255)); CREATE TABLE energy_transactions (id INT, storage_id INT, energy_source VARCHAR(255), quantity FLOAT, timestamp DATETIME); INSERT INTO energy_storage (id, type, capacity, location) VALUES (1, \u0027Battery\u0027, 5000.0, \u0027TX\u0027), (2, \u0027Pumped Hydro\u0027, 7000.0, \u0027CA\u0027); INSERT INTO energy_transactions (id, storage_id, energy_source, quantity, timestamp) VALUES (1, 1, \u0027Wind\u0027, 2000.0, \u00272022-01-01 10:00:00\u0027), (2, 2, \u0027Solar\u0027, 3000.0, \u00272022-01-01 10:00:00\u0027);", + "sql": "SELECT e.location, SUM(e.capacity) as total_capacity FROM energy_storage e GROUP BY e.location;", + "sql_explanation": "This query calculates the total capacity of energy storage in each location from the \u0027energy_storage\u0027 table. It uses the sum function to add up the capacity of energy storage in each location. The results are grouped by location." 
+}, { + "id": "3559", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy storage capacity (in GWh) in Texas, grouped by technology?", + "sql_context": "CREATE TABLE EnergyStorage (id INT, state VARCHAR(50), technology VARCHAR(50), capacity FLOAT); INSERT INTO EnergyStorage (id, state, technology, capacity) VALUES (1, \u0027Texas\u0027, \u0027Batteries\u0027, 12.3), (2, \u0027Texas\u0027, \u0027Pumped Hydro\u0027, 18.7), (3, \u0027California\u0027, \u0027Batteries\u0027, 21.5);", + "sql": "SELECT technology, SUM(capacity) FROM EnergyStorage WHERE state \u003d \u0027Texas\u0027 GROUP BY technology;", + "sql_explanation": "This query calculates the total energy storage capacity (in GWh) in Texas, grouped by technology by filtering the records based on the state, grouping the records by technology, and then calculating the sum of capacity values for each group." 
+}, { + "id": "3575", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity (MW) of wind farms in the \u0027renewable_farms\u0027 table, grouped by country?", + "sql_context": "CREATE TABLE renewable_farms (id INT, farm_name VARCHAR(50), technology VARCHAR(50), capacity FLOAT, country VARCHAR(50)); INSERT INTO renewable_farms (id, farm_name, technology, capacity, country) VALUES (1, \u0027Windfarm 1\u0027, \u0027Wind\u0027, 100.5, \u0027Germany\u0027), (2, \u0027Windfarm 2\u0027, \u0027Wind\u0027, 120.3, \u0027France\u0027);", + "sql": "SELECT country, SUM(capacity) FROM renewable_farms WHERE technology \u003d \u0027Wind\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the total capacity of wind farms by country. It filters the \u0027renewable_farms\u0027 table for wind technology and then groups the results by country, summing the capacities." 
+}, { + "id": "4492", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which province in Canada has the highest energy storage capacity?", + "sql_context": "CREATE TABLE energy_storage_Canada (province VARCHAR(255), source_type VARCHAR(255), capacity INT); INSERT INTO energy_storage_Canada (province, source_type, capacity) VALUES (\u0027Ontario\u0027, \u0027Batteries\u0027, 3000), (\u0027Quebec\u0027, \u0027Batteries\u0027, 4000), (\u0027Ontario\u0027, \u0027Pumped Hydro\u0027, 8000);", + "sql": "SELECT province, MAX(capacity) FROM energy_storage_Canada GROUP BY province;", + "sql_explanation": "This SQL query identifies the province with the highest energy storage capacity in Canada by finding the maximum capacity value in the energy_storage_Canada table and grouping the results by province." 
+}, { + "id": "1821", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of player levels in \"EpicQuest\" for players from historically underrepresented communities, grouped by region?", + "sql_context": "CREATE TABLE epicquest_underrepresented_players (player_id INT, level INT, region VARCHAR(20), underrepresented_community VARCHAR(20)); INSERT INTO epicquest_underrepresented_players (player_id, level, region, underrepresented_community) VALUES (1, 25, \u0027North America\u0027, \u0027African American\u0027), (2, 30, \u0027Europe\u0027, \u0027Female\u0027), (3, 22, \u0027Asia\u0027, \u0027LGBTQ+\u0027), (4, 35, \u0027North America\u0027, \u0027Latino\u0027), (5, 18, \u0027Europe\u0027, \u0027Female\u0027), (6, 28, \u0027Asia\u0027, \u0027Non-binary\u0027);", + "sql": "SELECT region, AVG(level) AS avg_level, MIN(level) AS min_level, MAX(level) AS max_level FROM epicquest_underrepresented_players GROUP BY region;", + "sql_explanation": "This query calculates the average, minimum, and maximum player levels in the game \"EpicQuest\" for players from historically underrepresented communities, grouped by region. It groups the epicquest_underrepresented_players table by the region column and calculates the average, minimum, and maximum level for each group using the AVG, MIN, and MAX functions, respectively." 
+}, { + "id": "2442", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common game genre played by players in a specific age range?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, GameGenre VARCHAR(10)); INSERT INTO Players (PlayerID, Age, GameGenre) VALUES (1, 25, \u0027Action\u0027), (2, 30, \u0027Strategy\u0027), (3, 22, \u0027Action\u0027), (4, 19, \u0027Simulation\u0027), (5, 35, \u0027Strategy\u0027);", + "sql": "SELECT GameGenre, COUNT(*) AS Count FROM Players WHERE Age BETWEEN 20 AND 30 GROUP BY GameGenre ORDER BY Count DESC LIMIT 1;", + "sql_explanation": "This query identifies the most common game genre played by players in a specific age range. It does so by grouping records in the Players table by the GameGenre column, filtering for records where the Age column is between 20 and 30 (inclusive), and then ordering these groups by the count of records in each group in descending order. It then returns the GameGenre and Count for the top result, which is the most common game genre played by players in that age range." 
+}, { + "id": "2722", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common age range of players who play \u0027RPG\u0027 games?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, GameGenre VARCHAR(20));INSERT INTO Players (PlayerID, Age, GameGenre) VALUES (1, 25, \u0027RPG\u0027), (2, 24, \u0027RPG\u0027), (3, 30, \u0027FPS\u0027);", + "sql": "SELECT Age, COUNT(PlayerID) FROM Players WHERE GameGenre \u003d \u0027RPG\u0027 GROUP BY Age ORDER BY COUNT(PlayerID) DESC LIMIT 1;", + "sql_explanation": "The SQL query groups the Players table by Age for \u0027RPG\u0027 games, calculates the count of PlayerIDs for each group, orders the groups in descending order based on the count and returns the top 1." 
+}, { + "id": "3007", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which game genre has the highest average age of players?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, GamePreference VARCHAR(20)); INSERT INTO Players (PlayerID, Age, GamePreference) VALUES (1, 35, \u0027Simulation\u0027);", + "sql": "SELECT GamePreference, AVG(Age) AS AvgAge FROM Players GROUP BY GamePreference ORDER BY AvgAge DESC LIMIT 1;", + "sql_explanation": "This query finds the game genre with the highest average age of players. It groups players by game preference and calculates the average age for each group, then orders by the average age in descending order, and finally returns only the top row using LIMIT 1." 
+}, { + "id": "3125", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the game titles and number of players who have played each game title for more than 5 hours", + "sql_context": "CREATE TABLE GameSessions (PlayerID INT, GameTitle VARCHAR(50), HoursPlayed DECIMAL(5,2)); INSERT INTO GameSessions (PlayerID, GameTitle, HoursPlayed) VALUES (1, \u0027GameA\u0027, 10.5), (2, \u0027GameA\u0027, 5.3), (3, \u0027GameB\u0027, 7.2);", + "sql": "SELECT GameTitle, COUNT(DISTINCT PlayerID) FROM GameSessions WHERE HoursPlayed \u003e 5.0 GROUP BY GameTitle;", + "sql_explanation": "This query lists the game titles and number of players who have played each game title for more than 5 hours. It does so by filtering the GameSessions table for rows where HoursPlayed is greater than 5.0, grouping the results by GameTitle, and then applying the COUNT function to the DISTINCT PlayerID column." 
+}, { + "id": "3211", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of players who play action and simulation games?", + "sql_context": "CREATE TABLE GameTypes (PlayerID INT, GameType VARCHAR(20)); INSERT INTO GameTypes (PlayerID, GameType) VALUES (1, \u0027Action\u0027), (2, \u0027Adventure\u0027), (3, \u0027Action\u0027), (4, \u0027Simulation\u0027), (5, \u0027Action\u0027), (6, \u0027Simulation\u0027);", + "sql": "SELECT GameType, COUNT(*) FROM GameTypes WHERE GameType IN (\u0027Action\u0027, \u0027Simulation\u0027) GROUP BY GameType;", + "sql_explanation": "This SQL query counts the total number of players who play action and simulation games by using the COUNT function on the PlayerID column, while grouping the data for the GameType column and filtering the data for the GameType column where it is either \u0027Action\u0027 or \u0027Simulation\u0027." 
+}, { + "id": "3250", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average age of players from the USA and Canada", + "sql_context": "CREATE TABLE players (id INT PRIMARY KEY, name VARCHAR(50), age INT, country VARCHAR(50)); INSERT INTO players (id, name, age, country) VALUES (1, \u0027John Doe\u0027, 25, \u0027USA\u0027); INSERT INTO players (id, name, age, country) VALUES (2, \u0027Jane Smith\u0027, 30, \u0027Canada\u0027); INSERT INTO players (id, name, age, country) VALUES (3, \u0027Marcos Oliveira\u0027, 35, \u0027Brazil\u0027);", + "sql": "SELECT country, AVG(age) as avg_age FROM players WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027) GROUP BY country;", + "sql_explanation": "This query filters the players table to only include players from the USA and Canada, then groups by country and calculates the average age of players in each country, returning a table with two columns: country and avg_age." 
+}, { + "id": "3298", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from each game genre, and how many games belong to each genre?", + "sql_context": "CREATE TABLE GameDesign (GameID INT, GameTitle VARCHAR(20), Genre VARCHAR(10), Price DECIMAL(5,2)); INSERT INTO GameDesign (GameID, GameTitle, Genre, Price) VALUES (1, \u0027RacingGame\u0027, \u0027Racing\u0027, 29.99), (2, \u0027RPG\u0027, \u0027RPG\u0027, 49.99), (3, \u0027Shooter\u0027, \u0027FPS\u0027, 39.99), (4, \u0027Puzzle\u0027, \u0027Puzzle\u0027, 19.99), (5, \u0027Strategy\u0027, \u0027Strategy\u0027, 34.99);", + "sql": "SELECT Genre, SUM(Price) AS TotalRevenue, COUNT(GameID) AS GameCount FROM GameDesign GROUP BY Genre;", + "sql_explanation": "The query calculates the total revenue and the number of games for each game genre. It groups the results by genre and returns the required information for each group." 
+}, { + "id": "3484", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total playtime of players who use VR technology, grouped by their age?", + "sql_context": "CREATE TABLE PlayerPlaytime (PlayerID INT, Age INT, VRUser BOOLEAN, Playtime INT); INSERT INTO PlayerPlaytime (PlayerID, Age, VRUser, Playtime) VALUES (1, 25, true, 500), (2, 30, false, 400), (3, 22, true, 600);", + "sql": "SELECT Age, SUM(Playtime) as TotalPlaytime FROM PlayerPlaytime WHERE VRUser \u003d true GROUP BY Age;", + "sql_explanation": "This query calculates the total playtime of players who use VR technology, grouped by their age. It filters the PlayerPlaytime table to only include rows where VRUser is true, then groups the remaining rows by Age and calculates the sum of the Playtime column for each group, which represents the total playtime for each age group." 
+}, { + "id": "4058", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of players by gender?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10)); INSERT INTO Players (PlayerID, Age, Gender) VALUES (1, 25, \u0027Male\u0027); INSERT INTO Players (PlayerID, Age, Gender) VALUES (2, 30, \u0027Female\u0027); INSERT INTO Players (PlayerID, Age, Gender) VALUES (3, 22, \u0027Male\u0027); INSERT INTO Players (PlayerID, Age, Gender) VALUES (4, 35, \u0027Female\u0027); INSERT INTO Players (PlayerID, Age, Gender) VALUES (5, 28, \u0027Non-binary\u0027);", + "sql": "SELECT Players.Gender, COUNT(Players.PlayerID) FROM Players GROUP BY Players.Gender;", + "sql_explanation": "The SQL query retrieves the distribution of players by gender by grouping the Players table by the Gender column and applying the COUNT function to the PlayerID column." 
+}, { + "id": "4872", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many games have been designed by each game design studio?", + "sql_context": "CREATE TABLE GameDesign (GameID INT, Studio VARCHAR(50)); INSERT INTO GameDesign (GameID, Studio) VALUES (1, \u0027Studio A\u0027), (2, \u0027Studio B\u0027), (3, \u0027Studio A\u0027), (4, \u0027Studio C\u0027), (5, \u0027Studio B\u0027);", + "sql": "SELECT Studio, COUNT(*) as GameCount FROM GameDesign GROUP BY Studio;", + "sql_explanation": "The SQL query counts the number of games designed by each game design studio. It groups the data by Studio and calculates the count of GameID for each Studio using the COUNT function." 
+}, { + "id": "5358", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players have used each virtual reality headset?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Country VARCHAR(20), VRHeadset VARCHAR(20)); INSERT INTO Players (PlayerID, Age, Country, VRHeadset) VALUES (1, 25, \u0027Canada\u0027, \u0027HTC Vive\u0027);", + "sql": "SELECT VRHeadset, COUNT(*) FROM Players GROUP BY VRHeadset;", + "sql_explanation": "This query counts the number of players who have used each virtual reality headset. It sums the VRHeadset column values and groups by VRHeadset." +}, { + "id": "5485", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of players from each country?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (1, 25, \u0027Male\u0027, \u0027USA\u0027); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (2, 30, \u0027Female\u0027, \u0027Canada\u0027); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (3, 22, \u0027Male\u0027, \u0027Mexico\u0027);", + "sql": "SELECT Country, AVG(Age) FROM 
Players GROUP BY Country;", + "sql_explanation": "1. Group the Players table by the Country column. 2. Select the Country column and the average of the Age column from the grouped table." +}, { + "id": "215", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 mining companies in terms of environmental impact assessments in South Africa, from 2018 to 2020?", + "sql_context": "CREATE TABLE CompanyEnvironmentalImpact (year INT, company TEXT, country TEXT, impact_assessment_score INT); INSERT INTO CompanyEnvironmentalImpact (year, company, country, impact_assessment_score) VALUES (2018, \u0027ABC Mining\u0027, \u0027South Africa\u0027, 85), (2019, \u0027XYZ Mining\u0027, \u0027South Africa\u0027, 90), (2020, \u0027LMN Mining\u0027, \u0027South Africa\u0027, 95), (2018, \u0027DEF Mining\u0027, \u0027South Africa\u0027, 80), (2019, \u0027GHI Mining\u0027, \u0027South Africa\u0027, 85), (2020, \u0027JKL Mining\u0027, \u0027South Africa\u0027, 90);", + "sql": "SELECT context.company, SUM(context.impact_assessment_score) as total_impact_score FROM CompanyEnvironmentalImpact context WHERE context.country \u003d \u0027South Africa\u0027 AND context.year BETWEEN 2018 AND 2020 GROUP BY context.company ORDER BY total_impact_score DESC LIMIT 3;", + "sql_explanation": "This query calculates the total environmental impact assessment score for each mining company in South Africa from 2018 to 2020 by summing the impact_assessment_score column grouped by company. 
It then orders the result set by the total impact assessment score in descending order and limits the result set to the top 3 companies." +}, { + "id": "654", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of gold and silver extracted by each location?", + "sql_context": "CREATE TABLE geological_survey (location VARCHAR(255), mineral VARCHAR(255), quantity FLOAT, year INT); INSERT INTO geological_survey (location, mineral, quantity, year) VALUES (\u0027Mine A\u0027, \u0027Gold\u0027, 1000, 2015), (\u0027Mine A\u0027, \u0027Silver\u0027, 2000, 2015), (\u0027Mine B\u0027, \u0027Gold\u0027, 1500, 2016), (\u0027Mine B\u0027, \u0027Silver\u0027, 2500, 2016);", + "sql": "SELECT location, SUM(CASE WHEN mineral \u003d \u0027Gold\u0027 THEN quantity ELSE 0 END) as total_gold, SUM(CASE WHEN mineral \u003d \u0027Silver\u0027 THEN quantity ELSE 0 END) as total_silver FROM geological_survey GROUP BY location;", + "sql_explanation": "This query calculates the total quantity of gold and silver extracted by each location using the SUM() function with a CASE statement to separate the quantities by mineral." 
+}, { + "id": "1167", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of minerals extracted in \u0027Queensland\u0027, Australia, by month, for the last 2 years?", + "sql_context": "CREATE TABLE extraction (id INT, site_name VARCHAR(50), date DATE, mineral VARCHAR(50), quantity INT); INSERT INTO extraction (id, site_name, date, mineral, quantity) VALUES (1, \u0027Mine A\u0027, \u00272020-03-15\u0027, \u0027Gold\u0027, 1500);", + "sql": "SELECT MONTH(date) AS month, SUM(quantity) AS total_quantity FROM extraction WHERE site_name \u003d \u0027Queensland\u0027 AND date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 2 YEAR) GROUP BY month;", + "sql_explanation": "The SQL query extracts the month from the \u0027date\u0027 column, sums the quantity of minerals extracted per month, filters the rows to only include those in \u0027Queensland\u0027 and within the last 2 years, and groups the result set by month." 
+}, { + "id": "1292", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mining sites have experienced equipment failures in the past quarter?", + "sql_context": "CREATE TABLE mining_site_equipment (site_id INT, equipment_id INT, failure_date DATE); INSERT INTO mining_site_equipment (site_id, equipment_id, failure_date) VALUES (1, 101, \u00272022-01-01\u0027), (1, 102, \u00272022-02-15\u0027), (2, 101, \u00272022-03-01\u0027), (3, 103, \u00272022-04-01\u0027);", + "sql": "SELECT site_id FROM mining_site_equipment WHERE failure_date \u003e\u003d \u00272022-01-01\u0027 AND failure_date \u003c \u00272022-04-01\u0027 GROUP BY site_id HAVING COUNT(DISTINCT failure_date) \u003e 0;", + "sql_explanation": "This SQL query identifies mining sites that have experienced equipment failures in the past quarter by using a GROUP BY clause to group the results by site_id, and filtering the results to only include rows where the number of distinct failure_date values is greater than 0 (i.e., at least one failure occurred during the quarter)." 
+}, { + "id": "1487", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total mineral extraction quantities for each country in 2020, sorted by quantity in descending order?", + "sql_context": "CREATE TABLE MineralExtraction (country VARCHAR(50), year INT, mineral VARCHAR(50), quantity INT); INSERT INTO MineralExtraction (country, year, mineral, quantity) VALUES (\u0027Canada\u0027, 2020, \u0027Gold\u0027, 120), (\u0027Mexico\u0027, 2020, \u0027Silver\u0027, 150), (\u0027Brazil\u0027, 2020, \u0027Iron\u0027, 200);", + "sql": "SELECT context.country, SUM(context.quantity) as total_quantity FROM context WHERE context.year \u003d 2020 GROUP BY context.country ORDER BY total_quantity DESC;", + "sql_explanation": "This query groups the data by country and sums the total mineral extraction quantities for the year 2020. It then orders the results by the total quantity in descending order." 
+}, { + "id": "2026", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily production rate of silver in Bolivia?", + "sql_context": "CREATE TABLE daily_production (id INT, country VARCHAR(255), mineral VARCHAR(255), date DATE, quantity INT); INSERT INTO daily_production (id, country, mineral, date, quantity) VALUES (1, \u0027Bolivia\u0027, \u0027Silver\u0027, \u00272022-01-01\u0027, 50), (2, \u0027Bolivia\u0027, \u0027Silver\u0027, \u00272022-01-02\u0027, 60), (3, \u0027Bolivia\u0027, \u0027Silver\u0027, \u00272022-01-03\u0027, 70);", + "sql": "SELECT date, AVG(quantity) as daily_production_rate FROM daily_production WHERE country \u003d \u0027Bolivia\u0027 AND mineral \u003d \u0027Silver\u0027 GROUP BY date;", + "sql_explanation": "This query calculates the daily production rate of silver in Bolivia by averaging the quantity column, filtering for silver, grouping by date and summarizing the daily production rate." 
+}, { + "id": "2947", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production volume for all silver mines in Mexico, grouped by year?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, production_volume INT, year INT, mineral TEXT); INSERT INTO mines (id, name, location, production_volume, year, mineral) VALUES (1, \u0027Mexican Silver Mine 1\u0027, \u0027Mexico\u0027, 6000, 2020, \u0027silver\u0027); INSERT INTO mines (id, name, location, production_volume, year, mineral) VALUES (2, \u0027Mexican Silver Mine 2\u0027, \u0027Mexico\u0027, 7000, 2020, \u0027silver\u0027); INSERT INTO mines (id, name, location, production_volume, year, mineral) VALUES (3, \u0027Mexican Silver Mine 3\u0027, \u0027Mexico\u0027, 8000, 2019, \u0027silver\u0027);", + "sql": "SELECT year, AVG(production_volume) FROM mines WHERE location \u003d \u0027Mexico\u0027 AND mineral \u003d \u0027silver\u0027 GROUP BY year;", + "sql_explanation": "This query calculates the average production volume for all silver mines in Mexico, grouped by year. It does so by filtering the \u0027mines\u0027 table for entries with location \u0027Mexico\u0027 and mineral \u0027silver\u0027, and then calculating the average production_volume for the filtered set, grouped by year." 
+}, { + "id": "3097", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 states with the highest average salary in the \"survey_data\" table", + "sql_context": "CREATE TABLE survey_data (id INT, state VARCHAR(2), salary DECIMAL(10,2)); INSERT INTO survey_data (id, state, salary) VALUES (1, \u0027CA\u0027, 60000.00), (2, \u0027WY\u0027, 75000.00), (3, \u0027NV\u0027, 80000.00), (4, \u0027CA\u0027, 65000.00), (5, \u0027WY\u0027, 80000.00);", + "sql": "SELECT state, AVG(salary) as avg_salary FROM survey_data GROUP BY state ORDER BY avg_salary DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 states with the highest average salary by grouping the data by state and calculating the average salary for each state, then ordering the results by average salary in descending order and limiting the results to the top 3." 
+}, { + "id": "3788", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of minerals extracted per month?", + "sql_context": "CREATE TABLE extraction_monthly (mine_id INT, extraction_month TEXT, mineral TEXT, quantity INT); INSERT INTO extraction_monthly (mine_id, extraction_month, mineral, quantity) VALUES (1, \u0027January\u0027, \u0027Gold\u0027, 50), (1, \u0027January\u0027, \u0027Silver\u0027, 75), (1, \u0027February\u0027, \u0027Gold\u0027, 55), (1, \u0027February\u0027, \u0027Silver\u0027, 80), (2, \u0027January\u0027, \u0027Gold\u0027, 60), (2, \u0027January\u0027, \u0027Silver\u0027, 70), (2, \u0027February\u0027, \u0027Gold\u0027, 65), (2, \u0027February\u0027, \u0027Silver\u0027, 85);", + "sql": "SELECT extraction_month, SUM(quantity) FROM extraction_monthly GROUP BY extraction_month;", + "sql_explanation": "This SQL query calculates the total quantity of minerals extracted per month by grouping the extraction_month and summing up the quantity column." 
+}, { + "id": "3844", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum depth of all silver mines?", + "sql_context": "CREATE TABLE MineDepths (MineID INT, MineType VARCHAR(10), Depth INT); INSERT INTO MineDepths (MineID, MineType, Depth) VALUES (1, \u0027Gold\u0027, 1200), (2, \u0027Silver\u0027, 800), (3, \u0027Gold\u0027, 1500);", + "sql": "SELECT MineType, MIN(Depth) FROM MineDepths WHERE MineType \u003d \u0027Silver\u0027 GROUP BY MineType;", + "sql_explanation": "The SQL query calculates the minimum depth of all silver mines. It filters the MineDepths table to only include silver mines, groups the results by mine type, and then calculates the minimum depth for each group." 
+}, { + "id": "4447", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary of workers in the \u0027oil\u0027 industry in each state in the USA?", + "sql_context": "CREATE TABLE workers (id INT, name VARCHAR(50), industry VARCHAR(50), salary FLOAT, state VARCHAR(50)); INSERT INTO workers (id, name, industry, salary, state) VALUES (1, \u0027John Doe\u0027, \u0027oil\u0027, 60000, \u0027Texas\u0027); INSERT INTO workers (id, name, industry, salary, state) VALUES (2, \u0027Jane Smith\u0027, \u0027oil\u0027, 65000, \u0027California\u0027); INSERT INTO workers (id, name, industry, salary, state) VALUES (3, \u0027Mike Johnson\u0027, \u0027oil\u0027, 70000, \u0027California\u0027);", + "sql": "SELECT state, MIN(salary) FROM workers WHERE industry \u003d \u0027oil\u0027 GROUP BY state;", + "sql_explanation": "This query groups the \u0027workers\u0027 table by state and calculates the minimum salary for each group where the industry is \u0027oil\u0027." 
+}, { + "id": "4763", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which country has the highest mineral extraction?", + "sql_context": "CREATE TABLE mineral_extraction (mine_id INT, country TEXT, quantity INT); INSERT INTO mineral_extraction (mine_id, country, quantity) VALUES (1, \u0027USA\u0027, 5000), (2, \u0027Canada\u0027, 6000), (3, \u0027Mexico\u0027, 4000);", + "sql": "SELECT country, MAX(quantity) FROM mineral_extraction GROUP BY country;", + "sql_explanation": "This SQL query finds the country with the highest mineral extraction by grouping the \u0027mineral_extraction\u0027 table by country and then selecting the country with the maximum quantity." 
+}, { + "id": "4857", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average environmental impact score by mine?", + "sql_context": "CREATE TABLE impact_scores (score_id INT, mine_id INT, impact_score INT); INSERT INTO impact_scores (score_id, mine_id, impact_score) VALUES (1, 1, 80), (2, 1, 85), (3, 2, 90), (4, 2, 95);", + "sql": "SELECT mine_id, AVG(impact_score) FROM impact_scores GROUP BY mine_id;", + "sql_explanation": "This query calculates the average environmental impact score by mine by using the AVG aggregate function with the GROUP BY clause to group the results by mine_id." 
+}, { + "id": "5444", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of mineral extraction by year?", + "sql_context": "CREATE TABLE extraction (extraction_id INT, mine_id INT, year INT, mineral VARCHAR(255), quantity INT); INSERT INTO extraction (extraction_id, mine_id, year, mineral, quantity) VALUES (1, 1, 2018, \u0027Gold\u0027, 1000), (2, 1, 2019, \u0027Gold\u0027, 1200), (3, 2, 2018, \u0027Uranium\u0027, 2000), (4, 2, 2019, \u0027Uranium\u0027, 2500);", + "sql": "SELECT year, SUM(quantity) FROM extraction GROUP BY year;", + "sql_explanation": "This query calculates the total amount of mineral extraction by year by using the SUM aggregate function with the GROUP BY clause to group the results by year." 
+}, { + "id": "1716", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "CREATE TABLE Coastal_Conservation(id INT, year INT, region VARCHAR(50), protected_area VARCHAR(50), size INT, conservation_status VARCHAR(50));", + "sql_context": "CREATE TABLE Coastal_Conservation(id INT, year INT, region VARCHAR(50), protected_area VARCHAR(50), size INT, conservation_status VARCHAR(50));INSERT INTO Coastal_Conservation(id, year, region, protected_area, size, conservation_status) VALUES (1, 2021, \u0027Caribbean\u0027, \u0027Marine Park A\u0027, 5000, \u0027Partially Protected\u0027);", + "sql": "SELECT region, AVG(size) AS Average_Protected_Area_Size FROM Coastal_Conservation WHERE conservation_status \u003d \u0027Partially Protected\u0027 GROUP BY region;", + "sql_explanation": "The SQL query calculates the average size of partially protected areas in the Caribbean region in the Coastal_Conservation table using the AVG function." 
+}, { + "id": "2238", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine species were sighted in both the \u0027Atlantic Ocean\u0027 and the \u0027Indian Ocean\u0027?", + "sql_context": "CREATE TABLE Sightings (Species VARCHAR(25), Ocean VARCHAR(25), Sightings INT); INSERT INTO Sightings (Species, Ocean, Sightings) VALUES (\u0027Dolphin\u0027, \u0027Atlantic Ocean\u0027, 200), (\u0027Turtle\u0027, \u0027Pacific Ocean\u0027, 350), (\u0027Shark\u0027, \u0027Indian Ocean\u0027, 150), (\u0027Whale\u0027, \u0027Pacific Ocean\u0027, 400), (\u0027Dolphin\u0027, \u0027Indian Ocean\u0027, 50);", + "sql": "SELECT Species FROM Sightings WHERE Ocean IN (\u0027Atlantic Ocean\u0027, \u0027Indian Ocean\u0027) GROUP BY Species HAVING COUNT(DISTINCT Ocean) \u003d 2;", + "sql_explanation": "The SQL query identifies the marine species that were sighted in both the \u0027Atlantic Ocean\u0027 and the \u0027Indian Ocean\u0027. It groups the rows by species and filters the groups that have exactly 2 distinct oceans using the HAVING clause with the COUNT() function." 
+}, { + "id": "3229", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of underwater volcanoes near the Philippines", + "sql_context": "CREATE TABLE Underwater_Volcanoes (id INT, volcano_name VARCHAR(50), location VARCHAR(50), depth FLOAT); INSERT INTO Underwater_Volcanoes (id, volcano_name, location, depth) VALUES (1, \u0027Apo\u0027, \u0027Philippines\u0027, -2200);", + "sql": "SELECT location, COUNT(*) FROM Underwater_Volcanoes GROUP BY location HAVING location \u003d \u0027Philippines\u0027;", + "sql_explanation": "This query counts the number of underwater volcanoes near the Philippines using the GROUP BY clause and the HAVING clause with the COUNT aggregate function." 
+}, { + "id": "4611", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vessels were involved in maritime accidents by country?", + "sql_context": "CREATE TABLE maritime_accidents (accident_id INT, vessel_id INT, country VARCHAR(100)); INSERT INTO maritime_accidents (accident_id, vessel_id, country) VALUES (1, 1, \u0027Canada\u0027); INSERT INTO maritime_accidents (accident_id, vessel_id, country) VALUES (2, 2, \u0027Mexico\u0027);", + "sql": "SELECT country, COUNT(vessel_id) FROM maritime_accidents GROUP BY country;", + "sql_explanation": "This query groups the maritime accidents by country and counts the number of vessels involved in accidents for each country." 
+}, { + "id": "4755", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many maritime safety incidents have occurred in each region of the world?", + "sql_context": "CREATE TABLE maritime_safety_incidents (region TEXT, year INT, incidents INT); INSERT INTO maritime_safety_incidents (region, year, incidents) VALUES (\u0027North America\u0027, 2010, 30), (\u0027South America\u0027, 2010, 20), (\u0027Europe\u0027, 2010, 40), (\u0027Africa\u0027, 2010, 50), (\u0027Asia\u0027, 2010, 60), (\u0027Australia\u0027, 2010, 70), (\u0027North America\u0027, 2011, 35), (\u0027South America\u0027, 2011, 25), (\u0027Europe\u0027, 2011, 45), (\u0027Africa\u0027, 2011, 55), (\u0027Asia\u0027, 2011, 65), (\u0027Australia\u0027, 2011, 75);", + "sql": "SELECT region, COUNT(*) FROM maritime_safety_incidents GROUP BY region;", + "sql_explanation": "This SQL query groups the rows in the maritime_safety_incidents table by the region column using the GROUP BY clause. For each group, the query calculates the count of rows using the COUNT function." 
+}, { + "id": "777", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily sales trend for each product category in the last month?", + "sql_context": "CREATE TABLE Product (id INT, name VARCHAR(255), category VARCHAR(255), revenue FLOAT, sale_date DATE);", + "sql": "SELECT category, sale_date, SUM(revenue) as daily_sales FROM Product WHERE sale_date \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 month\u0027) GROUP BY ROLLUP(category, sale_date) ORDER BY category, sale_date DESC;", + "sql_explanation": "The SQL query calculates the daily sales trend for each product category in the last month by filtering the records based on the sale date and then grouping the records based on the product category and sale date. It then calculates the daily sales for each product category using the SUM function. The query uses the ROLLUP function to calculate the total sales for each day and the overall total sales. It then orders the results by product category and sale date in descending order." 
+}, { + "id": "1001", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by each store for the ethical product category in Q4 of 2021, ordered by the highest revenue first?", + "sql_context": "CREATE TABLE store_sales(id INT, store VARCHAR(255), product VARCHAR(255), revenue FLOAT, date DATE); INSERT INTO store_sales(id, store, product, revenue, date) VALUES (1, \u0027Seattle Flagship\u0027, \u0027Eco-friendly Tote Bag\u0027, 200.00, \u00272021-12-01\u0027);", + "sql": "SELECT store, SUM(revenue) as total_revenue FROM store_sales WHERE product LIKE \u0027ethical%\u0027 AND date BETWEEN \u00272021-10-01\u0027 AND \u00272021-12-31\u0027 GROUP BY store ORDER BY total_revenue DESC;", + "sql_explanation": "The query calculates the total revenue for each store in the ethical product category for Q4 2021. It uses the WHERE clause to filter the store_sales table for ethical products and sales within Q4 2021. It then groups the results by store and calculates the total revenue for each. Finally, it orders the results in descending order based on the total revenue." 
+}, { + "id": "1141", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 suppliers in the US by the number of orders in 2021?", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, supplier_name VARCHAR(50), country VARCHAR(50), orders INT);", + "sql": "SELECT supplier_name, COUNT(orders) FROM suppliers WHERE country \u003d \u0027USA\u0027 AND EXTRACT(YEAR FROM order_date) \u003d 2021 GROUP BY supplier_name ORDER BY COUNT(orders) DESC LIMIT 3;", + "sql_explanation": "This query retrieves the top 3 suppliers in the US by the number of orders in 2021. It filters the suppliers table by country \u003d \u0027USA\u0027 and extracts the year from the order_date field. It then groups the records by supplier_name and counts the number of orders. Finally, it orders the results by the number of orders in descending order and limits the results to the top 3." 
+}, { + "id": "1741", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of products that are both \u0027recycled\u0027 and \u0027fair_trade\u0027 certified?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), certification VARCHAR(255));INSERT INTO products VALUES (1,\u0027Product A\u0027,\u0027recycled\u0027),(2,\u0027Product B\u0027,\u0027fair_trade\u0027),(3,\u0027Product C\u0027,\u0027organic\u0027),(4,\u0027Product D\u0027,\u0027recycled\u0027),(5,\u0027Product E\u0027,\u0027fair_trade\u0027),(6,\u0027Product F\u0027,\u0027recycled, fair_trade\u0027);", + "sql": "SELECT SUM(quantity) FROM products WHERE certification IN (\u0027recycled\u0027, \u0027fair_trade\u0027) GROUP BY certification HAVING COUNT(DISTINCT certification) \u003d 2", + "sql_explanation": "The SQL query calculates the total quantity of products that are both \u0027recycled\u0027 and \u0027fair_trade\u0027 certified by using the IN operator to filter the products table based on the certification column and the GROUP BY and HAVING clauses to only consider rows where both certifications are present." 
+}, { + "id": "2342", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 suppliers with the highest average ethical product ratings?", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT, ethical_rating FLOAT); INSERT INTO suppliers (supplier_id, supplier_name, ethical_rating) VALUES (1, \u0027Green Supplies\u0027, 4.8), (2, \u0027Eco Friendly Inc\u0027, 4.6), (3, \u0027Fair Trade Co\u0027, 4.5);", + "sql": "SELECT supplier_name, AVG(ethical_rating) AS avg_rating FROM suppliers GROUP BY supplier_name ORDER BY avg_rating DESC LIMIT 3;", + "sql_explanation": "This query calculates the average ethical rating for each supplier, groups them by supplier_name, and orders them in descending order. It then returns the top 3 suppliers with the highest average ethical ratings." 
+}, { + "id": "3814", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products are in each supply chain tier?", + "sql_context": "CREATE TABLE supply_chain (product_id INT, tier INT); INSERT INTO supply_chain (product_id, tier) VALUES (1, 1), (1, 2), (2, 1), (3, 1), (3, 3);", + "sql": "SELECT tier, COUNT(DISTINCT product_id) AS product_count FROM supply_chain GROUP BY tier;", + "sql_explanation": "This SQL query counts the number of unique products in each tier of the supply chain. It groups the supply chain by tier and then counts the number of distinct product IDs in each group." 
+}, { + "id": "4053", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products does each brand offer in the ethical product line?", + "sql_context": "CREATE TABLE brands (brand_id INT, brand TEXT); CREATE TABLE products (product_id INT, product_name TEXT, brand_id INT, ethical_line BOOLEAN); INSERT INTO brands (brand_id, brand) VALUES (1, \u0027Brand A\u0027); INSERT INTO brands (brand_id, brand) VALUES (2, \u0027Brand B\u0027); INSERT INTO brands (brand_id, brand) VALUES (3, \u0027Brand C\u0027); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (1, \u0027Product 1\u0027, 1, true); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (2, \u0027Product 2\u0027, 2, true); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (3, \u0027Product 3\u0027, 3, false); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (4, \u0027Product 4\u0027, 1, true); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (5, \u0027Product 5\u0027, 2, false); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (6, \u0027Product 6\u0027, 3, true); INSERT INTO products (product_id, product_name, brand_id, ethical_line) VALUES (7, \u0027Product 7\u0027, 1, true);", + "sql": "SELECT brand_id, COUNT(*) FROM products WHERE ethical_line \u003d true GROUP BY brand_id;", + "sql_explanation": "The SQL query groups the \u0027products\u0027 table by the \u0027brand_id\u0027 column and then uses the COUNT(*) function to count the 
number of rows in each group where \u0027ethical_line\u0027 is true. This provides the number of products each brand offers in the ethical product line." +}, { + "id": "4406", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon footprint of products made from recycled materials, grouped by country?", + "sql_context": "CREATE TABLE products (product_id INT, country VARCHAR(50), is_recycled BOOLEAN, carbon_footprint DECIMAL(10, 2)); CREATE VIEW recycled_products AS SELECT country, carbon_footprint FROM products WHERE is_recycled \u003d TRUE GROUP BY country;", + "sql": "SELECT country, AVG(carbon_footprint) FROM recycled_products GROUP BY country;", + "sql_explanation": "The query calculates the average carbon footprint of products made from recycled materials, grouped by country by joining the products table on the country column and filtering the results to only include records where the is_recycled column is true. The AVG function is then used to calculate the average carbon footprint and the results are grouped by country." 
+}, { + "id": "5011", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average product transparency score for each brand?", + "sql_context": "CREATE TABLE brands (brand_id INT, brand TEXT); CREATE TABLE products (product_id INT, product_name TEXT, brand_id INT, transparency_score INT); INSERT INTO brands (brand_id, brand) VALUES (1, \u0027Brand A\u0027); INSERT INTO brands (brand_id, brand) VALUES (2, \u0027Brand B\u0027); INSERT INTO brands (brand_id, brand) VALUES (3, \u0027Brand C\u0027); INSERT INTO products (product_id, product_name, brand_id, transparency_score) VALUES (1, \u0027Product 1\u0027, 1, 85); INSERT INTO products (product_id, product_name, brand_id, transparency_score) VALUES (2, \u0027Product 2\u0027, 2, 90); INSERT INTO products (product_id, product_name, brand_id, transparency_score) VALUES (3, \u0027Product 3\u0027, 3, 80); INSERT INTO products (product_id, product_name, brand_id, transparency_score) VALUES (4, \u0027Product 4\u0027, 1, 95); INSERT INTO products (product_id, product_name, brand_id, transparency_score) VALUES (5, \u0027Product 5\u0027, 2, 75);", + "sql": "SELECT brand, AVG(transparency_score) FROM products GROUP BY brand;", + "sql_explanation": "The SQL query groups the \u0027products\u0027 table by the \u0027brand\u0027 column and then uses the AVG() function to find the average \u0027transparency_score\u0027 for each group. This provides the average product transparency score for each brand." 
+}, { + "id": "5189", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees in \u0027ethical_labor\u0027 table are there for each \u0027position\u0027?", + "sql_context": "CREATE TABLE ethical_labor (employee_id INT, employee_name VARCHAR(50), position VARCHAR(50), country VARCHAR(50), salary DECIMAL(10,2));", + "sql": "SELECT position, COUNT(*) FROM ethical_labor GROUP BY position;", + "sql_explanation": "The SQL query uses the \u0027ethical_labor\u0027 table and groups the records by the \u0027position\u0027 column. It then counts the number of records for each \u0027position\u0027 using the COUNT() function." 
+}, { + "id": "5401", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products does each brand offer?", + "sql_context": "CREATE TABLE brands (brand_id INT, brand TEXT); CREATE TABLE products (product_id INT, product_name TEXT, brand_id INT); INSERT INTO brands (brand_id, brand) VALUES (1, \u0027Brand A\u0027); INSERT INTO brands (brand_id, brand) VALUES (2, \u0027Brand B\u0027); INSERT INTO brands (brand_id, brand) VALUES (3, \u0027Brand C\u0027); INSERT INTO products (product_id, product_name, brand_id) VALUES (1, \u0027Product 1\u0027, 1); INSERT INTO products (product_id, product_name, brand_id) VALUES (2, \u0027Product 2\u0027, 2); INSERT INTO products (product_id, product_name, brand_id) VALUES (3, \u0027Product 3\u0027, 3); INSERT INTO products (product_id, product_name, brand_id) VALUES (4, \u0027Product 4\u0027, 1); INSERT INTO products (product_id, product_name, brand_id) VALUES (5, \u0027Product 5\u0027, 2); INSERT INTO products (product_id, product_name, brand_id) VALUES (6, \u0027Product 6\u0027, 3); INSERT INTO products (product_id, product_name, brand_id) VALUES (7, \u0027Product 7\u0027, 1);", + "sql": "SELECT brand_id, COUNT(*) FROM products GROUP BY brand_id;", + "sql_explanation": "The SQL query groups the \u0027products\u0027 table by the \u0027brand_id\u0027 column and then uses the COUNT(*) function to count the number of rows in each group. This provides the number of products each brand offers." 
+}, { + "id": "1425", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of wins and losses for each team in the current season?", + "sql_context": "CREATE TABLE games (team_id INT, result VARCHAR(5)); INSERT INTO games (team_id, result) VALUES (1, \u0027Win\u0027), (1, \u0027Loss\u0027), (2, \u0027Win\u0027), (2, \u0027Win\u0027), (3, \u0027Loss\u0027);", + "sql": "SELECT team_id, SUM(CASE WHEN result \u003d \u0027Win\u0027 THEN 1 ELSE 0 END) AS wins, SUM(CASE WHEN result \u003d \u0027Loss\u0027 THEN 1 ELSE 0 END) AS losses FROM games GROUP BY team_id;", + "sql_explanation": "This query uses conditional aggregation (SUM and CASE WHEN) to calculate the number of wins and losses for each team based on the \u0027result\u0027 column of the \u0027games\u0027 table." 
+}, { + "id": "1794", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the most gold medals won in the Winter Olympics?", + "sql_context": "CREATE TABLE winter_olympics_medals (country VARCHAR(50), medals INT, medal_type VARCHAR(10)); INSERT INTO winter_olympics_medals (country, medals, medal_type) VALUES (\u0027USA\u0027, 305, \u0027gold\u0027), (\u0027Canada\u0027, 189, \u0027gold\u0027), (\u0027Norway\u0027, 368, \u0027gold\u0027);", + "sql": "SELECT country, SUM(medals) as total_gold FROM winter_olympics_medals WHERE medal_type \u003d \u0027gold\u0027 GROUP BY country ORDER BY total_gold DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the total number of gold medals won by each country and groups them. It then orders the results in descending order based on the total gold medals and limits the output to the top 3 countries." 
+}, { + "id": "1861", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which athlete has won the most medals in swimming events in France?", + "sql_context": "CREATE TABLE if not exists countries (country_id INT, country VARCHAR(255)); INSERT INTO countries (country_id, country) VALUES (1, \u0027France\u0027), (2, \u0027Germany\u0027), (3, \u0027Italy\u0027); CREATE TABLE if not exists athletes (athlete_id INT, country_id INT, medals INT, sport VARCHAR(255)); INSERT INTO athletes (athlete_id, country_id, medals, sport) VALUES (1, 1, 10, \u0027Swimming\u0027), (2, 2, 8, \u0027Swimming\u0027), (3, 3, 12, \u0027Swimming\u0027), (4, 1, 15, \u0027Swimming\u0027), (5, 1, 5, \u0027Swimming\u0027);", + "sql": "SELECT athlete_id, SUM(medals) FROM athletes WHERE country_id \u003d 1 AND sport \u003d \u0027Swimming\u0027 GROUP BY athlete_id ORDER BY SUM(medals) DESC LIMIT 1;", + "sql_explanation": "This SQL query filters the athletes table to only include swimming athletes from France (country_id \u003d 1) and then calculates the total number of medals won by each athlete. It then returns the athlete_id and total medals for the athlete with the most medals." 
+}, { + "id": "2627", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of home runs hit in a single game in the J-League, excluding games with less than 3 home runs hit?", + "sql_context": "CREATE TABLE J_League_Matches (MatchID INT, HomeTeam VARCHAR(50), AwayTeam VARCHAR(50), HomeRuns INT, AwayRuns INT); INSERT INTO J_League_Matches (MatchID, HomeTeam, AwayTeam, HomeRuns, AwayRuns) VALUES (1, \u0027Kashima Antlers\u0027, \u0027Urawa Red Diamonds\u0027, 2, 1);", + "sql": "SELECT AVG(HomeRuns + AwayRuns) FROM J_League_Matches WHERE (HomeRuns + AwayRuns) \u003e\u003d 3 GROUP BY (HomeRuns + AwayRuns);", + "sql_explanation": "1. Filters the J_League_Matches table for records where the sum of HomeRuns and AwayRuns is greater than or equal to 3. 2. Groups the filtered records by the sum of HomeRuns and AwayRuns. 3. Uses the AVG function to calculate the average number of home runs hit in a single game for the qualifying records." 
+}, { + "id": "2850", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which team had the highest total points in the 2022 NFL season?", + "sql_context": "CREATE TABLE nfl_scores (team TEXT, total_points INT); INSERT INTO nfl_scores (team, total_points) VALUES (\u0027Patriots\u0027, 523), (\u0027Eagles\u0027, 518), (\u0027Chiefs\u0027, 516);", + "sql": "SELECT team, SUM(total_points) as total_points FROM nfl_scores GROUP BY team ORDER BY total_points DESC LIMIT 1;", + "sql_explanation": "The SQL query calculates the total points for each team by grouping the records by the team column and using the SUM function. Then, it orders the results in descending order by the total points and returns the top record." 
+}, { + "id": "3043", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time spent on ice by each defenseman in the 2021-2022 NHL season?", + "sql_context": "CREATE TABLE nhl_season (player_id INT, player_name VARCHAR(50), team_id INT, team_name VARCHAR(50), position VARCHAR(50), games_played INT, time_on_ice INT); INSERT INTO nhl_season (player_id, player_name, team_id, team_name, position, games_played, time_on_ice) VALUES (1, \u0027Victor Hedman\u0027, 1, \u0027Tampa Bay Lightning\u0027, \u0027D\u0027, 82, 2000);", + "sql": "SELECT player_name, AVG(time_on_ice) as avg_time FROM nhl_season WHERE position \u003d \u0027D\u0027 GROUP BY player_name;", + "sql_explanation": "The query calculates the average time spent on ice per game for each defenseman in the 2021-2022 NHL season. It filters the records by position (D) and calculates the average of the time_on_ice column for each player." 
+}, { + "id": "3370", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common foul in the \u0027basketball_fouls\u0027 table?", + "sql_context": "CREATE TABLE basketball_teams (team_id INT, name VARCHAR(50)); CREATE TABLE basketball_players (player_id INT, name VARCHAR(50), team_id INT); CREATE TABLE basketball_fouls (foul_id INT, player_id INT, type VARCHAR(50)); INSERT INTO basketball_teams (team_id, name) VALUES (1, \u0027Chicago Bulls\u0027), (2, \u0027Los Angeles Lakers\u0027); INSERT INTO basketball_players (player_id, name, team_id) VALUES (1, \u0027Michael Jordan\u0027, 1), (2, \u0027Kobe Bryant\u0027, 2); INSERT INTO basketball_fouls (foul_id, player_id, type) VALUES (1, 1, \u0027Block\u0027), (2, 1, \u0027Charge\u0027), (3, 2, \u0027Block\u0027), (4, 2, \u0027Charge\u0027), (5, 2, \u0027Travel\u0027);", + "sql": "SELECT type AS most_common_foul FROM basketball_fouls GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "The SQL query calculates the most common foul in the \u0027basketball_fouls\u0027 table by using the GROUP BY clause to group the \u0027basketball_fouls\u0027 table by the \u0027type\u0027 column. The query then uses the ORDER BY clause to sort the results by the count of each type of foul in descending order. The query then returns the most common foul by using the LIMIT clause to return only the first row." 
+}, { + "id": "3609", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the highest scoring game for each team in the 2021 NFL season.", + "sql_context": "CREATE TABLE teams (team_id INT, team_name TEXT, games_played INT, points_per_game DECIMAL(5,2)); INSERT INTO teams (team_id, team_name, games_played, points_per_game) VALUES (1, \u0027Tampa Bay Buccaneers\u0027, 17, 30.1), (2, \u0027Kansas City Chiefs\u0027, 17, 28.2);", + "sql": "SELECT team_name, MAX(points_per_game) AS highest_scoring_game FROM teams GROUP BY team_name;", + "sql_explanation": "Find the maximum points per game for each team by grouping the records by team name and selecting the maximum points per game." 
+}, { + "id": "4226", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the players and their average points per game in the \"nba_games\" table", + "sql_context": "CREATE TABLE nba_games (player VARCHAR(255), points INTEGER, games_played INTEGER);", + "sql": "SELECT player, AVG(points) as avg_points_per_game FROM nba_games GROUP BY player;", + "sql_explanation": "This query lists the players and their average points per game in the \"nba_games\" table by grouping the table by the \"player\" column and calculating the average of the \"points\" column for each group." +}, { + "id": "4746", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of games won by each baseball team in the MLB?", + "sql_context": "CREATE TABLE baseball_teams (id INT, team_name VARCHAR(50), games_played INT, games_won INT); INSERT INTO baseball_teams (id, team_name, games_played, games_won) VALUES (1, \u0027Yankees\u0027, 162, 90), (2, \u0027Red Sox\u0027, 162, 85), (3, \u0027Dodgers\u0027, 162, 95);", + "sql": "SELECT team_name, SUM(games_won) FROM baseball_teams GROUP BY team_name;", + "sql_explanation": "We group the records in the 
baseball_teams table by the team_name column and calculate the total number of games won for each team using the SUM function." +}, { + "id": "4888", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of goals scored by each team in the UEFA Champions League?", + "sql_context": "CREATE TABLE Teams (TeamID INT, TeamName VARCHAR(100), Goals INT); INSERT INTO Teams (TeamID, TeamName, Goals) VALUES (1, \u0027Barcelona\u0027, 12), (2, \u0027Real Madrid\u0027, 15), (3, \u0027Bayern Munich\u0027, 18);", + "sql": "SELECT TeamName, AVG(Goals) as AvgGoals FROM Teams GROUP BY TeamName;", + "sql_explanation": "This SQL query calculates the average number of goals scored by each team in the Teams table. It uses the AVG function to find the average goals and groups the result by TeamName to get the average for each team." 
+}, { + "id": "5014", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total salaries of players in each position for the hockey_players table?", + "sql_context": "CREATE TABLE hockey_players (player_id INT, name VARCHAR(50), position VARCHAR(20), team VARCHAR(50), salary DECIMAL(10, 2)); INSERT INTO hockey_players (player_id, name, position, team, salary) VALUES (1, \u0027Alex Ovechkin\u0027, \u0027Left Wing\u0027, \u0027Washington Capitals\u0027, 10000000.00); INSERT INTO hockey_players (player_id, name, position, team, salary) VALUES (2, \u0027Sidney Crosby\u0027, \u0027Center\u0027, \u0027Pittsburgh Penguins\u0027, 11000000.00);", + "sql": "SELECT position, SUM(salary) FROM hockey_players GROUP BY position;", + "sql_explanation": "This query uses the SUM() function to calculate the total salary for each position in the hockey_players table, and the GROUP BY clause to group the results by the position column." 
+}, { + "id": "5086", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average running speed for each player in the 2020 World Cup?", + "sql_context": "CREATE TABLE world_cup (player VARCHAR(255), match_id INT, running_speed FLOAT); INSERT INTO world_cup (player, match_id, running_speed) VALUES (\u0027Messi\u0027, 1, 25), (\u0027Messi\u0027, 2, 26), (\u0027Ronaldo\u0027, 1, 24), (\u0027Ronaldo\u0027, 2, 27);", + "sql": "SELECT player, AVG(running_speed) FROM world_cup GROUP BY player;", + "sql_explanation": "Join the world_cup and teams tables, group by player, and calculate the average running speed for each player in the 2020 World Cup." 
+}, { + "id": "5278", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of fans engaging with each team\u0027s social media on a daily basis?", + "sql_context": "CREATE TABLE social_media (team VARCHAR(255), date DATE, engagement INT); INSERT INTO social_media (team, date, engagement) VALUES (\u0027Bears\u0027, \u00272022-01-01\u0027, 5000), (\u0027Bears\u0027, \u00272022-01-02\u0027, 6000), (\u0027Bills\u0027, \u00272022-01-01\u0027, 3000), (\u0027Bills\u0027, \u00272022-01-02\u0027, 4000);", + "sql": "SELECT team, AVG(engagement) FROM social_media GROUP BY team;", + "sql_explanation": "Join the social_media and teams tables, group by team, and calculate the average daily fan engagement with each team\u0027s social media." 
+}, { + "id": "5654", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of games won by any team in a single season?", + "sql_context": "CREATE TABLE games (id INT, team TEXT, season INT, home_or_away TEXT, wins INT, losses INT); INSERT INTO games (id, team, season, home_or_away, wins, losses) VALUES (1, \u0027Team A\u0027, 2020, \u0027Home\u0027, 35, 10); INSERT INTO games (id, team, season, home_or_away, wins, losses) VALUES (2, \u0027Team B\u0027, 2020, \u0027Away\u0027, 28, 17);", + "sql": "SELECT team, MAX(wins) FROM games GROUP BY team;", + "sql_explanation": "Group teams by season, and find the maximum number of wins for each team." 
+}, { + "id": "1111", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank the top 3 countries with the highest increase in military spending between 2016 and 2018 in the North Atlantic Treaty Organization (NATO)?", + "sql_context": "CREATE TABLE MilitarySpending (Year INT, Country VARCHAR(50), Spending FLOAT, Region VARCHAR(50)); INSERT INTO MilitarySpending (Year, Country, Spending, Region) VALUES (2016, \u0027United States\u0027, 600.1, \u0027NATO\u0027), (2016, \u0027United Kingdom\u0027, 45.6, \u0027NATO\u0027), (2017, \u0027United States\u0027, 615.4, \u0027NATO\u0027);", + "sql": "SELECT Country, MAX(Spending) - MIN(Spending) as Increase FROM MilitarySpending WHERE Year IN (2016, 2018) AND Region \u003d \u0027NATO\u0027 GROUP BY Country ORDER BY Increase DESC LIMIT 3;", + "sql_explanation": "Rank the top 3 countries with the highest increase in military spending between 2016 and 2018 in the North Atlantic Treaty Organization (NATO)." 
+}, { + "id": "1345", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 countries that have received military training from Japan since 2015?", + "sql_context": "CREATE TABLE military_training (country VARCHAR(255), trainee_count INT, training_date DATE);", + "sql": "SELECT country, SUM(trainee_count) as total_trained FROM military_training WHERE training_date \u003e\u003d \u00272015-01-01\u0027 GROUP BY country ORDER BY total_trained DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 countries that have received military training from Japan since 2015 by grouping by country and summing the trainee count, then ordering by the total trainee count in descending order and limiting the results to the top 3." 
+}, { + "id": "1659", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 countries with the most peacekeeping operations since 2016?", + "sql_context": "CREATE TABLE PeacekeepingOperations (Country VARCHAR(50), Year INT, Operations INT); INSERT INTO PeacekeepingOperations (Country, Year, Operations) VALUES (\u0027France\u0027, 2016, 6), (\u0027USA\u0027, 2016, 8), (\u0027UK\u0027, 2016, 7), (\u0027France\u0027, 2017, 7), (\u0027USA\u0027, 2017, 9), (\u0027UK\u0027, 2017, 8), (\u0027France\u0027, 2018, 8), (\u0027USA\u0027, 2018, 10), (\u0027UK\u0027, 2018, 9);", + "sql": "SELECT Country, SUM(Operations) AS TotalOperations FROM PeacekeepingOperations GROUP BY Country ORDER BY TotalOperations DESC FETCH FIRST 2 ROWS ONLY;", + "sql_explanation": "This query lists the top 2 countries with the most peacekeeping operations since 2016. It calculates the total operations for each country using the SUM() function and the GROUP BY clause. Then, it orders the results by the total operations in descending order and returns the top 2 rows using the FETCH FIRST clause." 
+}, { + "id": "1932", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average military innovation budget (in USD) for each organization in the \u0027military_innovation\u0027 table, excluding those with less than 2 projects, ordered by the average budget in descending order?", + "sql_context": "CREATE TABLE military_innovation (id INT, organization VARCHAR(50), budget INT);", + "sql": "SELECT organization, AVG(budget) as avg_budget FROM military_innovation GROUP BY organization HAVING COUNT(*) \u003e\u003d 2 ORDER BY avg_budget DESC;", + "sql_explanation": "The query starts by grouping the records in the \u0027military_innovation\u0027 table by \u0027organization\u0027 and calculating the average budget for each organization using the AVG function on the \u0027budget\u0027 column. The HAVING clause filters out organizations with less than 2 projects. Finally, the ORDER BY clause sorts the results by the average budget in descending order." 
+}, { + "id": "2038", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of countries that received humanitarian assistance in Q1 and Q3 of 2022?", + "sql_context": "CREATE TABLE Humanitarian_Assistance (Country VARCHAR(100), Quarter INT, Year INT);", + "sql": "SELECT Country FROM Humanitarian_Assistance WHERE Year \u003d 2022 AND Quarter IN (1, 3) GROUP BY Country HAVING COUNT(DISTINCT Quarter) \u003d 2;", + "sql_explanation": "This query lists the names of countries that received humanitarian assistance in Q1 and Q3 of 2022 by selecting the country column from the Humanitarian_Assistance table where the year is 2022 and the quarter is either 1 or 3, grouping the results by country, and filtering the groups that have exactly 2 distinct quarters." 
+}, { + "id": "2074", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total aid received by countries in Africa, excluding Egypt, from all aid agencies.", + "sql_context": "CREATE TABLE aid_agencies (country VARCHAR(50), aid_received INT);", + "sql": "SELECT country, SUM(aid_received) AS total_aid FROM aid_agencies WHERE country !\u003d \u0027Egypt\u0027 AND country LIKE \u0027%Africa%\u0027 GROUP BY country;", + "sql_explanation": "First, we filter the aid_agencies table to only include countries from Africa (excluding Egypt) using the WHERE clause and the LIKE operator. Then, using the SUM aggregate function, we calculate the total aid received for each country in the filtered table." 
+}, { + "id": "2113", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total defense diplomacy events in 2020 and 2021", + "sql_context": "CREATE TABLE defense_diplomacy (event_date DATE, event_type VARCHAR(255)); INSERT INTO defense_diplomacy (event_date, event_type) VALUES (\u00272020-01-01\u0027, \u0027Summit\u0027), (\u00272021-01-01\u0027, \u0027Conference\u0027), (\u00272020-06-01\u0027, \u0027Workshop\u0027);", + "sql": "SELECT YEAR(event_date) as year, COUNT(*) as total_events FROM defense_diplomacy WHERE YEAR(event_date) IN (2020, 2021) GROUP BY year;", + "sql_explanation": "This query groups the defense diplomacy events by year and then counts the number of events in each group. It returns the total defense diplomacy events in 2020 and 2021." 
+}, { + "id": "2289", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total defense spending for each country in North America?", + "sql_context": "CREATE TABLE defense_spending_3 (country VARCHAR(50), continent VARCHAR(50), amount DECIMAL(10,2)); INSERT INTO defense_spending_3 (country, continent, amount) VALUES (\u0027USA\u0027, \u0027North America\u0027, 74000000000), (\u0027Canada\u0027, \u0027North America\u0027, 21000000000), (\u0027Mexico\u0027, \u0027North America\u0027, 6000000000);", + "sql": "SELECT country, SUM(amount) as total_defense_spending FROM defense_spending_3 WHERE continent \u003d \u0027North America\u0027 GROUP BY country;", + "sql_explanation": "This query groups the defense spending by country and sums the amount of defense spending if the continent is North America." 
+}, { + "id": "2565", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of humanitarian assistance operations in the \u0027europe\u0027 region?", + "sql_context": "CREATE TABLE humanitarian_assistance (country VARCHAR(50), region VARCHAR(50), operations INT); INSERT INTO humanitarian_assistance (country, region, operations) VALUES (\u0027France\u0027, \u0027Europe\u0027, 30), (\u0027Germany\u0027, \u0027Europe\u0027, 40), (\u0027UK\u0027, \u0027Europe\u0027, 50);", + "sql": "SELECT region, SUM(operations) as total_operations FROM humanitarian_assistance WHERE region \u003d \u0027Europe\u0027 GROUP BY region;", + "sql_explanation": "Calculate the total number of humanitarian assistance operations in the \u0027Europe\u0027 region." 
+}, { + "id": "2762", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average defense spending as a percentage of GDP for each country in Q2 2022?", + "sql_context": "CREATE TABLE defense_spending (country VARCHAR(255), defense_spending_gdp NUMERIC, quarter INT, year INT); INSERT INTO defense_spending (country, defense_spending_gdp, quarter, year) VALUES (\u0027USA\u0027, 3.5, 2, 2022), (\u0027China\u0027, 2.0, 2, 2022), (\u0027India\u0027, 2.5, 2, 2022);", + "sql": "SELECT country, AVG(defense_spending_gdp) FROM defense_spending WHERE quarter \u003d 2 AND year \u003d 2022 GROUP BY country;", + "sql_explanation": "This query calculates the average defense spending as a percentage of GDP for each country in Q2 2022. It filters the data where quarter is 2 and year is 2022, then groups the results by country and calculates the average of the defense spending as a percentage of GDP for each group." 
+}, { + "id": "3131", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many humanitarian assistance missions were conducted by each unit in Q1 of 2019?", + "sql_context": "CREATE TABLE HumanitarianAssistanceByQuarter (Quarter VARCHAR(10), Unit VARCHAR(50), Missions INT); INSERT INTO HumanitarianAssistanceByQuarter (Quarter, Unit, Missions) VALUES (\u0027Q1 2019\u0027, \u0027Unit A\u0027, 4), (\u0027Q1 2019\u0027, \u0027Unit B\u0027, 5), (\u0027Q1 2019\u0027, \u0027Unit C\u0027, 6);", + "sql": "SELECT Unit, SUM(Missions) FROM HumanitarianAssistanceByQuarter WHERE Quarter \u003d \u0027Q1 2019\u0027 GROUP BY Unit;", + "sql_explanation": "This query calculates the total number of humanitarian assistance missions conducted by each unit in Q1 of 2019. It groups the data by unit and calculates the total number of missions for each group." 
+}, { + "id": "4098", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of peacekeeping operations participated in by each country in the \u0027peacekeeping\u0027 table, excluding those with less than 3 operations?", + "sql_context": "CREATE TABLE peacekeeping (id INT, country VARCHAR(50), num_operations INT);", + "sql": "SELECT AVG(num_operations) FROM peacekeeping GROUP BY country HAVING COUNT(*) \u003e\u003d 3;", + "sql_explanation": "First, the query groups the records in the \u0027peacekeeping\u0027 table by country. Then, it calculates the average number of operations for each country by using the AVG function on the \u0027num_operations\u0027 column. Finally, the HAVING clause filters out countries with less than 3 operations." 
+}, { + "id": "1227", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which factories produced the most garments in 2022?", + "sql_context": "CREATE TABLE factory_production (factory_id INT, year INT, garments_produced INT);", + "sql": "SELECT factory_id, SUM(garments_produced) AS total_garments_produced FROM factory_production WHERE year \u003d 2022 GROUP BY factory_id ORDER BY total_garments_produced DESC;", + "sql_explanation": "The SQL query retrieves the factories that produced the most garments in 2022 by grouping the records by factory_id, calculating the sum of garments_produced for each group in 2022, and sorting the results in descending order based on total_garments_produced." 
+}, { + "id": "1324", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average monthly CO2 emission for each manufacturing facility in 2021?", + "sql_context": "CREATE TABLE co2_emissions (facility_id INT, facility_name VARCHAR(255), emission_date DATE, co2_emission DECIMAL(10,2)); INSERT INTO co2_emissions (facility_id, facility_name, emission_date, co2_emission) VALUES (1, \u0027Facility A\u0027, \u00272021-01-01\u0027, 500.00), (2, \u0027Facility B\u0027, \u00272021-02-01\u0027, 700.00), (3, \u0027Facility C\u0027, \u00272021-03-01\u0027, 800.00);", + "sql": "SELECT facility_name, AVG(co2_emission) as avg_monthly_emission FROM co2_emissions WHERE emission_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY facility_name;", + "sql_explanation": "This query calculates the average monthly CO2 emissions for each manufacturing facility in 2021 by averaging the CO2 emissions for each facility where the emission date falls between January 1, 2021 and December 31, 2021. It then groups the results by facility name to get the average monthly CO2 emissions for each facility." 
+}, { + "id": "1326", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the number of units sold for each product in Australia in Q4 2020 and Q1 2021?", + "sql_context": "CREATE TABLE product_sales (product_name VARCHAR(30), country VARCHAR(20), quarter INT, year INT, units_sold INT); INSERT INTO product_sales (product_name, country, quarter, year, units_sold) VALUES (\u0027t-shirt\u0027, \u0027Australia\u0027, 4, 2020, 500), (\u0027t-shirt\u0027, \u0027Australia\u0027, 1, 2021, 600), (\u0027jeans\u0027, \u0027Australia\u0027, 4, 2020, 700), (\u0027jeans\u0027, \u0027Australia\u0027, 1, 2021, 800);", + "sql": "SELECT product_name, SUM(units_sold) FROM product_sales WHERE country \u003d \u0027Australia\u0027 AND (quarter \u003d 4 OR quarter \u003d 1) AND year IN (2020, 2021) GROUP BY product_name;", + "sql_explanation": "The SQL query calculates the number of units of each product sold in Australia in Q4 2020 and Q1 2021 by grouping the records by product_name and calculating the sum of units_sold for each group." 
+}, { + "id": "1459", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emissions by country in the textile industry in 2022?", + "sql_context": "CREATE TABLE co2_emissions_textile_2022 (country VARCHAR(50), co2_emissions DECIMAL(10,2), date DATE);", + "sql": "SELECT country, SUM(co2_emissions) AS total_co2_emissions FROM co2_emissions_textile_2022 WHERE date \u003e\u003d \u00272022-01-01\u0027 AND date \u003c \u00272023-01-01\u0027 GROUP BY country;", + "sql_explanation": "This SQL query calculates the total CO2 emissions by country in the textile industry in 2022. It uses the co2_emissions_textile_2022 table, filters the records to only include emissions in 2022, then groups the results by country. The SUM function calculates the total CO2 emissions for each group." 
+}, { + "id": "1815", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of customers from each country, and the average order value for each country.", + "sql_context": "CREATE TABLE customer_orders (customer_id INT, country VARCHAR(255), order_date DATE, order_value DECIMAL(10,2)); INSERT INTO customer_orders VALUES (1, \u0027USA\u0027, \u00272022-01-01\u0027, 100.00), (2, \u0027Canada\u0027, \u00272022-01-05\u0027, 200.00), (3, \u0027Mexico\u0027, \u00272022-02-01\u0027, 300.00), (4, \u0027USA\u0027, \u00272022-02-03\u0027, 400.00), (5, \u0027Canada\u0027, \u00272022-03-01\u0027, 500.00);", + "sql": "SELECT country, COUNT(*) as num_customers, AVG(order_value) as avg_order_value FROM customer_orders GROUP BY country ORDER BY num_customers DESC;", + "sql_explanation": "The SQL query groups the rows by country and calculates the number of customers from each country using the COUNT function and the average order value for each country using the AVG function. Finally, the rows are sorted by the number of customers in descending order." 
+}, { + "id": "1860", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total sales amount for each product category by country in 2022, excluding online sales?", + "sql_context": "CREATE TABLE sales_2022 AS SELECT * FROM sales WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027; ALTER TABLE sales_2022 ADD COLUMN sale_country VARCHAR(50); UPDATE sales_2022 SET sale_country \u003d CASE WHEN sale_channel \u003d \u0027Online\u0027 THEN \u0027Online\u0027 ELSE sale_city END; ALTER TABLE sales_2022 ADD COLUMN product_category VARCHAR(50); UPDATE sales_2022 SET product_category \u003d CASE WHEN product_id \u003d 1 THEN \u0027Tops\u0027 WHEN product_id \u003d 2 THEN \u0027Bottoms\u0027 WHEN product_id \u003d 3 THEN \u0027Outerwear\u0027 WHEN product_id \u003d 4 THEN \u0027Accessories\u0027 END;", + "sql": "SELECT sale_country, product_category, SUM(sale_amount) FROM sales_2022 WHERE sale_country !\u003d \u0027Online\u0027 GROUP BY sale_country, product_category;", + "sql_explanation": "The query creates a new table for sales data in 2022, adds a sale_country column, updates the sale_country values based on sale_channel, adds a product_category column, and updates the product_category values based on product_id. Then, the query groups the results by sale_country and product_category, excluding online sales, and calculates the sum of the sale_amount for each group using the GROUP BY and SUM aggregate functions." 
+}, { + "id": "2106", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each product category in Q3 2021?", + "sql_context": "CREATE TABLE sales (product_category VARCHAR(255), sales_amount NUMERIC, sale_date DATE); INSERT INTO sales (product_category, sales_amount, sale_date) VALUES (\u0027men_shirts\u0027, 500, \u00272021-07-01\u0027); INSERT INTO sales (product_category, sales_amount, sale_date) VALUES (\u0027women_pants\u0027, 800, \u00272021-07-02\u0027); INSERT INTO sales (product_category, sales_amount, sale_date) VALUES (\u0027children_dresses\u0027, 400, \u00272021-07-03\u0027);", + "sql": "SELECT product_category, SUM(sales_amount) FROM sales WHERE sale_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027 GROUP BY product_category;", + "sql_explanation": "The query calculates the total revenue for each product category in Q3 2021 by summing the sales_amount for each product_category and filtering the sale_date within Q3 2021 (July 1, 2021 to September 30, 2021), then grouping the results by product_category." 
+}, { + "id": "2653", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total quantity of garments produced in Southeast Asia by garment type in 2022?", + "sql_context": "CREATE TABLE garment_production_2022 AS SELECT * FROM garment_production WHERE production_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027; ALTER TABLE garment_production_2022 ADD COLUMN country_region VARCHAR(50); UPDATE garment_production_2022 SET country_region \u003d CASE WHEN country \u003d \u0027Indonesia\u0027 THEN \u0027Southeast Asia\u0027 WHEN country \u003d \u0027Vietnam\u0027 THEN \u0027Southeast Asia\u0027 ELSE country_region END;", + "sql": "SELECT country_region, garment_type, SUM(quantity) FROM garment_production_2022 GROUP BY country_region, garment_type;", + "sql_explanation": "The query creates a new table for garment production data in 2022, adds a country_region column, updates the country_region values based on country, and groups the results by country_region and garment_type. Then, the query calculates the sum of the quantity for each group using the GROUP BY and SUM aggregate functions, filtering for Southeast Asia." 
+}, { + "id": "2709", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 products with the highest revenue, and their corresponding revenues.", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, revenue DECIMAL(10,2)); INSERT INTO sales VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 300.00), (4, 2, 400.00), (5, 3, 500.00), (6, 3, 600.00);", + "sql": "SELECT product_id, SUM(revenue) as total_revenue FROM sales GROUP BY product_id ORDER BY total_revenue DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the total revenue for each product by grouping them using the GROUP BY clause and summing up their revenues using the SUM function. The ORDER BY clause sorts the products by their total revenue in descending order, and the LIMIT clause selects the top 3 products with the highest revenues." 
+}, { + "id": "2953", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many units of each fabric type were used in manufacturing for the Spring 2022 collection?", + "sql_context": "CREATE TABLE manufacturing (manufacturing_id INT, fabric_type VARCHAR(50), collection VARCHAR(50), units_used INT);", + "sql": "SELECT fabric_type, SUM(units_used) FROM manufacturing WHERE collection \u003d \u0027Spring 2022\u0027 GROUP BY fabric_type;", + "sql_explanation": "The SQL query calculates the number of units of each fabric type used in manufacturing for the Spring 2022 collection by summing the units_used for each fabric_type with a collection of \u0027Spring 2022\u0027, grouped by fabric_type." 
+}, { + "id": "3239", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission per garment by country of origin?", + "sql_context": "CREATE TABLE garments (garment_id INT, country_origin VARCHAR(50), co2_emission DECIMAL(10,2));", + "sql": "SELECT country_origin, AVG(co2_emission) AS avg_co2_per_garment FROM garments GROUP BY country_origin;", + "sql_explanation": "The SQL query calculates the average CO2 emission per garment by grouping the records by country_origin and computing the average CO2 emission for each group." +}, { + "id": "3750", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many garments in the \u0027Knit\u0027 category were sold per month in 2023?", + "sql_context": "CREATE TABLE garments (id INT PRIMARY KEY, category VARCHAR(255), price DECIMAL(10,2)); CREATE TABLE sales (id INT PRIMARY KEY, garment_id INT, date DATE, quantity INT); CREATE VIEW sales_by_month_and_category AS SELECT YEAR(date) as sales_year, MONTH(date) as sales_month, category, SUM(quantity) as total_sold FROM sales JOIN garments ON sales.garment_id \u003d garments.id WHERE YEAR(date) \u003d 2023 AND category 
\u003d \u0027Knit\u0027 GROUP BY sales_year, sales_month, category;", + "sql": "SELECT sales_month, SUM(total_sold) FROM sales_by_month_and_category GROUP BY sales_month;", + "sql_explanation": "This query calculates the number of garments in the \u0027Knit\u0027 category sold per month in 2023. It does so by querying the \u0027sales_by_month_and_category\u0027 view and filtering for the year 2023 and the \u0027Knit\u0027 category. It then groups the results by sales month and calculates the total sales per month." +}, { + "id": "3982", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many garments were sold in each region with a price greater than \u0027150\u0027?", + "sql_context": "CREATE TABLE sales (id INT, salesperson_id INT, garment_id INT, region TEXT, price INT); INSERT INTO sales (id, salesperson_id, garment_id, region, price) VALUES (1, 1, 1, \u0027Paris\u0027, 150), (2, 1, 2, \u0027London\u0027, 120), (3, 2, 3, \u0027Paris\u0027, 180), (4, 2, 4, \u0027London\u0027, 110), (5, 3, 5, \u0027Berlin\u0027, 200), (6, 3, 6, \u0027Berlin\u0027, 160);", + "sql": "SELECT region, COUNT(*) AS garments_sold FROM sales WHERE price \u003e 150 GROUP BY region;", + "sql_explanation": "This SQL query calculates the number of garments sold in each region with a price greater than \u0027150\u0027. It does so by filtering the \u0027sales\u0027 table based on the \u0027price\u0027 column using the WHERE clause and then using the GROUP BY clause to group the results by region. 
The COUNT function is used to count the number of garments sold in each region." +}, { + "id": "463", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of financial wellbeing scores for customers in California?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(255), state VARCHAR(255), financial_wellbeing_score INT);", + "sql": "SELECT state, COUNT(*) as count, MIN(financial_wellbeing_score) as min_score, AVG(financial_wellbeing_score) as avg_score, MAX(financial_wellbeing_score) as max_score FROM customers WHERE state \u003d \u0027California\u0027 GROUP BY state;", + "sql_explanation": "This SQL query calculates the distribution of financial wellbeing scores for customers in California. It does this by using the COUNT, MIN, AVG, and MAX functions on the financial_wellbeing_score column and filtering the customers table for rows where the state is \u0027California\u0027. It then groups the result by the state column." 
+}, { + "id": "675", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of Shariah-compliant finance institutions in Southeast Asia, ordered by institution_count descending.", + "sql_context": "CREATE TABLE shariah_compliant_finance (id INT, country VARCHAR(20), institution VARCHAR(30)); INSERT INTO shariah_compliant_finance (id, country, institution) VALUES (1, \u0027Indonesia\u0027, \u0027Bank Muamalat\u0027), (2, \u0027Malaysia\u0027, \u0027Bank Islam\u0027), (3, \u0027Singapore\u0027, \u0027Maybank Islamic\u0027), (4, \u0027Thailand\u0027, \u0027Bank Islam Thailand\u0027);", + "sql": "SELECT country, COUNT(institution) AS institution_count FROM shariah_compliant_finance WHERE country IN (\u0027Indonesia\u0027, \u0027Malaysia\u0027, \u0027Singapore\u0027, \u0027Thailand\u0027) GROUP BY country ORDER BY institution_count DESC;", + "sql_explanation": "Count the number of Shariah-compliant finance institutions for each country in Southeast Asia and order the results in descending order by the count." 
+}, { + "id": "1637", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which financial institution in Asia has the highest average loan amount?", + "sql_context": "CREATE SCHEMA if not exists finance;CREATE TABLE if not exists loans (id INT PRIMARY KEY, institution_name TEXT, region TEXT, amount DECIMAL(10,2)); INSERT INTO loans (id, institution_name, region, amount) VALUES (1, \u0027ABC Microfinance\u0027, \u0027Asia\u0027, 5000.00), (2, \u0027DEF Microfinance\u0027, \u0027Asia\u0027, 8000.00), (3, \u0027GHI Microfinance\u0027, \u0027Asia\u0027, 6000.00);", + "sql": "SELECT institution_name, AVG(amount) as avg_amount FROM finance.loans WHERE region \u003d \u0027Asia\u0027 GROUP BY institution_name ORDER BY avg_amount DESC LIMIT 1;", + "sql_explanation": "This query finds the financial institution in Asia with the highest average loan amount by selecting the \u0027institution_name\u0027 and the average \u0027amount\u0027 for each institution in \u0027Asia\u0027. Then, it groups the results by \u0027institution_name\u0027 and orders them in descending order by the average amount. Finally, it limits the results to the top institution." 
+}, { + "id": "2575", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum balance for each account in the socially responsible lending portfolio?", + "sql_context": "CREATE TABLE socially_responsible_portfolio (account_number INT, balance DECIMAL(10,2), account_type VARCHAR(255));", + "sql": "SELECT account_type, MIN(balance) FROM socially_responsible_portfolio WHERE account_type \u003d \u0027loan\u0027 GROUP BY account_type;", + "sql_explanation": "This query calculates the minimum balance for each account in the socially responsible lending portfolio by selecting the minimum balance for each account where the account_type is loan and grouping by account_type." 
+}, { + "id": "2996", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total value of loans issued to clients in the Asia-Pacific region, grouped by account type.", + "sql_context": "CREATE TABLE loans (loan_id INT, client_region VARCHAR(20), account_type VARCHAR(20), loan_amount DECIMAL(10, 2)); INSERT INTO loans (loan_id, client_region, account_type, loan_amount) VALUES (1, \u0027Asia-Pacific\u0027, \u0027Shariah Compliant\u0027, 12000.00), (2, \u0027Europe\u0027, \u0027Shariah Compliant\u0027, 9000.00), (3, \u0027Asia-Pacific\u0027, \u0027Conventional\u0027, 15000.00), (4, \u0027North America\u0027, \u0027Conventional\u0027, 10000.00);", + "sql": "SELECT account_type, SUM(loan_amount) FROM loans WHERE client_region \u003d \u0027Asia-Pacific\u0027 GROUP BY account_type;", + "sql_explanation": "The SQL query calculates the total value of loans issued to clients in the Asia-Pacific region, grouped by account type. It filters the loans table by region, returning a sum of the loan_amount column for all rows with a region of \u0027Asia-Pacific\u0027 and groups the results by account type." 
+}, { + "id": "3891", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total amount of Shariah-compliant financing for each country?", + "sql_context": "CREATE TABLE shariah_financing(client_id INT, country VARCHAR(25), amount FLOAT);INSERT INTO shariah_financing(client_id, country, amount) VALUES (1, \u0027Malaysia\u0027, 5000), (2, \u0027UAE\u0027, 7000), (3, \u0027Indonesia\u0027, 6000), (4, \u0027Saudi Arabia\u0027, 8000);", + "sql": "SELECT country, SUM(amount) as total_financing FROM shariah_financing GROUP BY country;", + "sql_explanation": "This query groups the shariah_financing table by country and calculates the total amount of financing for each country using the SUM() function." 
+}, { + "id": "725", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 mediators by total cases mediated and the total number of successful mediations?", + "sql_context": "CREATE TABLE Mediators (MediatorId INT, MediatorName TEXT, TotalCases INT, SuccessfulMediations INT); INSERT INTO Mediators (MediatorId, MediatorName, TotalCases, SuccessfulMediations) VALUES (1, \u0027Mediator A\u0027, 30, 25), (2, \u0027Mediator B\u0027, 40, 35), (3, \u0027Mediator C\u0027, 50, 45), (4, \u0027Mediator D\u0027, 60, 50), (5, \u0027Mediator E\u0027, 70, 55);", + "sql": "SELECT MediatorName, SUM(TotalCases) AS TotalCases, SUM(SuccessfulMediations) AS SuccessfulMediations FROM Mediators GROUP BY MediatorName ORDER BY TotalCases DESC, SuccessfulMediations DESC LIMIT 3;", + "sql_explanation": "This SQL query lists the top 3 mediators by summing the TotalCases and SuccessfulMediations columns, grouping by MediatorName, ordering by TotalCases and SuccessfulMediations in descending order, and limiting the results to 3 rows." 
+}, { + "id": "1044", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of restorative justice programs by location, and the number of programs facilitated by \u0027Sarah Lee\u0027?", + "sql_context": "CREATE TABLE restorative_justice_programs (id INT, program_name TEXT, location TEXT, facilitator TEXT, participants INT); INSERT INTO restorative_justice_programs (id, program_name, location, facilitator, participants) VALUES (1, \u0027Victim Offender Mediation\u0027, \u0027Chicago\u0027, \u0027John Smith\u0027, 15), (2, \u0027Restorative Circles\u0027, \u0027Los Angeles\u0027, \u0027Ahmed Rami\u0027, 20), (3, \u0027Victim Empathy Workshop\u0027, \u0027Chicago\u0027, \u0027Sarah Lee\u0027, 12);", + "sql": "SELECT location, COUNT(*) AS total_programs, SUM(CASE WHEN facilitator \u003d \u0027Sarah Lee\u0027 THEN 1 ELSE 0 END) AS sarah_lee_programs FROM restorative_justice_programs GROUP BY location;", + "sql_explanation": "The SQL query calculates the total number of restorative justice programs by location and the number of programs facilitated by \u0027Sarah Lee\u0027. It uses the COUNT and SUM functions with CASE statement to count total programs and programs facilitated by \u0027Sarah Lee\u0027." 
+}, { + "id": "1116", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of restorative justice sessions attended, by participant\u0027s age group, for cases closed in the past year?", + "sql_context": "CREATE TABLE restorative_justice (id INT, participant_age_group VARCHAR(50), sessions_attended INT, case_closed_date DATE);", + "sql": "SELECT participant_age_group, AVG(sessions_attended) FROM restorative_justice WHERE case_closed_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY participant_age_group;", + "sql_explanation": "The SQL query filters the restorative_justice table for cases closed in the past year, groups the results by participant\u0027s age group, and calculates the average number of sessions attended for each age group." 
+}, { + "id": "1263", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of traffic violations in justice_database by month in the year 2020?", + "sql_context": "CREATE TABLE traffic_violations (id INT, violation_date DATE, location VARCHAR(255), description VARCHAR(255)); INSERT INTO traffic_violations (id, violation_date, location, description) VALUES (1, \u00272020-01-01\u0027, \u0027New York, NY\u0027, \u0027Speeding\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM violation_date), COUNT(*) FROM traffic_violations WHERE EXTRACT(YEAR FROM violation_date) \u003d 2020 GROUP BY EXTRACT(MONTH FROM violation_date);", + "sql_explanation": "The SQL query extracts the month from the violation_date column and groups the results by month, counting the total number of traffic violations in the justice_database for each month in the year 2020." 
+}, { + "id": "1561", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of legal cases handled by each lawyer, with a value over $50,000, in the past year?", + "sql_context": "CREATE TABLE legal_cases (id INT, value DECIMAL(10,2), case_date DATE, lawyer VARCHAR(50));", + "sql": "SELECT lawyer, COUNT(*) FROM legal_cases WHERE value \u003e 50000 AND case_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY lawyer HAVING COUNT(*) \u003e 0;", + "sql_explanation": "The SQL query filters the legal_cases table for cases with a value over $50,000 in the past year, groups the results by lawyer, and counts the number of cases for each lawyer." 
+}, { + "id": "1666", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many individuals have participated in restorative justice programs in the past year, grouped by their age and gender?", + "sql_context": "CREATE TABLE restorative_justice_participants (id INT, age INT, gender TEXT, program_date DATE);", + "sql": "SELECT age, gender, COUNT(*) FROM restorative_justice_participants WHERE program_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY age, gender;", + "sql_explanation": "The query groups the number of participants in restorative justice programs by their age and gender for the past year. It uses the COUNT() aggregation function to count the number of participants and the GROUP BY clause to group the results by age and gender." 
+}, { + "id": "1723", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases heard by each judge in the criminal court, in the last month?", + "sql_context": "CREATE TABLE cases (id INT, date DATE, judge_id INT, court_type VARCHAR(50));", + "sql": "SELECT judge_id, COUNT(*) as total_cases_heard FROM cases WHERE court_type \u003d \u0027Criminal\u0027 AND date \u003e\u003d DATEADD(MONTH, -1, GETDATE()) GROUP BY judge_id;", + "sql_explanation": "This query calculates the total number of cases heard by each judge in the criminal court in the last month. It does this by filtering the cases table to only include rows with a court type of \u0027Criminal\u0027 and a date within the last month, and then grouping the results by judge_id. It then calculates the count of cases for each group." 
+}, { + "id": "1748", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common type of crime committed in each borough, in the last year?", + "sql_context": "CREATE TABLE crimes (id INT, date DATE, borough VARCHAR(50), type VARCHAR(50));", + "sql": "SELECT borough, type, COUNT(*) as count FROM crimes WHERE date \u003e\u003d DATEADD(YEAR, -1, GETDATE()) GROUP BY borough, type ORDER BY borough, count DESC;", + "sql_explanation": "This query calculates the most common type of crime committed in each borough in the last year. It does this by filtering the crimes table to only include rows with a date within the last year and then grouping the results by borough and type. It then orders the results by borough and the number of crimes, and returns the top result for each borough." 
+}, { + "id": "1875", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases heard by restorative justice programs in the US, and how many of those cases resulted in a full or partial agreement, broken down by state?", + "sql_context": "CREATE TABLE us_rj_cases(id INT, state VARCHAR(255), result VARCHAR(255));", + "sql": "SELECT state, SUM(CASE WHEN result IN (\u0027Full Agreement\u0027, \u0027Partial Agreement\u0027) THEN 1 ELSE 0 END) AS agreements FROM us_rj_cases GROUP BY state;", + "sql_explanation": "The query calculates the total number of cases heard by restorative justice programs in the US, broken down by state, by grouping the us_rj_cases table by the state column. It then calculates the number of cases that resulted in a full or partial agreement by counting the number of rows where the result is either \u0027Full Agreement\u0027 or \u0027Partial Agreement\u0027 and using the SUM function to add up the number of agreements for each state." 
+}, { + "id": "1939", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of criminal justice reform initiatives by type and funding source?", + "sql_context": "CREATE TABLE criminal_justice_reform (initiative_id INT, initiative_type VARCHAR(20), funding_source VARCHAR(20)); INSERT INTO criminal_justice_reform (initiative_id, initiative_type, funding_source) VALUES (1, \u0027Prison Reform\u0027, \u0027Government\u0027), (2, \u0027Bail Reform\u0027, \u0027Non-profit\u0027);", + "sql": "SELECT initiative_type, funding_source, COUNT(*) as total_initiatives FROM criminal_justice_reform GROUP BY initiative_type, funding_source;", + "sql_explanation": "This SQL query groups the \u0027criminal_justice_reform\u0027 table by both the \u0027initiative_type\u0027 and \u0027funding_source\u0027 columns and calculates the count of initiatives for each group, providing the total number of criminal justice reform initiatives by type and funding source." 
+}, { + "id": "2085", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest HearingDate for each CommunityCourt in descending order?", + "sql_context": "CREATE TABLE CommunityCourts (CourtID INT, District VARCHAR(20)); CREATE TABLE CommunityCourtHearings (HearingID INT, CourtID INT, HearingDate DATE); INSERT INTO CommunityCourts (CourtID, District) VALUES (1, \u0027Downtown\u0027), (2, \u0027Uptown\u0027), (3, \u0027Midtown\u0027); INSERT INTO CommunityCourtHearings (HearingID, CourtID, HearingDate) VALUES (1, 1, \u00272021-06-15\u0027), (2, 1, \u00272021-06-25\u0027), (3, 2, \u00272021-07-20\u0027), (4, 3, \u00272021-08-12\u0027), (5, 3, \u00272021-08-22\u0027);", + "sql": "SELECT CourtID, MIN(HearingDate) as EarliestHearingDate FROM CommunityCourtHearings GROUP BY CourtID ORDER BY EarliestHearingDate DESC;", + "sql_explanation": "The SQL query uses the MIN() function to find the earliest HearingDate for each CommunityCourt. The GROUP BY clause groups the results by CourtID, and the ORDER BY clause sorts the results by the earliest HearingDate in descending order." 
+}, { + "id": "2096", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration of successful restorative justice sessions in the justice_schemas.restorative_sessions table, grouped by type of session?", + "sql_context": "CREATE TABLE justice_schemas.restorative_sessions (id INT PRIMARY KEY, session_type TEXT, duration_minutes INT, was_successful BOOLEAN);", + "sql": "SELECT session_type, AVG(duration_minutes) FROM justice_schemas.restorative_sessions WHERE was_successful \u003d TRUE GROUP BY session_type;", + "sql_explanation": "This query calculates the average duration (in minutes) for successful restorative justice sessions in the justice_schemas.restorative_sessions table, grouped by the session_type. Only sessions marked as successful are included." 
+}, { + "id": "2469", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of cases in each court, broken down by case type, case status, and year?", + "sql_context": "CREATE TABLE CourtCases (CourtName text, CaseType text, CaseStatus text, Year int, NumCases int); INSERT INTO CourtCases VALUES (\u0027Court1\u0027, \u0027Assault\u0027, \u0027Open\u0027, 2022, 30, \u00272022-01-01\u0027), (\u0027Court1\u0027, \u0027Theft\u0027, \u0027Closed\u0027, 2022, 25, \u00272022-01-01\u0027), (\u0027Court2\u0027, \u0027Assault\u0027, \u0027Open\u0027, 2022, 28, \u00272022-01-01\u0027), (\u0027Court2\u0027, \u0027Theft\u0027, \u0027Closed\u0027, 2022, 22, \u00272022-01-01\u0027);", + "sql": "SELECT CourtName, CaseType, CaseStatus, Year, SUM(NumCases) FROM CourtCases GROUP BY CourtName, CaseType, CaseStatus, Year;", + "sql_explanation": "The SQL query uses the GROUP BY clause to group the records by CourtName, CaseType, CaseStatus, and Year, then calculates the sum of NumCases for each group." 
+}, { + "id": "2741", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average number of community service hours per offender in each state, excluding offenders with no hours.", + "sql_context": "CREATE TABLE offenders (id INT, name TEXT, state TEXT, community_service_hours INT); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (1, \u0027John Doe\u0027, \u0027Washington\u0027, 50); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (2, \u0027Jane Smith\u0027, \u0027Oregon\u0027, 75); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (3, \u0027Mike Brown\u0027, \u0027Washington\u0027, 100); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (4, \u0027Maria Garcia\u0027, \u0027California\u0027, 150);", + "sql": "SELECT state, AVG(community_service_hours) FROM offenders WHERE community_service_hours IS NOT NULL GROUP BY state;", + "sql_explanation": "This query selects the state and the average of community_service_hours columns from the offenders table, where community_service_hours is not NULL, and groups the results by state." 
+}, { + "id": "2743", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average fine amount issued in traffic court, broken down by the day of the week?", + "sql_context": "CREATE TABLE traffic_court_fines (id INT, fine_amount DECIMAL(5,2), fine_date DATE);", + "sql": "SELECT DATE_FORMAT(fine_date, \u0027%W\u0027) AS day_of_week, AVG(fine_amount) FROM traffic_court_fines GROUP BY day_of_week;", + "sql_explanation": "The query calculates the average fine amount issued in traffic court, grouped by the day of the week. It uses the AVG() aggregation function to calculate the average fine amount and the DATE_FORMAT() function to extract the day of the week from the fine date. The results are then grouped by the day of the week." 
+}, { + "id": "2901", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 contributors to criminal justice reform in Canada by funding amount?", + "sql_context": "CREATE TABLE funding (funding_id INT, contributor VARCHAR(50), amount INT, region VARCHAR(20)); INSERT INTO funding (funding_id, contributor, amount, region) VALUES (1, \u0027Contributor A\u0027, 150000, \u0027Canada\u0027), (2, \u0027Contributor B\u0027, 250000, \u0027Canada\u0027);", + "sql": "SELECT contributor FROM funding WHERE region \u003d \u0027Canada\u0027 GROUP BY contributor ORDER BY SUM(amount) DESC LIMIT 3;", + "sql_explanation": "Filters funding table for Canada region. Groups by contributor and orders by total funding amount in descending order. Returns top 3 contributors." 
+}, { + "id": "3396", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many legal technology patents were granted per year?", + "sql_context": "CREATE TABLE years (year INT); INSERT INTO years (year) VALUES (2015), (2016), (2017), (2018), (2019), (2020); CREATE TABLE patents (id INT, year INT, granted BOOLEAN); INSERT INTO patents (id, year, granted) VALUES (1, 2015, TRUE), (2, 2016, TRUE), (3, 2017, FALSE), (4, 2018, TRUE), (5, 2019, FALSE), (6, 2020, TRUE);", + "sql": "SELECT p.year, COUNT(p.id) AS total_patents FROM patents p WHERE p.granted \u003d TRUE GROUP BY p.year;", + "sql_explanation": "This SQL query calculates the total number of legal technology patents granted per year by filtering the records where granted is TRUE and grouping the records based on the year column." 
+}, { + "id": "3485", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases heard by each judge, ordered by total cases in descending order?", + "sql_context": "CREATE TABLE judges (judge_id INT, name VARCHAR(50), court_id INT); INSERT INTO judges (judge_id, name, court_id) VALUES (1, \u0027John Doe\u0027, 1001), (2, \u0027Jane Smith\u0027, 1002), (3, \u0027Robert Johnson\u0027, 1003); CREATE TABLE cases (case_id INT, judge_id INT, court_date DATE); INSERT INTO cases (case_id, judge_id, court_date) VALUES (101, 1, \u00272021-01-01\u0027), (102, 1, \u00272021-02-01\u0027), (103, 2, \u00272021-03-01\u0027), (104, 3, \u00272021-04-01\u0027), (105, 3, \u00272021-05-01\u0027);", + "sql": "SELECT judge_id, COUNT(*) as total_cases FROM cases GROUP BY judge_id ORDER BY total_cases DESC;", + "sql_explanation": "This SQL query calculates the total number of cases for each judge by using the COUNT() aggregate function with GROUP BY clause. The ORDER BY clause sorts the results in descending order based on the total number of cases." 
+}, { + "id": "4026", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of incarcerated individuals by race and ethnicity?", + "sql_context": "CREATE TABLE incarcerated (id INT, race VARCHAR(20), ethnicity VARCHAR(20)); INSERT INTO incarcerated (id, race, ethnicity) VALUES (1, \u0027White\u0027, \u0027Non-Hispanic\u0027); INSERT INTO incarcerated (id, race, ethnicity) VALUES (2, \u0027Black\u0027, \u0027Hispanic\u0027);", + "sql": "SELECT race, ethnicity, COUNT(*) AS count FROM incarcerated GROUP BY race, ethnicity;", + "sql_explanation": "Group the incarcerated table by race and ethnicity, then count the number of records in each group." 
+}, { + "id": "4976", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many legal_tech_startups are registered in each country?", + "sql_context": "CREATE TABLE legal_tech_startups (id INT, country TEXT, city TEXT, field TEXT, registration_date DATE);", + "sql": "SELECT country, COUNT(*) FROM legal_tech_startups GROUP BY country;", + "sql_explanation": "This query groups the \u0027legal_tech_startups\u0027 table by the \u0027country\u0027 column and counts the number of rows in each group, providing a count of legal tech startups by country." +}, { + "id": "5126", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of traffic violations and misdemeanors committed by gender?", + "sql_context": "CREATE TABLE Traffic_Violations (ID INT, Gender VARCHAR(10), Violation VARCHAR(20)); INSERT INTO Traffic_Violations (ID, Gender, Violation) VALUES (1, \u0027Male\u0027, \u0027Traffic Violation\u0027), (2, \u0027Female\u0027, \u0027Misdemeanor\u0027);", + "sql": "SELECT Gender, COUNT(*) FROM Traffic_Violations GROUP BY Gender;", + "sql_explanation": "The SQL query uses the Traffic_Violations table and the COUNT() and GROUP BY functions to count the number of 
traffic violations and misdemeanors committed by each gender." +}, { + "id": "5287", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many legal aid clinics are there in each state?", + "sql_context": "CREATE TABLE legal_aid_clinics (id INT, clinic_name VARCHAR(50), state VARCHAR(20));", + "sql": "SELECT state, COUNT(*) FROM legal_aid_clinics GROUP BY state;", + "sql_explanation": "The SQL query groups the data by state and counts the number of legal aid clinics in each state." +}, { + "id": "705", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration spent by visitors in each exhibit type over the last month?", + "sql_context": "CREATE TABLE Visitor_Interactions (Visitor_ID INT, Start_Time TIMESTAMP, End_Time TIMESTAMP, Exhibit_ID INT, Exhibit_Type VARCHAR(255)); INSERT INTO Visitor_Interactions (Visitor_ID, Start_Time, End_Time, Exhibit_ID, Exhibit_Type) VALUES (1001, \u00272022-01-01 10:00:00\u0027, \u00272022-01-01 12:00:00\u0027, 1, \u0027Digital\u0027), (1002, \u00272022-01-01 14:00:00\u0027, \u00272022-01-01 15:00:00\u0027, 2, \u0027Physical\u0027), (1003, \u00272022-01-01 16:00:00\u0027, \u00272022-01-01 18:00:00\u0027, 3, 
\u0027Digital\u0027);", + "sql": "SELECT Exhibit_Type, AVG(TIMESTAMPDIFF(MINUTE, Start_Time, End_Time))/60 AS Avg_Duration_Hours FROM Visitor_Interactions WHERE Start_Time \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY Exhibit_Type;", + "sql_explanation": "This query filters the Visitor_Interactions table for records within the last month, groups by the Exhibit_Type column, and then calculates the average duration spent by visitors in each exhibit type in hours (Avg_Duration_Hours) for each group." +}, { + "id": "4327", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many visitors attended exhibitions in each state?", + "sql_context": "CREATE TABLE Exhibition_Visitors (id INT, visitor_state VARCHAR(255)); INSERT INTO Exhibition_Visitors (id, visitor_state) VALUES (1, \u0027California\u0027), (2, \u0027Texas\u0027), (3, \u0027Florida\u0027), (4, \u0027California\u0027), (5, \u0027California\u0027), (6, \u0027Texas\u0027), (7, \u0027California\u0027), (8, \u0027Florida\u0027);", + "sql": "SELECT visitor_state, COUNT(*) FROM Exhibition_Visitors GROUP BY visitor_state;", + "sql_explanation": "This SQL query calculates the number of visitors who attended exhibitions in each state. The query extracts the visitor_state column and then groups the table by the extracted visitor_state, finally counting the number of rows in each group." 
+}, { + "id": "511", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of eco-friendly accommodations in each country, ordered by the number of accommodations.", + "sql_context": "CREATE TABLE accommodations (id INT, country VARCHAR(50), accommodation_type VARCHAR(50), sustainability_score INT); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (1, \u0027France\u0027, \u0027Eco-friendly Hotel\u0027, 80); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (2, \u0027France\u0027, \u0027Eco-friendly Resort\u0027, 85); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (3, \u0027Germany\u0027, \u0027Eco-friendly Hotel\u0027, 90);", + "sql": "SELECT country, COUNT(*) as num_eco_accommodations FROM accommodations WHERE accommodation_type \u003d \u0027Eco-friendly Hotel\u0027 OR accommodation_type \u003d \u0027Eco-friendly Resort\u0027 GROUP BY country ORDER BY num_eco_accommodations DESC;", + "sql_explanation": "This query uses a GROUP BY statement to group accommodations by country, and a COUNT function to count eco-friendly accommodations in each country. It then orders the results by the number of eco-friendly accommodations in each country." 
+}, { + "id": "1303", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors for each country and for each month in the international_visitors table?", + "sql_context": "CREATE TABLE international_visitors (visitor_id INT, country_id INT, arrival_date DATE, num_visitors INT); INSERT INTO international_visitors (visitor_id, country_id, arrival_date, num_visitors) VALUES (3, 1, \u00272020-03-01\u0027, 60000); INSERT INTO international_visitors (visitor_id, country_id, arrival_date, num_visitors) VALUES (4, 2, \u00272020-04-01\u0027, 80000);", + "sql": "SELECT i.country_id, DATE_FORMAT(i.arrival_date, \u0027%Y-%m\u0027) as month, SUM(i.num_visitors) as total_visitors FROM international_visitors i GROUP BY i.country_id, month;", + "sql_explanation": "This query creates a table \u0027international_visitors\u0027 and inserts records for two countries, New Zealand and Indonesia, with their respective number of visitors for two different dates. Then, it groups the records by country and month and calculates the total number of visitors." 
+}, { + "id": "3875", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of tourist attractions in Canada, and what are their names?", + "sql_context": "CREATE TABLE TouristAttractions (id INT, country VARCHAR(50), name VARCHAR(100), rating FLOAT); INSERT INTO TouristAttractions (id, country, name, rating) VALUES (1, \u0027Canada\u0027, \u0027Niagara Falls\u0027, 4.7), (2, \u0027Canada\u0027, \u0027Banff National Park\u0027, 4.8), (3, \u0027Canada\u0027, \u0027CN Tower\u0027, 4.5), (4, \u0027Canada\u0027, \u0027Whistler Mountain\u0027, 4.6);", + "sql": "SELECT AVG(rating), name FROM TouristAttractions WHERE country \u003d \u0027Canada\u0027 GROUP BY name;", + "sql_explanation": "This SQL query calculates the average rating of tourist attractions in Canada and retrieves their names by filtering the TouristAttractions table based on the country being Canada, then using the AVG and GROUP BY functions to calculate the average rating for each unique name in the table." 
+}, { + "id": "3991", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission for transportation methods by region?", + "sql_context": "CREATE TABLE if not exists transportation (transport_id INT, transport VARCHAR(20), region VARCHAR(50), co2_emission INT); INSERT INTO transportation (transport_id, transport, region, co2_emission) VALUES (1, \u0027Airplane\u0027, \u0027Europe\u0027, 445), (2, \u0027Train\u0027, \u0027Asia\u0027, 14), (3, \u0027Car\u0027, \u0027Americas\u0027, 185), (4, \u0027Bus\u0027, \u0027Africa\u0027, 80), (5, \u0027Bicycle\u0027, \u0027Oceania\u0027, 0);", + "sql": "SELECT region, AVG(co2_emission) as avg_emission FROM transportation GROUP BY region;", + "sql_explanation": "Calculate the average CO2 emission for transportation methods by region by grouping the records by the region column and applying the AVG() function to the co2_emission column." 
+}, { + "id": "4216", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average annual number of tourists visiting destinations in Asia, grouped by country?", + "sql_context": "CREATE TABLE asia_tourism (country VARCHAR(255), year INT, visitors INT); INSERT INTO asia_tourism (country, year, visitors) VALUES (\u0027Japan\u0027, 2018, 20000), (\u0027China\u0027, 2018, 30000), (\u0027Thailand\u0027, 2018, 25000), (\u0027Japan\u0027, 2019, 22000), (\u0027China\u0027, 2019, 32000), (\u0027Thailand\u0027, 2019, 28000);", + "sql": "SELECT country, AVG(visitors) as avg_visitors FROM asia_tourism GROUP BY country;", + "sql_explanation": "The SQL query calculates the average number of tourists for each country in Asia by averaging the \u0027visitors\u0027 column and grouping by the \u0027country\u0027 column." +}, { + "id": "5163", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of tourist attractions in each country?", + "sql_context": "CREATE TABLE attractions (id INT, name TEXT, location TEXT, rating FLOAT); INSERT INTO attractions (id, name, location, rating) VALUES (1, \u0027Mt. 
Fuji\u0027, \u0027Japan\u0027, 4.6), (2, \u0027Eiffel Tower\u0027, \u0027France\u0027, 4.5);", + "sql": "SELECT location, AVG(rating) FROM attractions GROUP BY location;", + "sql_explanation": "This query selects the location and the average rating of tourist attractions from the attractions table, groups the results by location." +}, { + "id": "825", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon sequestered and average carbon sequestration per hectare for forests in a specific country?", + "sql_context": "CREATE TABLE Forests (id INT, name VARCHAR(255), hectares FLOAT, country VARCHAR(255), carbon_sequestration_tonnes INT); INSERT INTO Forests (id, name, hectares, country, carbon_sequestration_tonnes) VALUES (1, \u0027Amazon Rainforest\u0027, 55000000.0, \u0027Brazil\u0027, 120000000), (2, \u0027Congo Rainforest\u0027, 35000000.0, \u0027Congo\u0027, 90000000);", + "sql": "SELECT country, SUM(carbon_sequestration_tonnes) as total_carbon_sequestration, AVG(carbon_sequestration_tonnes/hectares) as avg_carbon_sequestration_per_hectare FROM Forests GROUP BY country;", + "sql_explanation": "This query calculates the total carbon sequestered and average carbon sequestration per hectare for forests in a specific country. It does so by using the SUM function to calculate the total carbon sequestration for each country and the AVG function to calculate the average carbon sequestration per hectare. The query groups the results by country." 
+}, { + "id": "1884", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon sequestration for each forest and its respective region?", + "sql_context": "CREATE TABLE forests (id INT, name VARCHAR(50), region VARCHAR(50), hectares DECIMAL(5,2), carbon_sequestration_rate DECIMAL(5,2)); INSERT INTO forests (id, name, region, hectares, carbon_sequestration_rate) VALUES (1, \u0027Forest 1\u0027, \u0027Tropical\u0027, 1500.00, 2.50), (2, \u0027Forest 2\u0027, \u0027Temperate\u0027, 2000.00, 1.80), (3, \u0027Forest 3\u0027, \u0027Tropical\u0027, 2500.00, 3.20), (4, \u0027Forest 4\u0027, \u0027Temperate\u0027, 3000.00, 2.00);", + "sql": "SELECT f.name, f.region, SUM(f.hectares * f.carbon_sequestration_rate) as total_carbon_sequestration FROM forests f GROUP BY f.name, f.region;", + "sql_explanation": "This query calculates the total carbon sequestration for each forest and its respective region by selecting the hectares and carbon_sequestration_rate columns from the forests table and grouping the results by forest name and region. It then calculates the sum of the product of the hectares and carbon_sequestration_rate columns using the SUM function." 
+}, { + "id": "2255", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which locations have more than one tree managed under the sustainable management type?", + "sql_context": "CREATE TABLE forest_management (id INT PRIMARY KEY, location VARCHAR(50), tree_id INT, management_type VARCHAR(50)); INSERT INTO forest_management (id, location, tree_id, management_type) VALUES (1, \u0027North\u0027, 1, \u0027Sustainable\u0027); INSERT INTO forest_management (id, location, tree_id, management_type) VALUES (2, \u0027South\u0027, 2, \u0027Unsustainable\u0027); INSERT INTO forest_management (id, location, tree_id, management_type) VALUES (3, \u0027North\u0027, 3, \u0027Sustainable\u0027);", + "sql": "SELECT location FROM forest_management WHERE management_type \u003d \u0027Sustainable\u0027 GROUP BY location HAVING COUNT(DISTINCT tree_id) \u003e 1;", + "sql_explanation": "This query retrieves the locations with more than one tree managed under the sustainable management type. It first filters the table to only include sustainable management types, then groups the results by location and filters for locations with more than one unique tree ID." 
+}, { + "id": "2329", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon sequestration for each species in 2018 and 2019?", + "sql_context": "CREATE TABLE carbon_sequestration (id INT, species VARCHAR(255), year INT, sequestration FLOAT); INSERT INTO carbon_sequestration (id, species, year, sequestration) VALUES (1, \u0027Pine\u0027, 2018, 1000.2), (2, \u0027Oak\u0027, 2019, 1100.1), (3, \u0027Spruce\u0027, 2018, 1300.0), (4, \u0027Spruce\u0027, 2019, 1500.0);", + "sql": "SELECT species, SUM(sequestration) as total_sequestration FROM carbon_sequestration WHERE year IN (2018, 2019) GROUP BY species;", + "sql_explanation": "This SQL query calculates the total carbon sequestration for each species in 2018 and 2019. It does this by selecting the species column and summing the sequestration column where the year is 2018 or 2019, then grouping the results by the species column." 
+}, { + "id": "2335", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest average sustainable timber harvest volume, in cubic meters, per timber production facility?", + "sql_context": "CREATE TABLE country_harvest (id INT, country VARCHAR(255), facility_name VARCHAR(255), avg_vol_cubic_meters FLOAT);", + "sql": "SELECT country, AVG(avg_vol_cubic_meters) FROM country_harvest GROUP BY country ORDER BY AVG(avg_vol_cubic_meters) DESC LIMIT 1;", + "sql_explanation": "This query selects the country and calculates the average avg_vol_cubic_meters of all rows in the country_harvest table, then groups the results by country and orders them in descending order by the average of avg_vol_cubic_meters, and finally returns only the top row." 
+}, { + "id": "2670", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average wildlife habitat size for each species, ranked by size?", + "sql_context": "CREATE TABLE wildlife (species VARCHAR(255), habitat_size FLOAT); INSERT INTO wildlife (species, habitat_size) VALUES (\u0027Deer\u0027, 123.4), (\u0027Bear\u0027, 145.6), (\u0027Elk\u0027, 167.8), (\u0027Wolf\u0027, 234.6), (\u0027Fox\u0027, 256.7), (\u0027Lynx\u0027, 345.2);", + "sql": "SELECT species, AVG(habitat_size) AS avg_habitat_size FROM wildlife GROUP BY species ORDER BY AVG(habitat_size) DESC;", + "sql_explanation": "This query calculates the average wildlife habitat size for each species and ranks them by size. The AVG function is used to calculate the average habitat size for each species, and the result is grouped by the \u0027species\u0027 column. The ORDER BY clause is used to rank the species by their average habitat size in descending order." 
+}, { + "id": "2797", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "get the total volume of timber imported from Asia to each country", + "sql_context": "CREATE TABLE timber_imports (id INT, country VARCHAR(255), volume_ha INT, import_origin VARCHAR(255));", + "sql": "SELECT country, SUM(volume_ha) as total_volume FROM timber_imports WHERE import_origin \u003d \u0027Asia\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the total volume of timber imported from Asia to each country by summing the volume_ha column for rows with an import_origin of \u0027Asia\u0027. The result is grouped by country." 
+}, { + "id": "2995", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon sequestration rate for different tree species?", + "sql_context": "CREATE TABLE CarbonSequestration (species VARCHAR(255), year INT, sequestration_rate FLOAT); INSERT INTO CarbonSequestration (species, year, sequestration_rate) VALUES (\u0027Pine\u0027, 2018, 5.5), (\u0027Pine\u0027, 2019, 6.0), (\u0027Oak\u0027, 2018, 4.5), (\u0027Oak\u0027, 2019, 5.0);", + "sql": "SELECT species, AVG(sequestration_rate) as avg_sequestration_rate FROM CarbonSequestration GROUP BY species;", + "sql_explanation": "The SQL query calculates the average carbon sequestration rate for different tree species by grouping the records based on the \u0027species\u0027 column and averaging the \u0027sequestration_rate\u0027 for each group." 
+}, { + "id": "3308", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum carbon sequestration value in each country\u0027s forests?", + "sql_context": "CREATE TABLE forests (forest_id INT, country TEXT, area REAL, carbon_sequestration REAL); INSERT INTO forests (forest_id, country, area, carbon_sequestration) VALUES (1, \u0027USA\u0027, 5000, 120), (2, \u0027Canada\u0027, 7000, 150), (3, \u0027Mexico\u0027, 3000, 100);", + "sql": "SELECT country, MAX(carbon_sequestration) as max_carbon_sequestration FROM forests GROUP BY country;", + "sql_explanation": "Groups forests by country and calculates the maximum carbon sequestration value in each country\u0027s forests." 
+}, { + "id": "3328", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average timber production, in cubic meters, for each region in the year 2020?", + "sql_context": "CREATE TABLE timber_production_by_region (id INT, region VARCHAR(255), year INT, cubic_meters FLOAT); INSERT INTO timber_production_by_region (id, region, year, cubic_meters) VALUES (1, \u0027North America\u0027, 2020, 789456.12), (2, \u0027South America\u0027, 2020, 678345.12), (3, \u0027Europe\u0027, 2020, 567890.12);", + "sql": "SELECT region, AVG(cubic_meters) FROM timber_production_by_region WHERE year \u003d 2020 GROUP BY region;", + "sql_explanation": "The SQL query filters the timber_production_by_region table to only include rows with a year of 2020. It then calculates the average cubic meters of timber production for each region, grouping the results by region." 
+}, { + "id": "3569", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which tree species have the highest average age in the \u0027Trees\u0027 table?", + "sql_context": "CREATE TABLE Trees (id INT, species VARCHAR(50), age INT); INSERT INTO Trees (id, species, age) VALUES (1, \u0027Pine\u0027, 30), (2, \u0027Oak\u0027, 50), (3, \u0027Maple\u0027, 25);", + "sql": "SELECT species, AVG(age) as avg_age FROM Trees GROUP BY species ORDER BY avg_age DESC LIMIT 1;", + "sql_explanation": "This query finds the tree species with the highest average age by using the AVG() function on the \u0027age\u0027 column, grouping by \u0027species\u0027, ordering in descending order, and limiting the result to 1 record." 
+}, { + "id": "3787", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "how many wildlife habitats are there in each region?", + "sql_context": "CREATE TABLE wildlife_habitats (id INT, region VARCHAR(255), habitat_type VARCHAR(255));", + "sql": "SELECT region, COUNT(DISTINCT id) as num_habitats FROM wildlife_habitats GROUP BY region;", + "sql_explanation": "This query counts the number of distinct wildlife habitats in each region by grouping the wildlife_habitats table by region and counting the number of distinct id values." +}, { + "id": "4326", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many trees are present in each forest type in the \u0027forest_inventory\u0027 table?", + "sql_context": "CREATE TABLE forest_inventory (id INT, forest_type VARCHAR(255), tree_count INT); INSERT INTO forest_inventory (id, forest_type, tree_count) VALUES (1, \u0027Temperate\u0027, 1000), (2, \u0027Tropical\u0027, 2000), (3, \u0027Boreal\u0027, 1500);", + "sql": "SELECT forest_type, SUM(tree_count) FROM forest_inventory GROUP BY forest_type;", + "sql_explanation": "The SQL query groups the \u0027forest_inventory\u0027 table by forest 
type and calculates the total tree count for each forest type using the SUM function." +}, { + "id": "4797", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of timber produced by each region in Asia?", + "sql_context": "CREATE TABLE timber_production_asia (region VARCHAR(255), volume INT); INSERT INTO timber_production_asia (region, volume) VALUES (\u0027East Asia\u0027, 2500), (\u0027South Asia\u0027, 1800), (\u0027Southeast Asia\u0027, 1200);", + "sql": "SELECT region, SUM(volume) FROM timber_production_asia GROUP BY region;", + "sql_explanation": "This query calculates the total volume of timber produced by each region in Asia. It does so by using the SUM() function on the volume column and grouping the records by region, providing the total volume of timber produced in each region." 
+}, { + "id": "617", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum speed of vessels in the \u0027Cruise Ship\u0027 category in the last year?", + "sql_context": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, speed FLOAT); INSERT INTO vessels (id, name, type, speed) VALUES (1, \u0027Cruise Ship A\u0027, \u0027Cruise Ship\u0027, 25), (2, \u0027Cruise Ship B\u0027, \u0027Cruise Ship\u0027, 30), (3, \u0027Cruise Ship C\u0027, \u0027Cruise Ship\u0027, 35);", + "sql": "SELECT vessels.type, MAX(vessels.speed) AS max_speed, MIN(vessels.speed) AS min_speed FROM vessels WHERE vessels.type \u003d \u0027Cruise Ship\u0027 AND vessels.id \u003e\u003d DATEADD(\u0027year\u0027, -1, CURRENT_DATE) GROUP BY vessels.type;", + "sql_explanation": "Calculates the maximum and minimum speed of vessels in the \u0027Cruise Ship\u0027 category in the last year." 
+}, { + "id": "1203", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum speed of Vessel5 in March 2021?", + "sql_context": "CREATE TABLE VesselSpeed(SpeedID INT, VesselID INT, Speed FLOAT, SpeedDate DATETIME); INSERT INTO VesselSpeed(SpeedID, VesselID, Speed, SpeedDate) VALUES (1, 5, 15.2, \u00272021-03-01 10:00:00\u0027), (2, 5, 17.8, \u00272021-03-15 13:00:00\u0027), (3, 5, 14.9, \u00272021-03-31 09:00:00\u0027);", + "sql": "SELECT VesselID, MIN(Speed) AS MinSpeed, MAX(Speed) AS MaxSpeed FROM VesselSpeed WHERE VesselID \u003d 5 AND SpeedDate BETWEEN \u00272021-03-01\u0027 AND \u00272021-03-31\u0027 GROUP BY VesselID;", + "sql_explanation": "This query calculates the minimum and maximum speed of Vessel5 in March 2021. It filters the VesselSpeed table by the VesselID and the speed date, then calculates the minimum and maximum speed using the MIN and MAX aggregate functions, grouped by the VesselID. The result will include the vessel\u0027s minimum and maximum speeds in March 2021." 
+}, { + "id": "1535", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show vessels with safety incidents in both the Caribbean Sea and the North Sea.", + "sql_context": "CREATE TABLE Vessels (ID INT, Name VARCHAR(50), Type VARCHAR(50)); CREATE TABLE SafetyIncidents (ID INT, VesselID INT, Location VARCHAR(50), IncidentType VARCHAR(50)); INSERT INTO Vessels (ID, Name, Type) VALUES (1, \u0027Ocean Titan\u0027, \u0027Cargo\u0027); INSERT INTO SafetyIncidents (ID, VesselID, Location, IncidentType) VALUES (1, 1, \u0027Caribbean Sea\u0027, \u0027Collision\u0027); INSERT INTO SafetyIncidents (ID, VesselID, Location, IncidentType) VALUES (2, 1, \u0027North Sea\u0027, \u0027Fire\u0027); INSERT INTO SafetyIncidents (ID, VesselID, Location, IncidentType) VALUES (3, 2, \u0027Caribbean Sea\u0027, \u0027Grounding\u0027);", + "sql": "SELECT si.VesselID FROM SafetyIncidents si WHERE si.Location IN (\u0027Caribbean Sea\u0027, \u0027North Sea\u0027) GROUP BY si.VesselID HAVING COUNT(DISTINCT si.Location) \u003d 2;", + "sql_explanation": "The SQL query groups safety incidents by vessel ID and filters for the Caribbean Sea and North Sea. It then counts the number of distinct locations per vessel and only returns those with incidents in both locations." 
+}, { + "id": "2536", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique ports where cargo was loaded onto vessels in Q1 2022 and the total quantity of cargo in metric tons.", + "sql_context": "CREATE TABLE vessel_cargo (vessel_id INT, load_date DATE, port VARCHAR(255), cargo_quantity INT);", + "sql": "SELECT port, SUM(cargo_quantity) FROM vessel_cargo WHERE QUARTER(load_date) \u003d 1 AND YEAR(load_date) \u003d 2022 GROUP BY port;", + "sql_explanation": "The SQL query uses the SUM() function to calculate the total \"cargo_quantity\" for each unique \"port\" in the \"vessel_cargo\" table during Q1 of 2022." 
+}, { + "id": "3090", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common type of incident in the \u0027safety_records\u0027 table?", + "sql_context": "CREATE TABLE safety_records (id INT, incident_type VARCHAR(50), incident_date DATE, description VARCHAR(100));", + "sql": "SELECT incident_type, COUNT(*) FROM safety_records GROUP BY incident_type ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "This SQL query groups the \u0027safety_records\u0027 table by \u0027incident_type\u0027 and calculates the count of each group, ordering by count in descending order and limiting the result to 1, providing the most common type of incident." 
+}, { + "id": "3110", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight for each vessel type in the Arctic region, for the month of July?", + "sql_context": "CREATE TABLE cargo_data (id INT, vessel_name VARCHAR(50), type VARCHAR(50), region VARCHAR(50), date DATE, cargo_weight INT);", + "sql": "SELECT type, AVG(cargo_weight) FROM cargo_data WHERE region \u003d \u0027Arctic\u0027 AND MONTH(date) \u003d 7 GROUP BY type;", + "sql_explanation": "This SQL query calculates the average cargo weight for each vessel type in the Arctic region, for the month of July. It does this by selecting the type column and the average of the cargo_weight column from the cargo_data table, where the region is \u0027Arctic\u0027 and the month of the date is July. The GROUP BY clause is used to group the results by the type column." 
+}, { + "id": "3928", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of each cargo type in the \u0027cargo_tracking\u0027 table?", + "sql_context": "CREATE TABLE cargo_tracking (cargo_id INT, cargo_type VARCHAR(50), weight FLOAT); INSERT INTO cargo_tracking (cargo_id, cargo_type, weight) VALUES (1, \u0027CargoType1\u0027, 5000), (2, \u0027CargoType2\u0027, 7000), (3, \u0027CargoType3\u0027, 6000);", + "sql": "SELECT cargo_type, SUM(weight) as total_weight FROM cargo_tracking GROUP BY cargo_type;", + "sql_explanation": "This query calculates the total weight of each cargo type in the \u0027cargo_tracking\u0027 table by using the SUM() function and GROUP BY clause." 
+}, { + "id": "904", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of employees who left in each month of 2020 from the \"employee_records\" table", + "sql_context": "CREATE TABLE employee_records (employee_id INT PRIMARY KEY, name TEXT, position TEXT, leaving_date DATE); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (1, \u0027John Doe\u0027, \u0027CTO\u0027, \u00272018-01-01\u0027); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (2, \u0027Jane Smith\u0027, \u0027COO\u0027, \u00272019-05-15\u0027); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (3, \u0027Alice Johnson\u0027, \u0027Data Analyst\u0027, \u00272020-03-20\u0027); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (4, \u0027Bella Williams\u0027, \u0027Data Scientist\u0027, \u00272020-04-30\u0027); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (5, \u0027Carlos Brown\u0027, \u0027Software Engineer\u0027, \u00272020-12-15\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM leaving_date) AS month, COUNT(*) AS count FROM employee_records WHERE leaving_date \u003e\u003d \u00272020-01-01\u0027 AND leaving_date \u003c \u00272021-01-01\u0027 GROUP BY month ORDER BY month;", + "sql_explanation": "This query gets the number of employees who left in each month of 2020 from the \"employee_records\" table. It does this by using the EXTRACT function to extract the month from the \"leaving_date\" and the GROUP BY clause to group the records by the month. 
The WHERE clause is used to filter the records to only those where the \"leaving_date\" is in 2020. The COUNT function is used to count the number of records in each group." +}, { + "id": "1128", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of companies in the technology sector with at least one female and one male founder, and list their names.", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founder_gender TEXT); INSERT INTO companies (id, name, industry, founder_gender) VALUES (1, \u0027TechNova\u0027, \u0027Technology\u0027, \u0027Female\u0027); INSERT INTO companies (id, name, industry, founder_gender) VALUES (2, \u0027InnoTech\u0027, \u0027Technology\u0027, \u0027Male\u0027);", + "sql": "SELECT companies.name, COUNT(DISTINCT founder_gender) AS founder_diversity FROM companies WHERE industry \u003d \u0027Technology\u0027 GROUP BY companies.name HAVING founder_diversity \u003e\u003d 2;", + "sql_explanation": "The SQL query performs a group by operation on the companies table, filters for technology companies, counts the number of distinct founder genders, and lists the names of companies with at least one female and one male founder." 
+}, { + "id": "1363", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the success rate of startups founded by people from each state in the US?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, founder_state TEXT, is_active BOOLEAN);", + "sql": "SELECT founder_state, 100.0 * AVG(CASE WHEN is_active THEN 1.0 ELSE 0.0 END) as success_rate FROM companies WHERE founder_state IS NOT NULL GROUP BY founder_state;", + "sql_explanation": "We group the results by founder_state and calculate the success rate by dividing the count of active startups by the total number of startups. We exclude results where founder_state is NULL." 
+}, { + "id": "1416", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of companies founded by underrepresented minorities in each industry?", + "sql_context": "CREATE TABLE Companies (id INT, name VARCHAR(50), industry VARCHAR(50), country VARCHAR(50), founding_year INT, founder_minority VARCHAR(10)); INSERT INTO Companies (id, name, industry, country, founding_year, founder_minority) VALUES (1, \u0027GreenTech\u0027, \u0027Renewable Energy\u0027, \u0027USA\u0027, 2019, \u0027Yes\u0027); INSERT INTO Companies (id, name, industry, country, founding_year, founder_minority) VALUES (2, \u0027SunPower\u0027, \u0027Renewable Energy\u0027, \u0027USA\u0027, 2018, \u0027No\u0027);", + "sql": "SELECT industry, ROUND(100.0 * SUM(CASE WHEN founder_minority \u003d \u0027Yes\u0027 THEN 1 ELSE 0 END) / COUNT(*), 2) as minority_percentage FROM Companies GROUP BY industry;", + "sql_explanation": "The SQL query filters companies by founder_minority and groups them by industry. Then, it calculates the percentage of companies founded by underrepresented minorities in each industry by using a conditional sum and dividing it by the total number of companies in each industry." 
+}, { + "id": "1894", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which industry has the highest average diversity score?", + "sql_context": "CREATE TABLE company (name VARCHAR(255), industry VARCHAR(100), diversity_score INT); INSERT INTO company (name, industry, diversity_score) VALUES (\u0027CompanyA\u0027, \u0027Technology\u0027, 80), (\u0027CompanyB\u0027, \u0027Finance\u0027, 90), (\u0027CompanyC\u0027, \u0027Technology\u0027, 85), (\u0027CompanyD\u0027, \u0027Retail\u0027, 70), (\u0027CompanyE\u0027, \u0027Fashion\u0027, 95), (\u0027CompanyF\u0027, \u0027Healthcare\u0027, 88);", + "sql": "SELECT industry, AVG(company.diversity_score) as avg_diversity_score FROM company GROUP BY industry ORDER BY avg_diversity_score DESC LIMIT 1;", + "sql_explanation": "The SQL query groups the records by the industry column and calculates the average of the diversity_score column for each group. It then orders the results by the average diversity score in descending order and returns the industry with the highest average score." 
+}, { + "id": "2145", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of companies founded by veterans in each year", + "sql_context": "CREATE TABLE companies (company_id INT, company_name VARCHAR(255), founding_year INT, founder_veteran BOOLEAN);", + "sql": "SELECT EXTRACT(YEAR FROM c.founding_year) AS year, COUNT(c.company_id) FROM companies c WHERE c.founder_veteran \u003d TRUE GROUP BY year;", + "sql_explanation": "This query groups the results by founding year and calculates the count of companies founded by veterans for each group." +}, { + "id": "2466", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 10 cities with the most funding, based on the total funding received by companies located in those cities", + "sql_context": "CREATE TABLE company_location (id INT, company_name VARCHAR(50), city VARCHAR(50), funding_amount DECIMAL(10, 2));", + "sql": "SELECT city, SUM(funding_amount) AS total_funding FROM company_location GROUP BY city ORDER BY total_funding DESC LIMIT 10;", + "sql_explanation": "This query lists the top 10 cities with the most funding, based on the total funding received by companies located in those cities. The GROUP BY clause groups the results by city. 
The SUM function calculates the total funding for each city. The ORDER BY clause orders the results by the total funding in descending order. The LIMIT clause limits the results to the top 10 cities." +}, { + "id": "2630", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of companies by founding year and gender?", + "sql_context": "CREATE TABLE Companies (id INT, name VARCHAR(50), industry VARCHAR(50), country VARCHAR(50), founding_year INT, founder_gender VARCHAR(10)); INSERT INTO Companies (id, name, industry, country, founding_year, founder_gender) VALUES (1, \u0027InnoTech\u0027, \u0027Tech\u0027, \u0027USA\u0027, 2015, \u0027Male\u0027); INSERT INTO Companies (id, name, industry, country, founding_year, founder_gender) VALUES (2, \u0027Code4All\u0027, \u0027Tech\u0027, \u0027Germany\u0027, 2018, \u0027Female\u0027);", + "sql": "SELECT founding_year, founder_gender, COUNT(*) as company_count FROM Companies GROUP BY founding_year, founder_gender;", + "sql_explanation": "The SQL query filters companies by founding year and gender, groups them by founding_year and founder_gender, and counts the number of occurrences for each group, returning the distribution of companies by founding year and gender." 
+}, { + "id": "2768", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patents were filed by companies founded before 2011?", + "sql_context": "CREATE TABLE company_innovation (company_id INT, founding_year INT, patent_count INT); INSERT INTO company_innovation (company_id, founding_year, patent_count) VALUES (1, 2009, 3), (2, 2011, 1), (3, 2008, 2), (4, 2010, 4), (5, 2010, 5);", + "sql": "SELECT founding_year, SUM(patent_count) FROM company_innovation WHERE founding_year \u003c 2011 GROUP BY founding_year;", + "sql_explanation": "This SQL query calculates the total number of patents filed by companies founded before 2011 by summing the \u0027patent_count\u0027 column where \u0027founding_year\u0027 is less than 2011 and grouping by \u0027founding_year\u0027." +}, { + "id": "4414", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of patents filed by company, grouped by country", + "sql_context": "CREATE TABLE company_patents (id INT, company_name VARCHAR(50), country VARCHAR(50), patent_number INT);", + "sql": "SELECT country, COUNT(*) AS num_patents FROM company_patents GROUP BY country;", + "sql_explanation": "This query shows the number of patents filed by company, grouped by country. 
The GROUP BY clause groups the results by country. The COUNT function counts the number of patents for each country." +}, { + "id": "4535", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many companies were founded in each year?", + "sql_context": "CREATE TABLE company_profiles (company_id INT, founding_year INT); INSERT INTO company_profiles (company_id, founding_year) VALUES (1, 2010), (2, 2012), (3, 2010), (4, 2011), (5, 2009), (6, 2008), (7, 2008), (8, 2011), (9, 2010), (10, 2012);", + "sql": "SELECT founding_year, COUNT(*) FROM company_profiles GROUP BY founding_year;", + "sql_explanation": "This SQL query calculates the number of companies founded in each year by counting the number of rows in the \u0027company_profiles\u0027 table where \u0027founding_year\u0027 is grouped by \u0027founding_year\u0027." 
+}, { + "id": "1104", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of successful and failed satellite launches by country in the last 5 years?", + "sql_context": "CREATE TABLE satellite_launches (id INT, country VARCHAR(255), launch_date DATE, launch_result VARCHAR(255));", + "sql": "SELECT country, launch_result, COUNT(*) as num_launches FROM satellite_launches WHERE launch_date \u003e\u003d DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR) GROUP BY country, launch_result;", + "sql_explanation": "The SQL query filters the records from the satellite_launches table based on the launch_date column to only include the records from the last 5 years, groups the records by country and launch_result, and calculates the count of records for each group, which represents the number of successful and failed satellite launches by country in the last 5 years." 
+}, { + "id": "1264", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the trend in the number of space missions launched per agency in the last 5 years?", + "sql_context": "CREATE TABLE space_missions_agency_yearly (id INT, agency VARCHAR(255), mission_year INT); INSERT INTO space_missions_agency_yearly (id, agency, mission_year) VALUES (1, \u0027NASA\u0027, 2017), (2, \u0027ESA\u0027, 2018), (3, \u0027ISRO\u0027, 2019), (4, \u0027Roscosmos\u0027, 2017), (5, \u0027NASA\u0027, 2018), (6, \u0027NASA\u0027, 2019), (7, \u0027ESA\u0027, 2020), (8, \u0027ISRO\u0027, 2021), (9, \u0027Roscosmos\u0027, 2018);", + "sql": "SELECT agency, mission_year, COUNT(*) AS num_missions FROM space_missions_agency_yearly WHERE mission_year \u003e\u003d 2016 GROUP BY agency, mission_year ORDER BY mission_year;", + "sql_explanation": "The SQL query calculates the number of space missions for each agency in the last 5 years by using the COUNT function on the space_missions_agency_yearly table, filtering for mission_year \u003e\u003d 2016, grouping the results by the agency and mission_year columns, and ordering the results by the mission_year column." 
+}, { + "id": "1267", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of flight hours for each spacecraft model in the last 5 years?", + "sql_context": "CREATE TABLE Spacecraft (spacecraft_model VARCHAR(50), flight_hours INT, last_maintenance_date DATE); INSERT INTO Spacecraft (spacecraft_model, flight_hours, last_maintenance_date) VALUES (\u0027SC-1\u0027, 5000, \u00272016-05-12\u0027), (\u0027SC-2\u0027, 7000, \u00272018-03-21\u0027), (\u0027SC-3\u0027, 3500, \u00272015-08-04\u0027);", + "sql": "SELECT spacecraft_model, SUM(flight_hours) as total_flight_hours FROM Spacecraft WHERE last_maintenance_date \u003e\u003d DATEADD(year, -5, GETDATE()) GROUP BY spacecraft_model;", + "sql_explanation": "Filters the Spacecraft table based on last_maintenance_date within the last 5 years, then calculates the sum of flight hours for each spacecraft_model." 
+}, { + "id": "1289", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of successful satellite deployments per year for the past 10 years?", + "sql_context": "CREATE TABLE SatelliteDeployments (Id INT, Year INT, Success BOOLEAN); INSERT INTO SatelliteDeployments VALUES (1, 2010, true), (2, 2011, false), (3, 2012, true), (4, 2013, true), (5, 2014, false), (6, 2015, true), (7, 2016, true), (8, 2017, false), (9, 2018, true), (10, 2019, true);", + "sql": "SELECT YEAR(DeploymentDate) as Year, COUNT(*) as SuccessfulDeployments FROM SatelliteDeployments WHERE Success \u003d true GROUP BY YEAR(DeploymentDate) ORDER BY Year DESC;", + "sql_explanation": "The SQL query calculates the number of successful satellite deployments per year for the past 10 years. It filters the records to only include successful deployments, then groups the records by year and calculates the count of successful deployments for each year." 
+}, { + "id": "1350", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of aircraft production rates by manufacturer?", + "sql_context": "CREATE TABLE aircraft_production (id INT, manufacturer VARCHAR(255), production_rate INT);", + "sql": "SELECT manufacturer, AVG(production_rate) as avg_production_rate, STDDEV(production_rate) as stddev_production_rate FROM aircraft_production GROUP BY manufacturer;", + "sql_explanation": "The SQL query groups the records from the aircraft_production table based on the manufacturer column, and calculates the average and standard deviation of the production_rate for each group, which represents the distribution of aircraft production rates by manufacturer." 
+}, { + "id": "1531", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total production for each aircraft model and the number of unique manufacturing dates.", + "sql_context": "CREATE TABLE Aircrafts (AircraftID INT, Model VARCHAR(20), ManufacturingDate DATE, TotalProduced INT); CREATE TABLE ManufacturingDates (ManufacturingDate DATE); INSERT INTO ManufacturingDates (ManufacturingDate) VALUES (\u00271976-08-01\u0027), (\u00272006-01-01\u0027); INSERT INTO Aircrafts (AircraftID, Model, ManufacturingDate, TotalProduced) VALUES (1, \u0027F-16\u0027, \u00271976-08-01\u0027, 4500); INSERT INTO Aircrafts (AircraftID, Model, ManufacturingDate, TotalProduced) VALUES (2, \u0027F-35\u0027, \u00272006-01-01\u0027, 500); INSERT INTO Aircrafts (AircraftID, Model, ManufacturingDate, TotalProduced) VALUES (3, \u0027F-35\u0027, \u00272006-01-01\u0027, 600);", + "sql": "SELECT Model, SUM(TotalProduced) as \u0027Total Production\u0027, COUNT(DISTINCT ManufacturingDate) as \u0027Number of Manufacturing Dates\u0027 FROM Aircrafts GROUP BY Model;", + "sql_explanation": "Identify the total production for each aircraft model and the number of unique manufacturing dates." 
+}, { + "id": "2129", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of satellites deployed by \u0027SatelliteCo\u0027 each year, sorted by the number of satellites deployed in descending order?", + "sql_context": "CREATE TABLE satellites (id INT, company VARCHAR(255), year INT, quantity INT); INSERT INTO satellites (id, company, year, quantity) VALUES (1, \u0027SatelliteCo\u0027, 2010, 5), (2, \u0027SatelliteCo\u0027, 2011, 8), (3, \u0027SpaceComm\u0027, 2010, 3), (4, \u0027SatelliteCo\u0027, 2012, 6);", + "sql": "SELECT year, SUM(quantity) as total_deployed FROM satellites WHERE company \u003d \u0027SatelliteCo\u0027 GROUP BY year ORDER BY total_deployed DESC;", + "sql_explanation": "This query lists the number of satellites deployed by \u0027SatelliteCo\u0027 each year, sorted by the number of satellites deployed in descending order. It does so by grouping the rows by the \u0027year\u0027 column, filtering the rows where the \u0027company\u0027 is \u0027SatelliteCo\u0027, summing the \u0027quantity\u0027 column to get the total number of satellites deployed in each year, and then ordering the results by the \u0027total_deployed\u0027 column in descending order." 
+}, { + "id": "2315", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average lifespan of satellites manufactured by Millennium Space Systems?", + "sql_context": "CREATE TABLE SatelliteLifespan (id INT, name VARCHAR(255), manufacturer VARCHAR(255), lifespan INT); INSERT INTO SatelliteLifespan (id, name, manufacturer, lifespan) VALUES (1, \u0027Xena\u0027, \u0027Millennium Space Systems\u0027, 5), (2, \u0027Microlatt\u0027, \u0027Millennium Space Systems\u0027, 7), (3, \u0027Aquila\u0027, \u0027Planet Labs\u0027, 3);", + "sql": "SELECT manufacturer, AVG(lifespan) FROM SatelliteLifespan WHERE manufacturer \u003d \u0027Millennium Space Systems\u0027 GROUP BY manufacturer;", + "sql_explanation": "This query groups the SatelliteLifespan table by the manufacturer column and calculates the average lifespan for satellites manufactured by Millennium Space Systems." 
+}, { + "id": "2489", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of flights per day for each aircraft model?", + "sql_context": "CREATE TABLE aircraft_flights (flight_id INT, model_id INT, date DATE);", + "sql": "SELECT model_id, AVG(DATEDIFF(\u0027day\u0027, MIN(date), MAX(date))) as avg_flights_per_day FROM aircraft_flights GROUP BY model_id;", + "sql_explanation": "This SQL query calculates the average number of flights per day for each aircraft model by grouping the records by model_id and calculating the difference between the minimum and maximum date for each group. The results are then divided by the number of days between the minimum and maximum date for each group to obtain the average number of flights per day." 
+}, { + "id": "2618", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of manufacturing for each aircraft type, including only completed manufacturing orders?", + "sql_context": "CREATE TABLE ManufacturingOrders (OrderID INT, OrderDate DATE, AircraftType VARCHAR(50), Manufacturer VARCHAR(50), Cost INT); INSERT INTO ManufacturingOrders (OrderID, OrderDate, AircraftType, Manufacturer, Cost) VALUES (1, \u00272021-09-01\u0027, \u0027Aircraft1\u0027, \u0027Manufacturer1\u0027, 1000000);", + "sql": "SELECT AircraftType, SUM(Cost) AS TotalCost FROM ManufacturingOrders WHERE OrderDate \u003c GETDATE() GROUP BY AircraftType;", + "sql_explanation": "This query calculates the total cost of manufacturing for each aircraft type, including only completed manufacturing orders. It uses a sum function to add up the costs and groups by aircraft type. The results only include orders with a date before the current date." 
+}, { + "id": "2629", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of unsuccessful satellite launches by private companies", + "sql_context": "CREATE TABLE Launches (LaunchID INT, LaunchDate DATE, SatelliteName VARCHAR(50), Company VARCHAR(50), Success VARCHAR(50)); INSERT INTO Launches (LaunchID, LaunchDate, SatelliteName, Company, Success) VALUES (1, \u00272022-01-01\u0027, \u0027SatX\u0027, \u0027SpaceX\u0027, \u0027Failure\u0027); INSERT INTO Launches (LaunchID, LaunchDate, SatelliteName, Company, Success) VALUES (2, \u00272022-02-10\u0027, \u0027SatY\u0027, \u0027Blue Origin\u0027, \u0027Success\u0027);", + "sql": "SELECT Company, COUNT(*) FROM Launches WHERE Success \u003d \u0027Failure\u0027 AND Company NOT LIKE \u0027%Government%\u0027 GROUP BY Company;", + "sql_explanation": "Count the number of unsuccessful satellite launches per private company by counting the number of rows where the Success column is \u0027Failure\u0027 and the Company does not contain the word \u0027Government\u0027, then group by the Company column." 
+}, { + "id": "3035", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of all space missions by mission type and launch year?", + "sql_context": "CREATE TABLE SpaceMissions (MissionID INT, MissionType VARCHAR(50), LaunchYear INT, Cost INT);", + "sql": "SELECT MissionType, LaunchYear, SUM(Cost) AS TotalCost FROM SpaceMissions GROUP BY MissionType, LaunchYear;", + "sql_explanation": "Total cost of all space missions by mission type and launch year is calculated by grouping MissionType and LaunchYear and finding the sum of Cost for each group." +}, { + "id": "3232", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most recent flight for each aircraft model in the FlightLogs table?", + "sql_context": "CREATE TABLE FlightLogs (flight_id INT, aircraft_model VARCHAR(50), flight_date DATE); INSERT INTO FlightLogs (flight_id, aircraft_model, flight_date) VALUES (1, \u0027B747\u0027, \u00272022-01-01\u0027), (2, \u0027A320\u0027, \u00272021-05-01\u0027), (3, \u0027B747\u0027, \u00272022-03-01\u0027);", + "sql": "SELECT aircraft_model, MAX(flight_date) AS most_recent_flight FROM FlightLogs GROUP BY aircraft_model;", + 
"sql_explanation": "Get the most recent flight date for each aircraft model from the FlightLogs table." +}, { + "id": "3628", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum flight speed for each aircraft model by year?", + "sql_context": "CREATE TABLE flights (id INT, model VARCHAR(50), flight_speed DECIMAL(5,2), year INT); INSERT INTO flights (id, model, flight_speed, year) VALUES (1, \u0027Boeing 737\u0027, 900, 2019), (2, \u0027Airbus A320\u0027, 950, 2019), (3, \u0027Boeing 787\u0027, 1000, 2018), (4, \u0027SpaceX Starship\u0027, 1200, 2022);", + "sql": "SELECT model, year, MAX(flight_speed) as max_flight_speed FROM flights GROUP BY model, year;", + "sql_explanation": "This SQL query groups the flights table by model and year, and calculates the maximum flight speed for each aircraft model by year. It returns the result." 
+}, { + "id": "3803", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of satellite weight classes in the global satellite fleet?", + "sql_context": "CREATE TABLE satellite_fleet (id INT, country VARCHAR(255), manufacturer VARCHAR(255), weight_class VARCHAR(255), fleet_size INT);", + "sql": "SELECT weight_class, SUM(fleet_size) as total_fleet_size FROM satellite_fleet GROUP BY 1;", + "sql_explanation": "The SQL query groups the records from the satellite_fleet table based on the weight_class column, and calculates the sum of the fleet_size for each group, which represents the distribution of satellite weight classes in the global satellite fleet." 
+}, { + "id": "3854", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of satellites deployed by each country?", + "sql_context": "CREATE SCHEMA if not exists aerospace;CREATE TABLE if not exists aerospace.satellites (id INT PRIMARY KEY, country VARCHAR(50), name VARCHAR(50), launch_date DATE); INSERT INTO aerospace.satellites (id, country, name, launch_date) VALUES (1, \u0027USA\u0027, \u0027Sat1\u0027, \u00272000-01-01\u0027), (2, \u0027USA\u0027, \u0027Sat2\u0027, \u00272001-01-01\u0027), (3, \u0027China\u0027, \u0027Sat3\u0027, \u00272002-01-01\u0027);", + "sql": "SELECT country, COUNT(*) as total_satellites FROM aerospace.satellites GROUP BY country;", + "sql_explanation": "The SQL query groups the satellites table by country and calculates the count of satellites for each country." 
+}, { + "id": "4140", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total flight hours for each aircraft type?", + "sql_context": "CREATE TABLE Aircraft_Types (Id INT, Aircraft_Type VARCHAR(50), Flight_Hours INT); INSERT INTO Aircraft_Types (Id, Aircraft_Type, Flight_Hours) VALUES (1, \u0027B737\u0027, 1000), (2, \u0027A320\u0027, 2000), (3, \u0027B747\u0027, 3000);", + "sql": "SELECT Aircraft_Type, SUM(Flight_Hours) FROM Aircraft_Types GROUP BY Aircraft_Type;", + "sql_explanation": "This query calculates the total flight hours for each aircraft type by grouping and aggregating the Aircraft_Types table." 
+}, { + "id": "4166", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many satellites have been launched by each country?", + "sql_context": "CREATE TABLE SatelliteLaunches (LaunchID INT, SatelliteID INT, Country VARCHAR(50), LaunchDate DATETIME); INSERT INTO SatelliteLaunches (LaunchID, SatelliteID, Country, LaunchDate) VALUES (1, 1, \u0027USA\u0027, \u00272022-01-01\u0027); INSERT INTO SatelliteLaunches (LaunchID, SatelliteID, Country, LaunchDate) VALUES (2, 2, \u0027France\u0027, \u00272022-02-15\u0027);", + "sql": "SELECT Country, COUNT(*) as NumSatellites FROM SatelliteLaunches GROUP BY Country;", + "sql_explanation": "This query groups the SatelliteLaunches table by the Country column and calculates the count of records within each group." 
+}, { + "id": "4401", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many space missions were attempted by each country?", + "sql_context": "CREATE TABLE space_missions (mission_id INT, mission_country VARCHAR(100), mission_year INT);", + "sql": "SELECT mission_country, COUNT(*) FROM space_missions GROUP BY mission_country;", + "sql_explanation": "The SQL query lists the number of space missions attempted by each country by selecting the mission_country column, and using the COUNT function to count the number of space missions for each country, and then grouping the results by mission_country." 
+}, { + "id": "421", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of chemical substances produced in India per month for the past 2 years.", + "sql_context": "CREATE TABLE production (id INT, country VARCHAR(255), production_amount FLOAT, production_date DATE);", + "sql": "SELECT country, DATE_FORMAT(production_date, \u0027%Y-%m\u0027) as month, SUM(production_amount) as total_production FROM production WHERE country \u003d \u0027India\u0027 AND production_date \u003e DATE_SUB(CURDATE(), INTERVAL 2 YEAR) GROUP BY country, month;", + "sql_explanation": "1. Filter rows based on country and production_date. 2. Group remaining rows by the country and month columns. 3. Apply the SUM function to the production_amount column for each group. 4. Use the DATE_FORMAT function to extract the year and month from production_date." 
+}, { + "id": "554", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average weight of chemical substances in the pharmaceutical category, manufactured in the United Kingdom in the last 6 months.", + "sql_context": "CREATE TABLE chemicals (id INT, name VARCHAR(255), weight FLOAT, manufacturer_country VARCHAR(255), category VARCHAR(255), production_date DATE);", + "sql": "SELECT category, AVG(weight) as avg_weight FROM chemicals WHERE manufacturer_country \u003d \u0027United Kingdom\u0027 AND category \u003d \u0027pharmaceutical\u0027 AND production_date \u003e DATE_SUB(CURDATE(), INTERVAL 6 MONTH) GROUP BY category;", + "sql_explanation": "1. Filter rows based on manufacturer_country, category, and production_date. 2. Group remaining rows by the category column. 3. Apply the AVG function to the weight column for each group." 
+}, { + "id": "751", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of safety incidents recorded in a month for chemical plants in the Northeast region?", + "sql_context": "CREATE TABLE incidents (id INT, plant TEXT, incident_date DATE, incident_type TEXT); INSERT INTO incidents (id, plant, incident_date, incident_type) VALUES (1, \u0027Northeast Plant 1\u0027, \u00272021-03-17\u0027, \u0027Leak\u0027), (2, \u0027Northeast Plant 2\u0027, \u00272021-05-09\u0027, \u0027Explosion\u0027);", + "sql": "SELECT MAX(EXTRACT(MONTH FROM incident_date)) AS max_month, MAX(EXTRACT(YEAR FROM incident_date)) AS max_year FROM incidents WHERE plant LIKE \u0027Northeast%\u0027 GROUP BY EXTRACT(YEAR FROM incident_date);", + "sql_explanation": "1. Filter incidents in Northeast plants. 2. Extract the month and year from incident dates. 3. Group incidents by year. 4. Find the maximum month across all years." 
+}, { + "id": "843", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 2 manufacturers with the highest total weight of chemicals produced, in the chemicals domain, for the year 2022", + "sql_context": "CREATE TABLE chemicals_by_year (manufacturer_id INT, manufacturer_name VARCHAR(50), year INT, weight FLOAT); INSERT INTO chemicals_by_year (manufacturer_id, manufacturer_name, year, weight) VALUES (1, \u0027ChemCo Mexico\u0027, 2022, 450.5), (2, \u0027Canadian Chemicals\u0027, 2022, 500.3), (3, \u0027Brazilian BioChem\u0027, 2022, 300.7), (4, \u0027Indian Innovative Chemicals\u0027, 2022, 600.5), (5, \u0027Chinese Chemicals Corp\u0027, 2022, 400.9);", + "sql": "SELECT manufacturer_id, manufacturer_name, SUM(weight) as total_weight FROM chemicals_by_year WHERE year \u003d 2022 GROUP BY manufacturer_id, manufacturer_name ORDER BY total_weight DESC LIMIT 2;", + "sql_explanation": "The SQL query identifies the top 2 manufacturers with the highest total weight of chemicals produced, in the chemicals domain, for the year 2022. It first filters the data for the year 2022 using the WHERE clause. Then, it calculates the total weight of chemicals produced by each manufacturer using the SUM function and GROUP BY clause. Finally, it orders the results based on the total weight in descending order and limits the results to the top 2 manufacturers using the ORDER BY and LIMIT clauses." 
+}, { + "id": "1212", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of chemical substances manufactured in the US that were produced in the last 5 years, grouped by their respective categories?", + "sql_context": "CREATE TABLE chemicals (id INT, name VARCHAR(255), weight FLOAT, manufacturer_country VARCHAR(255), category VARCHAR(255), production_date DATE);", + "sql": "SELECT category, AVG(weight) as avg_weight FROM chemicals WHERE manufacturer_country \u003d \u0027USA\u0027 AND production_date \u003e DATE_SUB(CURDATE(), INTERVAL 5 YEAR) GROUP BY category;", + "sql_explanation": "1. Filter rows based on manufacturer_country and production_date. 2. Group remaining rows by the category column. 3. Apply the AVG function to the weight column for each group." 
+}, { + "id": "1600", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of safety incidents recorded for each production site in the past six months?", + "sql_context": "CREATE TABLE production_sites(id INT, site_name TEXT, safety_incidents INT, last_inspection_date DATE); INSERT INTO production_sites (id, site_name, safety_incidents, last_inspection_date) VALUES (1, \u0027Site A\u0027, 2, \u00272021-08-01\u0027), (2, \u0027Site B\u0027, 1, \u00272021-07-15\u0027);", + "sql": "SELECT site_name, MAX(safety_incidents) FROM production_sites WHERE last_inspection_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY site_name;", + "sql_explanation": "This query calculates the maximum number of safety incidents recorded for each production site in the past six months by filtering the rows in the production_sites table where the last_inspection_date is within the last six months, grouping the results by site_name, and then selecting the site_name and the maximum value of the safety_incidents column." 
+}, { + "id": "2500", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of safety incidents for each chemical type, ordered from most to least incidents?", + "sql_context": "CREATE TABLE safety_incidents (chemical_type VARCHAR(255), incident_date DATE); INSERT INTO safety_incidents (chemical_type, incident_date) VALUES (\u0027Type A\u0027, \u00272020-01-05\u0027), (\u0027Type A\u0027, \u00272020-03-12\u0027), (\u0027Type B\u0027, \u00272020-02-18\u0027), (\u0027Type C\u0027, \u00272020-01-02\u0027), (\u0027Type C\u0027, \u00272020-04-20\u0027), (\u0027Type D\u0027, \u00272020-03-03\u0027);", + "sql": "SELECT chemical_type, COUNT(*) as incident_count FROM safety_incidents GROUP BY chemical_type ORDER BY incident_count DESC", + "sql_explanation": "This query groups safety incidents by chemical type and calculates the count of incidents for each type. It then orders the results from most to least incidents, providing insight into which chemical types have the most safety concerns." 
+}, { + "id": "3342", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of safety incidents in each manufacturing plant, grouped by the state?", + "sql_context": "CREATE TABLE ManufacturingPlants (PlantID INT, PlantName TEXT, State TEXT, SafetyIncidents INT); INSERT INTO ManufacturingPlants (PlantID, PlantName, State, SafetyIncidents) VALUES (1, \u0027Plant A\u0027, \u0027Texas\u0027, 3), (2, \u0027Plant B\u0027, \u0027California\u0027, 2), (3, \u0027Plant C\u0027, \u0027Texas\u0027, 1), (4, \u0027Plant D\u0027, \u0027California\u0027, 5);", + "sql": "SELECT State, SUM(SafetyIncidents) AS TotalSafetyIncidents FROM ManufacturingPlants GROUP BY State;", + "sql_explanation": "This query lists the number of safety incidents in each manufacturing plant, grouped by the state. It groups the ManufacturingPlants table by State, and calculates the sum of the SafetyIncidents column from the filtered rows. Finally, it returns the State and TotalSafetyIncidents columns from the grouped rows." 
+}, { + "id": "3350", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average production quantity of chemical \u0027C456\u0027 in each country", + "sql_context": "CREATE TABLE chemical_production (id INT PRIMARY KEY, chemical_id VARCHAR(10), quantity INT, country VARCHAR(50)); INSERT INTO chemical_production (id, chemical_id, quantity, country) VALUES (1, \u0027C123\u0027, 500, \u0027USA\u0027), (2, \u0027C456\u0027, 300, \u0027Canada\u0027), (3, \u0027C123\u0027, 100, \u0027Germany\u0027), (4, \u0027C456\u0027, 250, \u0027USA\u0027), (5, \u0027C456\u0027, 350, \u0027Canada\u0027);", + "sql": "SELECT country, AVG(quantity) FROM chemical_production WHERE chemical_id \u003d \u0027C456\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the average production quantity of chemical \u0027C456\u0027 in each country by grouping by country and calculating the average quantity where chemical_id is \u0027C456\u0027." 
+}, { + "id": "4030", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of waste generated per unit of product in the waste_generation_per_product table?", + "sql_context": "CREATE TABLE waste_generation_per_product (product VARCHAR(255), waste_amount FLOAT);", + "sql": "SELECT product, MAX(waste_amount) FROM waste_generation_per_product GROUP BY product;", + "sql_explanation": "The SQL query calculates the maximum amount of waste generated per unit of product in the waste_generation_per_product table by using the MAX function and the GROUP BY clause." +}, { + "id": "1026", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cruelty-free brand has the highest-rated mascara in the UK?", + "sql_context": "CREATE TABLE cosmetics.mascara_reviews (review_id INT, product_name VARCHAR(50), brand VARCHAR(50), cruelty_free BOOLEAN, rating INT); INSERT INTO cosmetics.mascara_reviews (review_id, product_name, brand, cruelty_free, rating) VALUES (1, \u0027Lengthening Mascara\u0027, \u0027The Body Shop\u0027, true, 4), (2, \u0027Volumizing Mascara\u0027, \u0027Urban Decay\u0027, false, 5), (3, \u0027Waterproof Mascara\u0027, 
\u0027NYX\u0027, true, 3), (4, \u0027Organic Mascara\u0027, \u0027EcoTools\u0027, true, 4), (5, \u0027Tinted Mascara\u0027, \u0027Benefit\u0027, false, 5);", + "sql": "SELECT brand, AVG(rating) as avg_rating FROM cosmetics.mascara_reviews WHERE product_name LIKE \u0027%mascara%\u0027 AND cruelty_free \u003d true GROUP BY brand ORDER BY avg_rating DESC LIMIT 1;", + "sql_explanation": "The SQL query calculates the average rating for cruelty-free mascaras from the mascara_reviews table. It filters for cruelty-free mascaras. Then, it groups the results by brand and orders them in descending order by the average rating. Finally, it limits the results to the brand with the highest average rating." +}, { + "id": "1450", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What percentage of cosmetic products from India have a safety rating above 4.5?", + "sql_context": "CREATE TABLE product_safety (product_name VARCHAR(100), launch_year INT, safety_rating DECIMAL(3,2), country VARCHAR(100)); INSERT INTO product_safety (product_name, launch_year, safety_rating, country) VALUES (\u0027Lush Cleanser\u0027, 2020, 4.8, \u0027India\u0027), (\u0027The Body Shop Moisturizer\u0027, 2020, 4.6, \u0027India\u0027), (\u0027Pacifica Serum\u0027, 2019, 4.9, \u0027USA\u0027), (\u0027Neem Oil\u0027, 2021, 4.6, \u0027India\u0027), (\u0027Turmeric\u0027, 2021, 4.7, \u0027India\u0027);", + "sql": "SELECT country, 100.0 * AVG(CASE WHEN safety_rating \u003e 4.5 THEN 1 ELSE 0 END) AS pct_safety_rating FROM product_safety WHERE country \u003d \u0027India\u0027 GROUP BY country;", + 
"sql_explanation": "This query first creates a table called product_safety and inserts 5 rows with product names, their launch years, their safety ratings, and their country. Then, it calculates the percentage of cosmetic products from India that have a safety rating above 4.5 and returns the country and percentage." +}, { + "id": "1571", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 countries with the most conventional ingredients sourced.", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, ingredient_name TEXT, organic TEXT, product_id INT, country TEXT); INSERT INTO ingredients VALUES (1, \u0027Jojoba Oil\u0027, \u0027Organic\u0027, 1, \u0027Mexico\u0027), (2, \u0027Shea Butter\u0027, \u0027Organic\u0027, 2, \u0027Ghana\u0027), (3, \u0027Aloe Vera\u0027, \u0027Organic\u0027, 3, \u0027Mexico\u0027), (4, \u0027Rosehip Oil\u0027, \u0027Organic\u0027, 4, \u0027Chile\u0027), (5, \u0027Cocoa Butter\u0027, \u0027Conventional\u0027, 5, \u0027Ghana\u0027);", + "sql": "SELECT country, COUNT(*) as conventional_count FROM ingredients WHERE organic \u003d \u0027Conventional\u0027 GROUP BY country ORDER BY conventional_count DESC LIMIT 5;", + "sql_explanation": "The SQL query filters the ingredients table to only include rows where organic is \u0027Conventional\u0027, groups the result by the country column, calculates the count of rows for each group, orders the result by the count in descending order, and limits the output to the top 5 records." 
+}, { + "id": "2204", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many consumer complaints were there for each cosmetic product in 2021?", + "sql_context": "CREATE TABLE Consumer_Complaints (ComplaintID INT, ProductID INT, ComplaintDate DATE); INSERT INTO Consumer_Complaints (ComplaintID, ProductID, ComplaintDate) VALUES (1, 101, \u00272021-01-01\u0027), (2, 102, \u00272021-02-01\u0027), (3, 101, \u00272021-03-01\u0027), (4, 103, \u00272021-04-01\u0027), (5, 102, \u00272021-05-01\u0027), (6, 101, \u00272021-06-01\u0027);", + "sql": "SELECT ProductID, COUNT(*) as Complaints FROM Consumer_Complaints WHERE EXTRACT(YEAR FROM ComplaintDate) \u003d 2021 GROUP BY ProductID;", + "sql_explanation": "The SQL query counts the number of consumer complaints for each cosmetic product in 2021 by extracting the year from the complaint date and grouping the complaints by product ID. It then counts the number of complaints for each product ID." 
+}, { + "id": "2235", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show countries with the highest number of safety incidents in 2022.", + "sql_context": "CREATE TABLE safety_incident (id INT, product_id INT, country VARCHAR(50), year INT, PRIMARY KEY (id)); INSERT INTO safety_incident (id, product_id, country, year) VALUES (1, 1, \u0027USA\u0027, 2022), (2, 2, \u0027Canada\u0027, 2022), (3, 3, \u0027Mexico\u0027, 2022);", + "sql": "SELECT country, COUNT(*) as incidents_count FROM safety_incident WHERE year \u003d 2022 GROUP BY country ORDER BY incidents_count DESC;", + "sql_explanation": "This query selects the country and the count of safety incidents from the safety_incident table where the year is 2022, groups the results by country, and orders them in descending order by the count of incidents to show the countries with the highest number of safety incidents." 
+}, { + "id": "2584", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total incident count for each product in a specific year.", + "sql_context": "CREATE TABLE ProductSafety (id INT, product_id INT, year INT, incident_count INT); INSERT INTO ProductSafety (id, product_id, year, incident_count) VALUES (1, 1, 2020, 2), (2, 1, 2019, 1), (3, 2, 2020, 0), (4, 2, 2019, 3), (5, 3, 2020, 1), (6, 3, 2019, 4);", + "sql": "SELECT product_id, SUM(incident_count) as total_incident_count FROM ProductSafety WHERE year \u003d 2020 GROUP BY product_id;", + "sql_explanation": "This query calculates the total incident count for each product in a specific year. It uses the SUM function to find the total incident count, and groups the results by product_id. The query filters the results to only include rows with a year of 2020." 
+}, { + "id": "3361", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating for cruelty-free products in each brand in the database?", + "sql_context": "CREATE TABLE Brand_Rating (id INT, brand VARCHAR(255), product VARCHAR(255), rating INT, cruelty_free BOOLEAN); INSERT INTO Brand_Rating (id, brand, product, rating, cruelty_free) VALUES (1, \u0027Lush\u0027, \u0027Soak Stimulant Bath Bomb\u0027, 5, true), (2, \u0027The Body Shop\u0027, \u0027Born Lippy Strawberry Lip Balm\u0027, 4, true), (3, \u0027Estee Lauder\u0027, \u0027Advanced Night Repair Synchronized Recovery Complex II\u0027, 5, false), (4, \u0027Lush\u0027, \u0027Angels on Bare Skin Cleanser\u0027, 4, true), (5, \u0027The Body Shop\u0027, \u0027Tea Tree Skin Clearing Facial Wash\u0027, 3, true);", + "sql": "SELECT brand, AVG(rating) as avg_rating FROM Brand_Rating WHERE cruelty_free \u003d true GROUP BY brand;", + "sql_explanation": "This query calculates the average rating for cruelty-free products for each brand in the Brand_Rating table. It filters the rows where the cruelty_free column is true, groups the results by brand, and calculates the average rating for each group." 
+}, { + "id": "4380", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of organic and non-organic ingredients used in cosmetic products.", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, organic BOOLEAN, product_id INT);", + "sql": "SELECT organic, COUNT(*) as num_ingredients FROM ingredients GROUP BY organic;", + "sql_explanation": "The query groups the ingredients by organic status and calculates the number of organic and non-organic ingredients used in cosmetic products." +}, { + "id": "745", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of professional development courses completed by teachers in the past year, broken down by their years of experience?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, years_of_experience INT, professional_development_course_completion_date DATE); INSERT INTO teachers (teacher_id, years_of_experience, professional_development_course_completion_date) VALUES (1, 5, \u00272022-01-01\u0027), (2, 10, \u00272021-12-15\u0027), (3, 2, \u00272022-03-05\u0027);", + "sql": "SELECT years_of_experience, AVG(COUNT(*)) as avg_courses FROM teachers 
WHERE professional_development_course_completion_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY years_of_experience;", + "sql_explanation": "This SQL query calculates the average number of professional development courses completed by teachers in the past year, broken down by their years of experience. It first filters the records where the professional development course completion date is within the past year, and then groups the results by years_of_experience. It then calculates the number of courses completed by each teacher for each year of experience using the COUNT() function, and finally calculates the average number of courses using the AVG() function." +}, { + "id": "756", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students per school in the last year?", + "sql_context": "CREATE TABLE student_mental_health (student_id INT, school_id INT, mental_health_score INT, date DATE); INSERT INTO student_mental_health (student_id, school_id, mental_health_score, date) VALUES (1, 101, 75, \u00272021-09-01\u0027); INSERT INTO student_mental_health (student_id, school_id, mental_health_score, date) VALUES (2, 101, 80, \u00272021-09-02\u0027);", + "sql": "SELECT school_id, AVG(mental_health_score) as avg_mental_health_score FROM student_mental_health WHERE date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY school_id ORDER BY avg_mental_health_score DESC;", + "sql_explanation": "This query calculates the average mental health score for each school in the past year. 
It does this by filtering the student_mental_health table for dates within the past year, then grouping by school_id and calculating the average mental health score for each group using the AVG function." +}, { + "id": "1096", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students have passed the exam in each subject area in the last academic year?", + "sql_context": "CREATE TABLE exam_results (student_id INT, subject_area VARCHAR(50), passed BOOLEAN, exam_date DATE); INSERT INTO exam_results (student_id, subject_area, passed, exam_date) VALUES (1, \u0027Mathematics\u0027, true, \u00272021-12-01\u0027), (2, \u0027Mathematics\u0027, false, \u00272021-11-01\u0027), (3, \u0027Science\u0027, true, \u00272022-02-01\u0027), (4, \u0027Science\u0027, false, \u00272021-09-01\u0027);", + "sql": "SELECT subject_area, COUNT(student_id) as num_students_passed FROM exam_results WHERE exam_date \u003e\u003d DATEADD(year, -1, CURRENT_TIMESTAMP) AND passed \u003d true GROUP BY subject_area;", + "sql_explanation": "Calculate the number of students who have passed the exam in each subject area in the last academic year by partitioning the data by subject_area and aggregating with COUNT function. Only consider the exam results with passed \u003d true." 
+}, { + "id": "1131", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who passed the lifelong learning exam in each country in the last year?", + "sql_context": "CREATE TABLE student_exam_results (student_id INT, country VARCHAR(255), exam_id INT, pass INT); INSERT INTO student_exam_results VALUES (1, \u0027USA\u0027, 1, 1), (2, \u0027Canada\u0027, 1, 1), (3, \u0027USA\u0027, 1, 0);", + "sql": "SELECT country, COUNT(*) * 100.0 / SUM(COUNT(*)) OVER() as pass_percentage FROM student_exam_results WHERE pass \u003d 1 AND date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY country;", + "sql_explanation": "The SQL query calculates the percentage of students who passed the lifelong learning exam in each country by using a GROUP BY clause to group the exam results by country and a COUNT aggregate function to count the number of passing exams for each country. It then uses a window function to calculate the total number of exams and calculates the percentage of passing exams for each country." 
+}, { + "id": "1458", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of open pedagogy projects completed per institution in the last year?", + "sql_context": "CREATE TABLE open_pedagogy_projects (project_id INT, institution_id INT, completion_date DATE);", + "sql": "SELECT institution_id, COUNT(project_id) FROM open_pedagogy_projects WHERE completion_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY institution_id;", + "sql_explanation": "This query calculates the total number of open pedagogy projects completed per institution in the last year. It groups the records by institution_id and counts the number of projects for each group, where the completion date is within the last year." 
+}, { + "id": "1512", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of teacher trainings per month in 2021", + "sql_context": "CREATE TABLE teacher_training (id INT PRIMARY KEY, teacher_id INT, training_type VARCHAR(255), completed_date DATE);", + "sql": "SELECT DATE_FORMAT(completed_date, \u0027%Y-%m\u0027) AS month, COUNT(*) AS total_trainings FROM teacher_training WHERE YEAR(completed_date) \u003d 2021 GROUP BY month;", + "sql_explanation": "The query extracts the year and month from the completed_date column and groups the results by month. It then counts the number of rows (trainings) for each month and returns the total number of teacher trainings per month in 2021." 
+}, { + "id": "1613", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of lifelong learning credits earned by a student in each program in the last month?", + "sql_context": "CREATE TABLE lifelong_learning_credits (student_id INT, program_id INT, credits_earned INT, earned_date DATE);", + "sql": "SELECT program_id, MAX(credits_earned) FROM lifelong_learning_credits WHERE earned_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY program_id;", + "sql_explanation": "This query calculates the maximum number of lifelong learning credits earned by a student in each program in the last month. It groups the records by program_id and calculates the maximum number of credits earned for each group, where the earned date is within the last month." 
+}, { + "id": "1694", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who prefer open pedagogy in each school district?", + "sql_context": "CREATE TABLE student_preference (student_id INT, district_id INT, preference VARCHAR(10)); INSERT INTO student_preference (student_id, district_id, preference) VALUES (1, 101, \u0027open\u0027), (2, 101, \u0027traditional\u0027), (3, 102, \u0027open\u0027), (4, 102, \u0027open\u0027), (5, 103, \u0027traditional\u0027);", + "sql": "SELECT district_id, 100.0 * SUM(CASE WHEN preference \u003d \u0027open\u0027 THEN 1 ELSE 0 END) / COUNT(*) AS pct_open FROM student_preference GROUP BY district_id;", + "sql_explanation": "Calculate the percentage of students who prefer open pedagogy by grouping the records by district_id and then applying the SUM and COUNT functions with a CASE statement to filter and count the relevant records." 
+}, { + "id": "1712", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common mental health score for students in each school?", + "sql_context": "CREATE TABLE student_school (student_id INT, school_id INT, mental_health_score INT); INSERT INTO student_school (student_id, school_id, mental_health_score) VALUES (1, 100, 75), (2, 100, 80), (3, 101, 70);", + "sql": "SELECT school_id, mental_health_score, COUNT(*) as count FROM student_school GROUP BY school_id, mental_health_score ORDER BY school_id, count DESC;", + "sql_explanation": "Identify the most common mental health score for students in each school by grouping student_school by school_id and mental_health_score, and ordering by school_id and count in descending order." 
+}, { + "id": "1827", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of teachers who have attended a workshop on teacher professional development by gender?", + "sql_context": "CREATE TABLE Teachers (TeacherID INT, Age INT, Gender VARCHAR(10), WorkshopAttended VARCHAR(20)); INSERT INTO Teachers (TeacherID, Age, Gender, WorkshopAttended) VALUES (1, 45, \u0027Female\u0027, \u0027Teacher Professional Development\u0027); INSERT INTO Teachers (TeacherID, Age, Gender, WorkshopAttended) VALUES (2, 35, \u0027Male\u0027, \u0027No\u0027); INSERT INTO Teachers (TeacherID, Age, Gender, WorkshopAttended) VALUES (3, 50, \u0027Female\u0027, \u0027Yes\u0027); INSERT INTO Teachers (TeacherID, Age, Gender, WorkshopAttended) VALUES (4, 40, \u0027Male\u0027, \u0027Teacher Professional Development\u0027);", + "sql": "SELECT Gender, (COUNT(*) FILTER (WHERE WorkshopAttended \u003d \u0027Teacher Professional Development\u0027)) * 100.0 / COUNT(*) FROM Teachers GROUP BY Gender;", + "sql_explanation": "The SQL query calculates the percentage of teachers who have attended a workshop on teacher professional development by gender by filtering the Teachers table based on the WorkshopAttended column, grouping the results by the Gender column, and then calculating the percentage using the COUNT function with the FILTER clause." 
+}, { + "id": "1885", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum lifelong learning score for each student, grouped by subject?", + "sql_context": "CREATE TABLE student_lifelong_learning (student_id INT, subject VARCHAR(255), lifelong_learning_score INT);", + "sql": "SELECT s.student_id, s.subject, MIN(s.lifelong_learning_score) as min_score FROM student_lifelong_learning s GROUP BY s.student_id, s.subject;", + "sql_explanation": "This query groups the student_lifelong_learning table by student_id and subject and calculates the minimum lifelong_learning_score for each group." 
+}, { + "id": "1906", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of hours spent on professional development by a teacher in a single month?", + "sql_context": "CREATE TABLE teacher_pd_hours (teacher_id INT, date DATE, hours_spent DECIMAL(5,2)); INSERT INTO teacher_pd_hours (teacher_id, date, hours_spent) VALUES (1001, \u00272022-01-01\u0027, 2.5), (1001, \u00272022-02-15\u0027, 3.0), (1002, \u00272022-03-10\u0027, 6.0), (1003, \u00272022-04-01\u0027, 4.0);", + "sql": "SELECT MAX(hours_spent) as max_hours_spent FROM teacher_pd_hours WHERE date \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY MONTH(date), YEAR(date);", + "sql_explanation": "This query calculates the maximum number of hours spent on professional development by a teacher in a single month. It filters the data to only include records for hours spent on or after 1 month prior to the current date, and then calculates the maximum number of hours spent for these records grouped by the month and year." 
+}, { + "id": "2358", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest-rated open pedagogy resource by users?", + "sql_context": "CREATE TABLE open_pedagogy (resource_id INT, rating INT, user_id INT); INSERT INTO open_pedagogy (resource_id, rating, user_id) VALUES (1, 5, 100), (2, 4, 100), (3, 3, 101), (4, 5, 102);", + "sql": "SELECT resource_id, MAX(rating) as highest_rating FROM open_pedagogy GROUP BY resource_id ORDER BY highest_rating DESC LIMIT 1;", + "sql_explanation": "Identify the open pedagogy resource with the highest rating. The MAX function finds the highest rating, and LIMIT 1 returns only one row with the highest value." 
+}, { + "id": "2367", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent by students in online learning in each district, only showing districts with a total of over 500 hours?", + "sql_context": "CREATE TABLE students (student_id INT, district_id INT, num_hours_online_learning INT); INSERT INTO students (student_id, district_id, num_hours_online_learning) VALUES (1, 1, 100), (2, 1, 120), (3, 1, 150), (4, 2, 75), (5, 2, 80), (6, 2, 100), (7, 3, 125), (8, 3, 130), (9, 3, 150);", + "sql": "SELECT district_id, SUM(num_hours_online_learning) as total_hours FROM students GROUP BY district_id HAVING total_hours \u003e 500;", + "sql_explanation": "This query calculates the total number of hours spent by students in online learning in each district using the SUM function and the GROUP BY clause. The HAVING clause is used to filter districts with a total of over 500 hours." 
+}, { + "id": "2412", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum mental health score of students who identify as Indigenous, grouped by their ethnicity?", + "sql_context": "CREATE TABLE students (student_id INT, ethnicity VARCHAR(255), mental_health_score INT); INSERT INTO students (student_id, ethnicity, mental_health_score) VALUES (1, \u0027Native American\u0027, 80), (2, \u0027Latino\u0027, 70), (3, \u0027Indigenous Australian\u0027, 90);", + "sql": "SELECT ethnicity, MIN(mental_health_score) as min_score FROM students WHERE ethnicity LIKE \u0027%Indigenous%\u0027 GROUP BY ethnicity;", + "sql_explanation": "This SQL query calculates the minimum mental health score of students who identify as Indigenous, grouped by their ethnicity. It first filters the records where the ethnicity contains the word \u0027Indigenous\u0027, and then groups the results by ethnicity. Finally, it calculates the minimum mental health score for each ethnicity using the MIN() function." 
+}, { + "id": "2429", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of lifetime learning hours for each student in 2019?", + "sql_context": "CREATE TABLE lifetime_learning (student_id INT, year INT, learning_hours INT); INSERT INTO lifetime_learning (student_id, year, learning_hours) VALUES (1, 2018, 50), (1, 2019, 60), (2, 2018, 70), (2, 2019, 80), (3, 2019, 90);", + "sql": "SELECT student_id, SUM(learning_hours) as total_learning_hours FROM lifetime_learning WHERE year \u003d 2019 GROUP BY student_id;", + "sql_explanation": "This SQL query calculates the total number of lifetime learning hours for each student in 2019 by selecting student_id and the sum of learning_hours from the lifetime_learning table, filtering the results based on year, and grouping the results by student_id." 
+}, { + "id": "2445", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum mental health score of students in each department?", + "sql_context": "CREATE TABLE student_mental_health (student_id INT, department_id INT, mental_health_score INT);", + "sql": "SELECT department_id, MIN(mental_health_score) as min_mental_health_score FROM student_mental_health GROUP BY department_id;", + "sql_explanation": "This SQL query calculates the minimum mental health score of students in each department. It uses the MIN aggregation function to find the minimum score and the GROUP BY clause to group the results by department_id." 
+}, { + "id": "2553", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average open pedagogy participation score for students in each grade level?", + "sql_context": "CREATE TABLE student_open_pedagogy (student_id INT, grade_level INT, participation_score INT); INSERT INTO student_open_pedagogy (student_id, grade_level, participation_score) VALUES (1, 6, 85), (2, 6, 90), (3, 7, 75), (4, 7, 80), (5, 8, 95);", + "sql": "SELECT grade_level, AVG(participation_score) as avg_participation_score FROM student_open_pedagogy GROUP BY grade_level;", + "sql_explanation": "The SQL query calculates the average open pedagogy participation score (avg_participation_score) for students in each grade level (grouped by grade_level) using the AVG function." 
+}, { + "id": "2599", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common mental health issue among students?", + "sql_context": "CREATE TABLE students (id INT, name TEXT, gender TEXT, mental_health_issues TEXT); INSERT INTO students (id, name, gender, mental_health_issues) VALUES (1, \u0027Alice\u0027, \u0027Female\u0027, \u0027Anxiety, Depression\u0027); INSERT INTO students (id, name, gender, mental_health_issues) VALUES (2, \u0027Bob\u0027, \u0027Male\u0027, \u0027Anxiety\u0027); INSERT INTO students (id, name, gender, mental_health_issues) VALUES (3, \u0027Charlie\u0027, \u0027Non-binary\u0027, \u0027Depression\u0027);", + "sql": "SELECT mental_health_issues, COUNT(*) AS count FROM students GROUP BY mental_health_issues ORDER BY count DESC LIMIT 1;", + "sql_explanation": "First, we group the records by the mental_health_issues column and calculate the count of each group. Then, we order the groups by the count in descending order and limit the result to the top 1 group. Finally, we return the mental_health_issues and the count of the top group." 
+}, { + "id": "2610", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of teachers who have not completed any professional development courses in the \u0027teacher_development\u0027 table, grouped by their teaching subject.", + "sql_context": "CREATE TABLE teacher_development (teacher_id INT, subject_teached VARCHAR(30), course_completed INT);", + "sql": "SELECT subject_teached, COUNT(teacher_id) FROM teacher_development WHERE course_completed \u003d 0 GROUP BY subject_teached;", + "sql_explanation": "1. Selects the subject_teached column and the teacher ID column, and applies the COUNT function to get the number of teachers. 2. Filters rows based on the course_completed column being 0 (indicating no completed courses). 3. Groups the results by the teaching subject. 4. Returns the number of teachers who have not completed any professional development courses, grouped by their teaching subject." 
+}, { + "id": "2756", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of lifelong learning credits earned by students per school, from the \"students_lifelong_learning\" table?", + "sql_context": "CREATE TABLE students_lifelong_learning (student_id INT, school_id INT, lifelong_learning_credits INT);", + "sql": "SELECT school_id, MAX(lifelong_learning_credits) as max_credits FROM students_lifelong_learning GROUP BY school_id;", + "sql_explanation": "Find the maximum number of lifelong learning credits earned by students per school by grouping by school_id and applying the MAX function to lifelong_learning_credits." 
+}, { + "id": "2771", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students per district?", + "sql_context": "CREATE TABLE schools (school_id INT, district_id INT, mental_health_score INT); INSERT INTO schools (school_id, district_id, mental_health_score) VALUES (1001, 1, 75), (1002, 1, 80), (1003, 2, 70);", + "sql": "SELECT s.district_id, AVG(s.mental_health_score) as avg_mental_health_score FROM schools s GROUP BY s.district_id;", + "sql_explanation": "This SQL query calculates the average mental health score of students per district by selecting the district_id and the average mental_health_score from the schools table, then grouping the results by district_id." 
+}, { + "id": "2778", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique teachers who have led open pedagogy workshops in each country.", + "sql_context": "CREATE TABLE teachers (teacher_id INT, country VARCHAR(50), led_open_pedagogy_workshop BOOLEAN); INSERT INTO teachers (teacher_id, country, led_open_pedagogy_workshop) VALUES (1, \u0027USA\u0027, true), (2, \u0027Canada\u0027, false), (3, \u0027Mexico\u0027, true);", + "sql": "SELECT country, COUNT(DISTINCT teacher_id) FROM teachers WHERE led_open_pedagogy_workshop \u003d true GROUP BY country;", + "sql_explanation": "This query counts the number of unique teachers (COUNT DISTINCT teacher_id) who have led open pedagogy workshops (led_open_pedagogy_workshop \u003d true) in each country (GROUP BY country)." 
+}, { + "id": "2823", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of professional development courses completed by teachers in each region, sorted by the total number of courses completed?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, teacher_name VARCHAR(50), region VARCHAR(20), courses_completed INT); INSERT INTO teachers (teacher_id, teacher_name, region, courses_completed) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027, 3), (2, \u0027Jane Smith\u0027, \u0027South\u0027, 5), (3, \u0027Alice Johnson\u0027, \u0027East\u0027, 4), (4, \u0027Bob Williams\u0027, \u0027West\u0027, 2);", + "sql": "SELECT region, SUM(courses_completed) as total_courses FROM teachers GROUP BY region ORDER BY total_courses DESC;", + "sql_explanation": "This SQL query calculates the total number of professional development courses completed by teachers in each region and sorts the data by the total number of courses completed in descending order. It groups the data by the \u0027region\u0027 column and calculates the total number of courses completed for each group using the SUM() function. It then sorts the data by the total number of courses completed using the ORDER BY clause and the DESC keyword." 
+}, { + "id": "3267", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of students who have a mental health score above 80 in the \u0027student_scores\u0027 table, grouped by their school type.", + "sql_context": "CREATE TABLE student_scores (student_id INT, school_type VARCHAR(10), mental_health_score INT);", + "sql": "SELECT school_type, COUNT(*) FROM student_scores WHERE mental_health_score \u003e 80 GROUP BY school_type;", + "sql_explanation": "1. Selects the school_type column and applies the COUNT function to get the total number of rows. 2. Filters rows based on the mental_health_score column being greater than 80. 3. Groups the results by the school type. 4. Returns the number of students who have a mental health score above 80, grouped by their school type." 
+}, { + "id": "3756", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many professional development courses were completed by teachers from the \"teachers\" table, grouped by subject area?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, subject_area VARCHAR(255), professional_development_courses INT);", + "sql": "SELECT subject_area, COUNT(teacher_id) as num_courses FROM teachers GROUP BY subject_area;", + "sql_explanation": "Count the number of professional development courses completed by teachers for each subject area by grouping by subject_area and applying the COUNT function to teacher_id." 
+}, { + "id": "3823", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of students by ethnicity in open pedagogy schools?", + "sql_context": "CREATE TABLE open_pedagogy_schools (id INT, school_name VARCHAR(50), student_ethnicity VARCHAR(50)); INSERT INTO open_pedagogy_schools (id, school_name, student_ethnicity) VALUES (1, \u0027Innovate School\u0027, \u0027Hispanic\u0027), (2, \u0027Open Learning School\u0027, \u0027Asian\u0027);", + "sql": "SELECT student_ethnicity, COUNT(*) FROM open_pedagogy_schools GROUP BY student_ethnicity;", + "sql_explanation": "Determine the distribution of students by ethnicity in open pedagogy schools by grouping the \u0027open_pedagogy_schools\u0027 table by the \u0027student_ethnicity\u0027 column and applying the COUNT function." 
+}, { + "id": "3926", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of teachers who attended a professional development event in each region?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, region VARCHAR(20), event_attended BOOLEAN); INSERT INTO teachers (teacher_id, region, event_attended) VALUES (1, \u0027North\u0027, true), (2, \u0027North\u0027, false), (3, \u0027South\u0027, true);", + "sql": "SELECT region, 100.0 * AVG(event_attended) as percentage FROM teachers GROUP BY region;", + "sql_explanation": "Calculate the percentage of teachers who attended an event in each region by grouping teachers by their region, taking the average of event_attended, and multiplying it by 100.0 to convert to percentage." 
+}, { + "id": "4119", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students are enrolled in each school type (public, private, or charter) in the \u0027school_enrollment\u0027 table?", + "sql_context": "CREATE TABLE school_enrollment (school_id INT, student_count INT, school_type VARCHAR(10));", + "sql": "SELECT school_type, SUM(student_count) FROM school_enrollment GROUP BY school_type;", + "sql_explanation": "1. Selects the school_type column and the student count column, and applies the SUM function to get the total number of students. 2. Groups the results by the school type. 3. Returns the number of students enrolled in each school type (public, private, or charter) in the \u0027school_enrollment\u0027 table." 
+}, { + "id": "4322", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of male and female teachers in each school", + "sql_context": "SELECT School, Gender, COUNT(*) as Count FROM Teachers GROUP BY School, Gender;", + "sql": "SELECT School, Gender, COUNT(*) as Count FROM Teachers GROUP BY School, Gender;", + "sql_explanation": "This SQL query groups teachers by school and gender, and counts the number of teachers in each group." +}, { + "id": "4367", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students by race?", + "sql_context": "CREATE TABLE students (student_id INT, student_name VARCHAR(50), race VARCHAR(10), mental_health_score INT); INSERT INTO students (student_id, student_name, race, mental_health_score) VALUES (1, \u0027John Doe\u0027, \u0027Asian\u0027, 75), (2, \u0027Jane Smith\u0027, \u0027Black\u0027, 85), (3, \u0027Alice Johnson\u0027, \u0027White\u0027, 80), (4, \u0027Bob Lee\u0027, \u0027Hispanic\u0027, 88);", + "sql": "SELECT race, AVG(mental_health_score) as avg_score FROM students GROUP BY race;", + "sql_explanation": "This SQL query calculates the average mental health 
score of students by race. It groups the records by the \u0027race\u0027 column and calculates the average \u0027mental_health_score\u0027 for each group." +}, { + "id": "5550", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of male and female students in the \u0027Student\u0027 table", + "sql_context": "CREATE TABLE Student (StudentID INT, Gender VARCHAR(10)); INSERT INTO Student (StudentID, Gender) VALUES (1, \u0027Male\u0027), (2, \u0027Female\u0027), (3, \u0027Male\u0027);", + "sql": "SELECT Gender, COUNT(*) FROM Student GROUP BY Gender;", + "sql_explanation": "This SQL query groups the Student table by the Gender column and calculates the count of students for each group." 
+}, { + "id": "1912", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common medium used in the \u0027Contemporary\u0027 gallery?", + "sql_context": "CREATE TABLE Artworks (artwork_id INT, artwork_name VARCHAR(50), medium VARCHAR(50), gallery_name VARCHAR(50)); INSERT INTO Artworks (artwork_id, artwork_name, medium, gallery_name) VALUES (1, \u0027Untitled\u0027, \u0027Oil on canvas\u0027, \u0027Contemporary\u0027), (2, \u0027Untitled\u0027, \u0027Acrylic on canvas\u0027, \u0027Contemporary\u0027);", + "sql": "SELECT medium, COUNT(*) as medium_count FROM Artworks WHERE gallery_name \u003d \u0027Contemporary\u0027 GROUP BY medium ORDER BY medium_count DESC LIMIT 1;", + "sql_explanation": "The SQL query groups the Artworks table by medium and counts the number of artworks for each medium in the \u0027Contemporary\u0027 gallery. Then it orders the result set by the medium_count in descending order and limits the result set to the top medium." 
+}, { + "id": "2636", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of exhibitions hosted by the \u0027Louvre Museum\u0027 for each movement.", + "sql_context": "CREATE TABLE Exhibitions (exhibition_id INT, museum_name VARCHAR(255), movement VARCHAR(255), exhibition_year INT);", + "sql": "SELECT movement, COUNT(*) as exhibitions_count FROM Exhibitions WHERE museum_name \u003d \u0027Louvre Museum\u0027 GROUP BY movement;", + "sql_explanation": "This query retrieves the number of exhibitions hosted by the \u0027Louvre Museum\u0027 for each movement by selecting movement and counting the number of exhibitions where museum_name is \u0027Louvre Museum\u0027 and grouping the results by movement." 
+}, { + "id": "3368", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by art auctions in the American region?", + "sql_context": "CREATE TABLE Auctions (AuctionID INT, AuctionName TEXT, Year INT, Region TEXT, Revenue DECIMAL(10,2)); INSERT INTO Auctions (AuctionID, AuctionName, Year, Region, Revenue) VALUES (1, \u0027Christie\u0027\u0027s New York\u0027, 2017, \u0027America\u0027, 5000000); INSERT INTO Auctions (AuctionID, AuctionName, Year, Region, Revenue) VALUES (2, \u0027Sotheby\u0027\u0027s London\u0027, 2018, \u0027Europe\u0027, 6000000);", + "sql": "SELECT Region, SUM(Revenue) as TotalRevenue FROM Auctions WHERE Region \u003d \u0027America\u0027 GROUP BY Region;", + "sql_explanation": "This SQL query calculates the total revenue generated by art auctions in the American region. It filters the records for auctions in America and calculates the sum of revenues for that region." 
+}, { + "id": "3940", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average sale price for artworks in each art movement?", + "sql_context": "CREATE TABLE Artworks (Artwork VARCHAR(255), ArtMovement VARCHAR(255), SalePrice DECIMAL(10,2)); INSERT INTO Artworks (Artwork, ArtMovement, SalePrice) VALUES (\u0027Artwork 1\u0027, \u0027Post-Impressionism\u0027, 500.00), (\u0027Artwork 2\u0027, \u0027Post-Impressionism\u0027, 400.00), (\u0027Artwork 3\u0027, \u0027Pop Art\u0027, 750.00), (\u0027Artwork 4\u0027, \u0027Pop Art\u0027, 1000.00);", + "sql": "SELECT ArtMovement, AVG(SalePrice) as AvgSalePrice FROM Artworks GROUP BY ArtMovement;", + "sql_explanation": "This SQL query calculates the average sale price for artworks in each art movement. It does this by calculating the average sale price for each art movement using the AVG() function and grouping the results by the ArtMovement column." 
+}, { + "id": "266", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view \u0027policyholders_by_age\u0027 that displays the number of policyholders by age range", + "sql_context": "CREATE TABLE policyholders (policyholder_id INT PRIMARY KEY, name VARCHAR(100), age INT, gender VARCHAR(10), city VARCHAR(50), state VARCHAR(50), zip_code VARCHAR(10));", + "sql": "CREATE VIEW policyholders_by_age AS SELECT CASE WHEN age \u003c 25 THEN \u00270-24\u0027 WHEN age \u003c 35 THEN \u002725-34\u0027 WHEN age \u003c 45 THEN \u002735-44\u0027 WHEN age \u003c 55 THEN \u002745-54\u0027 ELSE \u002755+\u0027 END as age_range, COUNT(*) as num_policyholders FROM policyholders GROUP BY age_range;", + "sql_explanation": "A view \u0027policyholders_by_age\u0027 is created to display the number of policyholders by age range. The \u0027CASE\u0027 statement categorizes ages into appropriate ranges, and the \u0027GROUP BY\u0027 clause counts the number of policyholders in each age range." 
+}, { + "id": "379", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show all policy records for policy type \u0027Renters\u0027 as separate columns for policy ID, effective date, and a column for each policy type value", + "sql_context": "CREATE TABLE policy (policy_id INT, policy_type VARCHAR(20), effective_date DATE); INSERT INTO policy VALUES (1, \u0027Renters\u0027, \u00272018-01-01\u0027); INSERT INTO policy VALUES (2, \u0027Personal Auto\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT policy_id, effective_date, MAX(CASE WHEN policy_type \u003d \u0027Renters\u0027 THEN policy_type END) AS Renters, MAX(CASE WHEN policy_type \u003d \u0027Personal Auto\u0027 THEN policy_type END) AS Personal_Auto FROM policy GROUP BY policy_id, effective_date;", + "sql_explanation": "This query pivots the policy table to show all policy records for policy type \u0027Renters\u0027 as separate columns for policy ID and effective date, along with a column for each policy type value." 
+}, { + "id": "1392", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total claim amount for policyholder 1003 for the year 2021.", + "sql_context": "CREATE TABLE Claims (id INT, policyholder_id INT, claim_amount DECIMAL(10,2), claim_date DATE); INSERT INTO Claims (id, policyholder_id, claim_amount, claim_date) VALUES (3, 1003, 7000.00, \u00272021-03-20\u0027); INSERT INTO Claims (id, policyholder_id, claim_amount, claim_date) VALUES (4, 1003, 2000.00, \u00272021-08-12\u0027);", + "sql": "SELECT policyholder_id, SUM(claim_amount) FROM Claims WHERE policyholder_id \u003d 1003 AND claim_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY policyholder_id;", + "sql_explanation": "This query calculates the total claim amount for policyholder 1003 in 2021 by filtering policyholder_id and claim_date, and grouping policyholder_id." 
+}, { + "id": "1629", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count policyholders by gender and age group", + "sql_context": "CREATE TABLE policyholders (policyholder_id INT, first_name VARCHAR(20), last_name VARCHAR(20), email VARCHAR(30), date_of_birth DATE, gender ENUM(\u0027M\u0027, \u0027F\u0027)); INSERT INTO policyholders (policyholder_id, first_name, last_name, email, date_of_birth, gender) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027johndoe@example.com\u0027, \u00271985-05-15\u0027, \u0027M\u0027), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027janedoe@example.com\u0027, \u00271990-08-08\u0027, \u0027F\u0027), (3, \u0027Bob\u0027, \u0027Smith\u0027, \u0027bobsmith@example.com\u0027, \u00271976-11-12\u0027, \u0027M\u0027), (4, \u0027Alice\u0027, \u0027Johnson\u0027, \u0027alicejohnson@example.com\u0027, \u00271982-02-23\u0027, \u0027F\u0027);", + "sql": "SELECT FLOOR(DATEDIFF(CURDATE(), date_of_birth)/365) AS age_group, gender, COUNT(*) AS num_policyholders FROM policyholders GROUP BY age_group, gender;", + "sql_explanation": "The SQL query calculates age groups based on the difference between the current date and date_of_birth, then groups the result set by age_group and gender, counting the number of policyholders in each group." 
+}, { + "id": "1672", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount for policy type \u0027Auto\u0027 in the Claims department for all time?", + "sql_context": "CREATE TABLE Claims (ClaimID INT, PolicyType VARCHAR(20), ProcessingDepartment VARCHAR(20), ProcessingDate DATE, ClaimAmount INT); INSERT INTO Claims (ClaimID, PolicyType, ProcessingDepartment, ProcessingDate, ClaimAmount) VALUES (1, \u0027Auto\u0027, \u0027Claims\u0027, \u00272023-01-10\u0027, 5000), (2, \u0027Home\u0027, \u0027Risk Assessment\u0027, \u00272023-02-15\u0027, 20000);", + "sql": "SELECT PolicyType, SUM(ClaimAmount) as TotalClaimAmount FROM Claims WHERE ProcessingDepartment \u003d \u0027Claims\u0027 AND PolicyType \u003d \u0027Auto\u0027 GROUP BY PolicyType;", + "sql_explanation": "The SQL query calculates the total claim amount for policy type \u0027Auto\u0027 in the Claims department for all time by performing a SUM operation on the ClaimAmount column, filtering the records where ProcessingDepartment is equal to \u0027Claims\u0027 and PolicyType is equal to \u0027Auto\u0027, and grouping the results by PolicyType." 
+}, { + "id": "1986", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of policies for each policy type, coverage level, and policyholder gender?", + "sql_context": "CREATE TABLE policies (id INT, policy_type VARCHAR(20), coverage_level INT, price FLOAT, policyholder_gender VARCHAR(10)); INSERT INTO policies (id, policy_type, coverage_level, price, policyholder_gender) VALUES (1, \u0027Comprehensive\u0027, 3, 1200.00, \u0027Female\u0027), (2, \u0027Third-Party\u0027, 2, 800.00, \u0027Male\u0027), (3, \u0027Third-Party\u0027, 3, 1000.00, \u0027Female\u0027);", + "sql": "SELECT policy_type, coverage_level, policyholder_gender, COUNT(*) FROM policies GROUP BY policy_type, coverage_level, policyholder_gender;", + "sql_explanation": "To find the total number of policies for each policy type, coverage level, and policyholder gender, we group the policies table by policy_type, coverage_level, and policyholder_gender and apply the COUNT function to each group to determine the number of policies for each combination of policy type, coverage level, and policyholder gender." 
+}, { + "id": "2029", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of claims that were denied by policy type?", + "sql_context": "CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount DECIMAL(10, 2), ClaimDate DATE, PolicyType VARCHAR(255), ClaimStatus VARCHAR(255)); INSERT INTO Claims VALUES (1, 1, 500, \u00272021-01-05\u0027, \u0027Auto\u0027, \u0027Approved\u0027), (2, 2, 1000, \u00272022-02-10\u0027, \u0027Home\u0027, \u0027Denied\u0027), (3, 3, 750, \u00272021-03-15\u0027, \u0027Auto\u0027, \u0027Denied\u0027), (4, 4, 1200, \u00272022-01-25\u0027, \u0027Home\u0027, \u0027Approved\u0027), (5, 5, 300, \u00272021-02-01\u0027, \u0027Auto\u0027, \u0027Approved\u0027), (6, 6, 1500, \u00272022-03-01\u0027, \u0027Home\u0027, \u0027Approved\u0027);", + "sql": "SELECT PolicyType, COUNT(*) FILTER (WHERE ClaimStatus \u003d \u0027Denied\u0027) * 100.0 / COUNT(*) AS DeniedPercentage FROM Claims GROUP BY PolicyType;", + "sql_explanation": "The SQL query groups the records by PolicyType, calculates the count of denied claims, and calculates the percentage of denied claims for each group using the COUNT(*) and COUNT(*) FILTER (WHERE) functions." 
+}, { + "id": "2816", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount and policy type for each policy that has a claim amount greater than $1500?", + "sql_context": "CREATE TABLE Claims (ClaimID INT, PolicyID INT, PolicyType VARCHAR(20), ClaimAmount DECIMAL(10,2)); INSERT INTO Claims (ClaimID, PolicyID, PolicyType, ClaimAmount) VALUES (1, 1, \u0027Auto\u0027, 1500.00), (2, 2, \u0027Home\u0027, 1800.00), (3, 3, \u0027Life\u0027, 3000.00);", + "sql": "SELECT PolicyType, SUM(ClaimAmount) as TotalClaimAmount FROM Claims WHERE ClaimAmount \u003e 1500 GROUP BY PolicyType;", + "sql_explanation": "The SQL query selects the PolicyType column and the sum of the ClaimAmount column for each PolicyType from the Claims table where the ClaimAmount is greater than 1500. The results are then grouped by the PolicyType column." 
+}, { + "id": "2886", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many policyholders are there in each risk assessment model group?", + "sql_context": "CREATE TABLE policyholders (id INT, policyholder_name VARCHAR(50), risk_assessment_model VARCHAR(20)); INSERT INTO policyholders (id, policyholder_name, risk_assessment_model) VALUES (1, \u0027John Doe\u0027, \u0027Model A\u0027), (2, \u0027Jane Smith\u0027, \u0027Model B\u0027), (3, \u0027Alice Johnson\u0027, \u0027Model A\u0027), (4, \u0027Bob Brown\u0027, \u0027Model C\u0027), (5, \u0027Charlie Davis\u0027, \u0027Model A\u0027);", + "sql": "SELECT risk_assessment_model, COUNT(*) as policyholder_count FROM policyholders GROUP BY risk_assessment_model;", + "sql_explanation": "This query groups the policyholders table by risk_assessment_model and calculates the count of policyholders for each risk assessment model using the COUNT() function, thus providing the number of policyholders in each risk assessment model group." 
+}, { + "id": "3327", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of policies and their corresponding policy types for policyholders aged 30 or younger?", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Age INT, PolicyType VARCHAR(20)); INSERT INTO Policyholders (PolicyholderID, Age, PolicyType) VALUES (1, 25, \u0027Auto\u0027), (2, 32, \u0027Home\u0027), (3, 19, \u0027Life\u0027);", + "sql": "SELECT COUNT(*) as TotalPolicies, PolicyType FROM Policyholders WHERE Age \u003c\u003d 30 GROUP BY PolicyType;", + "sql_explanation": "The SQL query calculates the total number of policies and their corresponding policy types for policyholders aged 30 or younger by performing a COUNT(*) operation on the Policyholders table, filtering the records where Age is less than or equal to 30, and grouping the results by PolicyType." 
+}, { + "id": "3367", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify policyholders who have made more than two claims in Wisconsin", + "sql_context": "CREATE TABLE claims (policyholder_id INT, claim_number INT, state VARCHAR(2)); INSERT INTO claims (policyholder_id, claim_number, state) VALUES (1, 1, \u0027WI\u0027), (1, 2, \u0027WI\u0027), (1, 3, \u0027WI\u0027), (2, 1, \u0027WI\u0027);", + "sql": "SELECT policyholder_id FROM claims WHERE state \u003d \u0027WI\u0027 GROUP BY policyholder_id HAVING COUNT(*) \u003e 2;", + "sql_explanation": "This query groups the results by policyholder_id and filters for groups with more than two records (COUNT(*) \u003e 2)" +}, { + "id": "3377", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total claim amounts and policy types for claims over $3000.", + "sql_context": "CREATE TABLE claim_2 (claim_id INT, claim_type VARCHAR(20), claim_amount FLOAT, policy_type VARCHAR(20)); INSERT INTO claim_2 (claim_id, claim_type, claim_amount, policy_type) VALUES (5, \u0027Flood\u0027, 5500.00, \u0027Home\u0027), (6, \u0027Vandalism\u0027, 2200.00, \u0027Auto\u0027), (7, \u0027Critical Illness\u0027, 4000.00, 
\u0027Life\u0027), (8, \u0027Property Damage\u0027, 3800.00, \u0027Rent\u0027);", + "sql": "SELECT policy_type, SUM(claim_amount) FROM claim_2 WHERE claim_amount \u003e 3000 GROUP BY policy_type;", + "sql_explanation": "The SQL query uses the GROUP BY clause to group the \u0027claim_2\u0027 table by \u0027policy_type\u0027 and then calculates the sum of \u0027claim_amount\u0027 for each group using the SUM() function, filtering for claim amounts over $3000." +}, { + "id": "3411", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show total claim amounts and policyholder id for policyholders in Illinois", + "sql_context": "CREATE TABLE claims (policyholder_id INT, claim_amount DECIMAL(10,2), state VARCHAR(2)); INSERT INTO claims (policyholder_id, claim_amount, state) VALUES (1, 500, \u0027IL\u0027), (2, 200, \u0027IL\u0027), (3, 800, \u0027IL\u0027);", + "sql": "SELECT policyholder_id, SUM(claim_amount) FROM claims WHERE state \u003d \u0027IL\u0027 GROUP BY policyholder_id;", + "sql_explanation": "This query calculates the sum (SUM) of the claim_amount column for each policyholder_id where the state is \u0027IL\u0027" +}, { + "id": "3449", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", 
+ "sql_prompt": "List all underwriting departments in \u0027Ontario\u0027 and \u0027Quebec\u0027 with their count?", + "sql_context": "CREATE TABLE underwriting (id INT, department TEXT, city TEXT, province TEXT); INSERT INTO underwriting (id, department, city, province) VALUES (1, \u0027Department A\u0027, \u0027Toronto\u0027, \u0027ON\u0027); INSERT INTO underwriting (id, department, city, province) VALUES (2, \u0027Department B\u0027, \u0027Montreal\u0027, \u0027QC\u0027); INSERT INTO underwriting (id, department, city, province) VALUES (3, \u0027Department C\u0027, \u0027Ottawa\u0027, \u0027ON\u0027);", + "sql": "SELECT department, COUNT(*) FROM underwriting WHERE province IN (\u0027ON\u0027, \u0027QC\u0027) GROUP BY department;", + "sql_explanation": "The SQL query lists all underwriting departments in \u0027Ontario\u0027 and \u0027Quebec\u0027 with their count by using the SELECT statement with the GROUP BY clause on the \u0027department\u0027 column. It filters the records based on the \u0027province\u0027 column with the IN operator." 
+}, { + "id": "3706", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average risk assessment for policies in Texas by Underwriting team?", + "sql_context": "CREATE TABLE UnderwritingData (PolicyID INT, Team VARCHAR(20), RiskAssessment DECIMAL(5,2), State VARCHAR(20)); INSERT INTO UnderwritingData VALUES (1, \u0027Team A\u0027, 0.35, \u0027California\u0027), (2, \u0027Team B\u0027, 0.20, \u0027California\u0027), (3, \u0027Team A\u0027, 0.15, \u0027Texas\u0027);", + "sql": "SELECT Team, AVG(RiskAssessment) FROM UnderwritingData WHERE State \u003d \u0027Texas\u0027 GROUP BY Team;", + "sql_explanation": "Calculate the average risk assessment for policies in Texas by Underwriting team by grouping UnderwritingData table using GROUP BY clause." 
+}, { + "id": "3746", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount for each policyholder living in New York?", + "sql_context": "CREATE TABLE Policyholders (ID INT, ClaimAmount DECIMAL(10, 2), State VARCHAR(50)); INSERT INTO Policyholders (ID, ClaimAmount, State) VALUES (1, 1500.00, \u0027New York\u0027), (2, 500.00, \u0027Texas\u0027), (3, 1000.00, \u0027California\u0027), (4, 2000.00, \u0027New York\u0027);", + "sql": "SELECT State, SUM(ClaimAmount) FROM Policyholders WHERE State \u003d \u0027New York\u0027 GROUP BY State;", + "sql_explanation": "This query calculates the total claim amount for each policyholder living in New York using the SUM aggregation function and WHERE clause." 
+}, { + "id": "3867", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which underwriters have processed more than 5 claims?", + "sql_context": "CREATE TABLE claims (id INT, underwriter_id INT, processed_date DATE); INSERT INTO claims (id, underwriter_id, processed_date) VALUES (1, 1, \u00272021-01-01\u0027), (2, 2, \u00272021-02-01\u0027), (3, 1, \u00272021-03-01\u0027), (4, 2, \u00272021-02-02\u0027), (5, 2, \u00272021-02-03\u0027), (6, 3, \u00272021-03-01\u0027);", + "sql": "SELECT underwriter_id, COUNT(*) FROM claims GROUP BY underwriter_id HAVING COUNT(*) \u003e 5;", + "sql_explanation": "This SQL query groups the claims data by underwriter_id (GROUP BY underwriter_id) and filters the results to show only those underwriters who have processed more than 5 claims (HAVING COUNT(*) \u003e 5)." 
+}, { + "id": "4042", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount for each policy type?", + "sql_context": "CREATE TABLE Claims_Type (id INT, policy_type VARCHAR(20), amount FLOAT); INSERT INTO Claims_Type (id, policy_type, amount) VALUES (1, \u0027Auto\u0027, 5000), (2, \u0027Home\u0027, 7000), (3, \u0027Life\u0027, 8000), (4, \u0027Health\u0027, 6000);", + "sql": "SELECT policy_type, SUM(amount) as total_claim FROM Claims_Type GROUP BY policy_type;", + "sql_explanation": "This SQL query calculates the total claim amount for each policy type by grouping the \u0027Claims_Type\u0027 table by the \u0027policy_type\u0027 column and then summing the \u0027amount\u0027 column for each group." 
+}, { + "id": "4268", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many policies were issued in each state?", + "sql_context": "CREATE TABLE Policies (id INT, state VARCHAR(20), policy_number INT); INSERT INTO Policies (id, state, policy_number) VALUES (1, \u0027California\u0027, 100), (2, \u0027Texas\u0027, 120), (3, \u0027New York\u0027, 150), (4, \u0027Florida\u0027, 110);", + "sql": "SELECT state, COUNT(policy_number) as policy_count FROM Policies GROUP BY state;", + "sql_explanation": "This SQL query calculates the number of policies issued in each state by grouping the \u0027Policies\u0027 table by the \u0027state\u0027 column and then counting the number of \u0027policy_number\u0027 entries in each group." 
+}, { + "id": "4478", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of policy types for each broker.", + "sql_context": "CREATE TABLE PolicyBroker (PolicyID INT, PolicyType VARCHAR(20), Broker VARCHAR(20)); INSERT INTO PolicyBroker (PolicyID, PolicyType, Broker) VALUES (1, \u0027Auto\u0027, \u0027BrokerSmith\u0027), (2, \u0027Home\u0027, \u0027BrokerJones\u0027), (3, \u0027Auto\u0027, \u0027BrokerSmith\u0027);", + "sql": "SELECT Broker, COUNT(DISTINCT PolicyType) FROM PolicyBroker GROUP BY Broker;", + "sql_explanation": "The SQL query counts the number of distinct policy types for each broker in the PolicyBroker table, grouped by broker." 
+}, { + "id": "4630", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the policy type with the highest claim amount.", + "sql_context": "CREATE TABLE HighestClaim (PolicyID INT, PolicyType VARCHAR(20), ClaimAmount DECIMAL(10, 2)); INSERT INTO HighestClaim (PolicyID, PolicyType, ClaimAmount) VALUES (1, \u0027Auto\u0027, 500.00), (2, \u0027Home\u0027, 1000.00), (3, \u0027Auto\u0027, 750.00);", + "sql": "SELECT PolicyType, MAX(ClaimAmount) FROM HighestClaim GROUP BY PolicyType;", + "sql_explanation": "The SQL query calculates the maximum claim amount for each policy type in the HighestClaim table, grouped by policy type." 
+}, { + "id": "5024", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many policyholders are there in each city?", + "sql_context": "CREATE TABLE Policyholders (Policyholder TEXT, City TEXT); INSERT INTO Policyholders (Policyholder, City) VALUES (\u0027John Doe\u0027, \u0027New York\u0027), (\u0027Jane Smith\u0027, \u0027Los Angeles\u0027), (\u0027Alice Johnson\u0027, \u0027San Francisco\u0027), (\u0027Bob Brown\u0027, \u0027New York\u0027);", + "sql": "SELECT City, COUNT(Policyholder) FROM Policyholders GROUP BY City;", + "sql_explanation": "This query calculates the number of policyholders in each city. It does this by using the GROUP BY statement to group the data by the City column, and then counts the number of Policyholder values in each group using the COUNT function." 
+}, { + "id": "346", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 destinations in Europe with the longest delivery times for shipments from Australia in November 2022", + "sql_context": "CREATE TABLE Shipments (id INT, source VARCHAR(50), destination VARCHAR(50), weight FLOAT, ship_date DATE, delivery_date DATE); INSERT INTO Shipments (id, source, destination, weight, ship_date, delivery_date) VALUES (30, \u0027Australia\u0027, \u0027Germany\u0027, 400, \u00272022-11-01\u0027, \u00272022-11-10\u0027); INSERT INTO Shipments (id, source, destination, weight, ship_date, delivery_date) VALUES (31, \u0027Australia\u0027, \u0027France\u0027, 300, \u00272022-11-15\u0027, \u00272022-11-25\u0027); INSERT INTO Shipments (id, source, destination, weight, ship_date, delivery_date) VALUES (32, \u0027Australia\u0027, \u0027UK\u0027, 600, \u00272022-11-30\u0027, \u00272022-12-15\u0027);", + "sql": "SELECT destination, AVG(DATEDIFF(day, ship_date, delivery_date)) as avg_delivery_time FROM Shipments WHERE source \u003d \u0027Australia\u0027 AND ship_date BETWEEN \u00272022-11-01\u0027 AND \u00272022-11-30\u0027 GROUP BY destination ORDER BY avg_delivery_time DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 destinations in Europe with the longest delivery times for shipments from Australia in November 2022 by grouping by destination, averaging the difference between delivery_date and ship_date, and ordering the results in descending order, limiting the results to 3." 
+}, { + "id": "452", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Present the warehouse locations and their respective total storage capacity (in cubic meters) for the region \u0027Europe\u0027 as of 2022-02-01.", + "sql_context": "CREATE TABLE Warehouses (WarehouseID INT, WarehouseLocation VARCHAR(100), Region VARCHAR(50), StorageCapacity DECIMAL(10,2)); INSERT INTO Warehouses VALUES (1, \u0027Warehouse B\u0027, \u0027Europe\u0027, 5000);", + "sql": "SELECT Warehouses.WarehouseLocation, SUM(Warehouses.StorageCapacity) as TotalStorageCapacity FROM Warehouses WHERE Warehouses.Region \u003d \u0027Europe\u0027 AND Warehouses.StorageCapacity IS NOT NULL GROUP BY Warehouses.WarehouseLocation;", + "sql_explanation": "This SQL query filters for warehouses in the European region and calculates the total storage capacity (in cubic meters) for each warehouse location using the SUM function and GROUP BY clause." 
+}, { + "id": "1139", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of packages that were delivered late, for each warehouse, in the last week?", + "sql_context": "CREATE TABLE shipments (shipment_id INT, warehouse_id INT, shipped_date DATE, delivered_date DATE, late_flag BOOLEAN); INSERT INTO shipments (shipment_id, warehouse_id, shipped_date, delivered_date, late_flag) VALUES (1, 1, \u00272022-02-21\u0027, \u00272022-02-23\u0027, true), (2, 2, \u00272022-02-22\u0027, \u00272022-02-24\u0027, false), (3, 3, \u00272022-02-23\u0027, \u00272022-02-26\u0027, true);", + "sql": "SELECT warehouse_id, AVG(late_flag) * 100.0 as pct_late_deliveries FROM shipments WHERE shipped_date BETWEEN DATEADD(day, -7, GETDATE()) AND GETDATE() GROUP BY warehouse_id;", + "sql_explanation": "This query calculates the percentage of packages that were delivered late for each warehouse in the last week. It does this by finding the average value of the late_flag column for each shipment that occurred in the last week, and then multiplying that average by 100.0 to convert it to a percentage." 
+}, { + "id": "1276", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of reverse logistics events for each product category in South America.", + "sql_context": "CREATE TABLE Reverse_Logistics (id INT, reverse_date DATETIME, reverse_country VARCHAR(50), product_category VARCHAR(50), reverse_event INT); INSERT INTO Reverse_Logistics (id, reverse_date, reverse_country, product_category, reverse_event) VALUES (1, \u00272022-01-01\u0027, \u0027Brazil\u0027, \u0027Electronics\u0027, 1), (2, \u00272022-01-02\u0027, \u0027Argentina\u0027, \u0027Furniture\u0027, 2), (3, \u00272022-01-03\u0027, \u0027Colombia\u0027, \u0027Appliances\u0027, 3);", + "sql": "SELECT product_category, SUM(reverse_event) total_events FROM Reverse_Logistics WHERE reverse_country IN (\u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Colombia\u0027) GROUP BY product_category;", + "sql_explanation": "This query calculates the number of reverse logistics events for each product category in South America by finding the sum of the reverse events for each country in South America and grouping the results by product category." 
+}, { + "id": "1478", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many pallets were delivered to each warehouse in Florida on the last day of the month?", + "sql_context": "CREATE TABLE deliveries (id INT, warehouse_state VARCHAR(20), pallets INT, delivery_date DATE); INSERT INTO deliveries (id, warehouse_state, pallets, delivery_date) VALUES (1, \u0027Florida\u0027, 20, \u00272022-01-31\u0027), (2, \u0027Florida\u0027, 30, \u00272022-01-31\u0027);", + "sql": "SELECT warehouse_state, COUNT(pallets) FROM deliveries WHERE warehouse_state \u003d \u0027Florida\u0027 AND delivery_date \u003d LAST_DAY(CURRENT_DATE) GROUP BY warehouse_state;", + "sql_explanation": "This query counts the number of pallets delivered to each warehouse in Florida on the last day of the month by filtering the deliveries table based on the warehouse state and delivery date, then grouping the results by the warehouse state and counting the number of matching records." 
+}, { + "id": "1873", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average delivery time for orders shipped from \u0027Warehouse 3\u0027?", + "sql_context": "CREATE TABLE Orders (id INT, warehouse_id INT, shipped_date DATE, delivered_date DATE); INSERT INTO Orders (id, warehouse_id, shipped_date, delivered_date) VALUES (1, 1, \u00272022-01-01\u0027, \u00272022-01-04\u0027); INSERT INTO Orders (id, warehouse_id, shipped_date, delivered_date) VALUES (2, 2, \u00272022-01-02\u0027, \u00272022-01-06\u0027); INSERT INTO Orders (id, warehouse_id, shipped_date, delivered_date) VALUES (3, 3, \u00272022-01-03\u0027, \u00272022-01-08\u0027);", + "sql": "SELECT warehouse_id, AVG(DATEDIFF(delivered_date, shipped_date)) as avg_delivery_time FROM Orders WHERE warehouse_id \u003d 3 GROUP BY warehouse_id;", + "sql_explanation": "Calculate the average delivery time for orders shipped from Warehouse 3." 
+}, { + "id": "1910", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the highest total quantities of goods received from all continents?", + "sql_context": "CREATE TABLE shipments (id INT, goods_id INT, quantity INT, source_continent VARCHAR(50), destination_country VARCHAR(50)); INSERT INTO shipments (id, goods_id, quantity, source_continent, destination_country) VALUES (1, 101, 50, \u0027Asia\u0027, \u0027Brazil\u0027), (2, 102, 75, \u0027Africa\u0027, \u0027India\u0027), (3, 103, 100, \u0027Europe\u0027, \u0027USA\u0027), (4, 104, 20, \u0027Australia\u0027, \u0027Canada\u0027), (5, 105, 30, \u0027Antarctica\u0027, \u0027Russia\u0027);", + "sql": "SELECT destination_country, SUM(quantity) as total_quantity FROM shipments GROUP BY destination_country ORDER BY total_quantity DESC LIMIT 3;", + "sql_explanation": "This SQL query identifies the top 3 countries with the highest total quantities of goods received from all continents. It starts by selecting the \u0027destination_country\u0027 column and summing the \u0027quantity\u0027 column for each row, grouped by \u0027destination_country\u0027. Then, it orders the results in descending order by the summed quantities and limits the output to the top 3 rows." 
+}, { + "id": "2059", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique modes of transportation and their associated average costs for freight forwarding in the Asia-Pacific region.", + "sql_context": "CREATE TABLE Transportation (id INT, mode TEXT, type TEXT, cost FLOAT); INSERT INTO Transportation (id, mode, type, cost) VALUES (1, \u0027Sea\u0027, \u0027Full Container Load\u0027, 1500), (2, \u0027Air\u0027, \u0027Express\u0027, 5000), (3, \u0027Rail\u0027, \u0027Less than Container Load\u0027, 800);", + "sql": "SELECT DISTINCT mode, AVG(cost) FROM Transportation WHERE type \u003d \u0027Full Container Load\u0027 AND country IN (\u0027Asia\u0027, \u0027Pacific\u0027) GROUP BY mode;", + "sql_explanation": "This query selects distinct mode values and calculates the average cost for each mode where the type is Full Container Load and the country is either Asia or Pacific. The result is grouped by mode." 
+}, { + "id": "3063", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of goods in transit on each route, grouped by the origin country and the carrier?", + "sql_context": "CREATE TABLE transit (id INT, goods_id INT, weight INT, origin_country VARCHAR(50), carrier VARCHAR(50)); INSERT INTO transit (id, goods_id, weight, origin_country, carrier) VALUES (1, 101, 25, \u0027Canada\u0027, \u0027Carrier A\u0027), (2, 102, 35, \u0027Mexico\u0027, \u0027Carrier B\u0027), (3, 103, 45, \u0027China\u0027, \u0027Carrier A\u0027);", + "sql": "SELECT origin_country, carrier, SUM(weight) as total_weight FROM transit GROUP BY origin_country, carrier;", + "sql_explanation": "This SQL query calculates the total weight of goods in transit on each route, grouped by the origin country and the carrier. It starts by selecting the \u0027origin_country\u0027 and \u0027carrier\u0027 columns and summing the \u0027weight\u0027 column for each row. Then, it groups the results by the \u0027origin_country\u0027 and \u0027carrier\u0027 columns, which aggregates the summed weights by origin country and carrier." 
+}, { + "id": "3152", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the daily average distance for freight shipped to \u0027Berlin\u0027.", + "sql_context": "CREATE TABLE Freight (id INT PRIMARY KEY, shipment_id INT, origin VARCHAR(50), destination VARCHAR(50), distance INT, cost FLOAT); INSERT INTO Freight (id, shipment_id, origin, destination, distance, cost) VALUES (13, 7, \u0027Paris\u0027, \u0027Berlin\u0027, 1200, 5600.2), (14, 8, \u0027London\u0027, \u0027Berlin\u0027, 1000, 4800.5), (15, 9, \u0027Warsaw\u0027, \u0027Berlin\u0027, 500, 2400.0), (16, 10, \u0027Rome\u0027, \u0027Berlin\u0027, 1300, 6200.3), (17, 11, \u0027Brussels\u0027, \u0027Berlin\u0027, 800, 3600.0), (18, 12, \u0027Madrid\u0027, \u0027Berlin\u0027, 1800, 8100.0);", + "sql": "SELECT AVG(distance) FROM Freight WHERE destination \u003d \u0027Berlin\u0027 GROUP BY destination HAVING COUNT(*) \u003e 1;", + "sql_explanation": "This query calculates the daily average distance for freight shipped to \u0027Berlin\u0027 by averaging the distance of all shipments destined for \u0027Berlin\u0027." 
+}, { + "id": "457", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 donors who contributed the most to the \u0027Arts \u0026 Culture\u0027 project category in 2021.", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name VARCHAR(255), donation_amount DECIMAL, donation_date DATE, project_category VARCHAR(255)); INSERT INTO donors (donor_id, donor_name, donation_amount, donation_date, project_category) VALUES (1, \u0027John Smith\u0027, 500, \u00272021-06-15\u0027, \u0027Arts \u0026 Culture\u0027), (2, \u0027Jane Doe\u0027, 750, \u00272021-03-01\u0027, \u0027Arts \u0026 Culture\u0027), (3, \u0027Michael Lee\u0027, 300, \u00272021-12-28\u0027, \u0027Education\u0027);", + "sql": "SELECT donor_name, SUM(donation_amount) as total_donation FROM donors WHERE donation_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 AND project_category \u003d \u0027Arts \u0026 Culture\u0027 GROUP BY donor_name ORDER BY total_donation DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 donors who contributed the most to the \u0027Arts \u0026 Culture\u0027 project category in 2021. It filters the donors table for records between \u00272021-01-01\u0027 and \u00272021-12-31\u0027 and groups the results by the donor_name column. It then calculates the total donation amount for each group and orders the results in descending order by total donation amount. Finally, it limits the results to the top 3 records." 
+}, { + "id": "502", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List top 5 volunteers by total hours served in 2023, for each month separately?", + "sql_context": "CREATE TABLE volunteers (id INT, name VARCHAR(50), hours_served INT, volunteer_date DATE); INSERT INTO volunteers (id, name, hours_served, volunteer_date) VALUES (1, \u0027Alice\u0027, 10, \u00272023-01-05\u0027); INSERT INTO volunteers (id, name, hours_served, volunteer_date) VALUES (2, \u0027Bob\u0027, 15, \u00272023-01-07\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM volunteer_date) as month, name, SUM(hours_served) as total_hours FROM volunteers WHERE volunteer_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-12-31\u0027 GROUP BY month, name ORDER BY month, total_hours DESC;", + "sql_explanation": "This SQL query reports the total hours served by each volunteer for each month of 2023 by extracting the month from the volunteer_date, filtering the date between 2023-01-01 and 2023-12-31, then grouping and summing the hours_served by month and name, and ordering by month and then by total_hours in descending order. The query does not restrict the output to the top 5 volunteers per month; that would require an additional ranking step." 
+}, { + "id": "565", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of days since the last donation for each donor, for donors who have made at least one donation in the current year.", + "sql_context": "CREATE TABLE donor (don_id INT, donor_name VARCHAR(255)); CREATE TABLE donation (don_id INT, donor_id INT, donation_date DATE);", + "sql": "SELECT donor_id, DATEDIFF(CURRENT_DATE, MAX(donation_date)) AS days_since_last_donation FROM donation WHERE EXTRACT(YEAR FROM donation_date) \u003d EXTRACT(YEAR FROM CURRENT_DATE) GROUP BY donor_id HAVING COUNT(*) \u003e 0;", + "sql_explanation": "This query calculates the number of days since the last donation for each donor, for donors who have made at least one donation in the current year, by grouping the donation table by the donor_id column, extracting the year from the donation_date column using the EXTRACT function, and filtering the rows where the year is the current year. The maximum donation date for each group is found using the MAX function, and the number of days since that date is calculated using the DATEDIFF function. The HAVING clause keeps only donors with at least one donation in the current year; the result is not explicitly ordered." 
+}, { + "id": "650", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique donors who made a donation in each month of 2022.", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, donor_id, donation_amount, donation_date) VALUES (1, 1, 500.00, \u00272022-01-01\u0027), (2, 2, 300.00, \u00272022-01-15\u0027), (3, 1, 200.00, \u00272022-02-01\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM donation_date) as month, COUNT(DISTINCT donor_id) as num_donors FROM donations WHERE donation_date \u003e\u003d \u00272022-01-01\u0027 AND donation_date \u003c\u003d \u00272022-12-31\u0027 GROUP BY month ORDER BY month;", + "sql_explanation": "This query finds the number of unique donors who made a donation in each month of 2022 by extracting the month from the donation_date and grouping the donations table by month. The number of distinct donor_id values is then calculated for each group. The results are then sorted by month in ascending order." 
+}, { + "id": "885", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 10 zip codes with the highest average donation amount in the last year.", + "sql_context": "CREATE TABLE donations (id INT, donor_zip TEXT, donation_amount DECIMAL, donation_date DATE); INSERT INTO donations (id, donor_zip, donation_amount, donation_date) VALUES (1, \u002710001\u0027, 100.00, \u00272022-01-01\u0027), (2, \u002790001\u0027, 200.00, \u00272022-04-15\u0027);", + "sql": "SELECT donor_zip, AVG(donation_amount) as avg_donation FROM donations WHERE donation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY donor_zip ORDER BY avg_donation DESC LIMIT 10;", + "sql_explanation": "First, the WHERE clause filters the donations table to donations made within the last year. Then, the query groups the remaining rows by zip code, calculates the average donation amount for each zip code, and orders the results in descending order. The query also limits the results to the top 10 zip codes." 
+}, { + "id": "988", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteer hours per program category in Q2 2022?", + "sql_context": "CREATE TABLE q2_program_category (id INT, program_category VARCHAR(50), program VARCHAR(50), volunteer_hours INT); INSERT INTO q2_program_category (id, program_category, program, volunteer_hours) VALUES (1, \u0027Education\u0027, \u0027Mentorship\u0027, 10), (2, \u0027Health\u0027, \u0027Tutoring\u0027, 15), (3, \u0027Education\u0027, \u0027Mentorship\u0027, 12);", + "sql": "SELECT program_category, SUM(volunteer_hours) as total_volunteer_hours FROM q2_program_category WHERE volunteer_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 GROUP BY program_category;", + "sql_explanation": "This SQL query calculates the total number of volunteer hours per program category in Q2 2022. It groups the results by the program_category column and calculates the sum of volunteer_hours for each category." 
+}, { + "id": "1344", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up each month in 2022?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, signup_date DATE); INSERT INTO volunteers (volunteer_id, signup_date) VALUES (1, \u00272022-01-05\u0027), (2, \u00272022-03-30\u0027), (3, \u00272022-04-15\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM signup_date) as month, COUNT(*) as num_volunteers FROM volunteers WHERE YEAR(signup_date) \u003d 2022 GROUP BY EXTRACT(MONTH FROM signup_date);", + "sql_explanation": "This query counts the number of volunteers that signed up each month in 2022 using the COUNT function and GROUP BY clause. The EXTRACT function is used to retrieve the month from the signup_date field." 
+}, { + "id": "1631", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hours were volunteered by each country in 2021?", + "sql_context": "CREATE TABLE volunteer_hours (id INT, volunteer_name TEXT, country TEXT, hours INT, volunteer_date DATE); INSERT INTO volunteer_hours (id, volunteer_name, country, hours, volunteer_date) VALUES (1, \u0027Alice\u0027, \u0027United States\u0027, 5, \u00272021-06-15\u0027); INSERT INTO volunteer_hours (id, volunteer_name, country, hours, volunteer_date) VALUES (2, \u0027Bob\u0027, \u0027Canada\u0027, 8, \u00272021-12-31\u0027);", + "sql": "SELECT country, SUM(hours) as total_hours FROM volunteer_hours WHERE volunteer_date \u003e\u003d \u00272021-01-01\u0027 AND volunteer_date \u003c \u00272022-01-01\u0027 GROUP BY country;", + "sql_explanation": "The SQL query calculates the total hours volunteered by each country in 2021 by using the SUM() function to add up the hours values in the volunteer_hours table, grouped by the country column. The WHERE clause filters the records to only include volunteer hours recorded in 2021." 
+}, { + "id": "1695", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total donation amount from each country in November 2020?", + "sql_context": "CREATE TABLE donations_by_country (id INT, country TEXT, donation_date DATE, donation_amount DECIMAL(10,2)); INSERT INTO donations_by_country (id, country, donation_date, donation_amount) VALUES (1, \u0027USA\u0027, \u00272020-11-05\u0027, 100.00), (2, \u0027Canada\u0027, \u00272020-11-15\u0027, 200.00);", + "sql": "SELECT country, SUM(donation_amount) FROM donations_by_country WHERE donation_date \u003e\u003d \u00272020-11-01\u0027 AND donation_date \u003c \u00272020-12-01\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the total donation amount from each country in November 2020 by selecting the country and summing the donation_amount from the donations_by_country table, filtering where the donation_date is in November 2020, and grouping by country." 
+}, { + "id": "1859", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor in Q1 2021?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationAmount DECIMAL(10,2)); INSERT INTO Donors (DonorID, DonorName, DonationAmount) VALUES (1, \u0027John Doe\u0027, 500.00), (2, \u0027Jane Smith\u0027, 350.00);", + "sql": "SELECT DonorName, SUM(DonationAmount) as TotalDonation FROM Donors WHERE DonationDate BETWEEN \u00272021-01-01\u0027 AND \u00272021-03-31\u0027 GROUP BY DonorName;", + "sql_explanation": "The SQL query selects the DonorName and calculates the sum of DonationAmount for each donor, filtering by the date range of Q1 2021." 
+}, { + "id": "1977", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average donation amount in each country in Q1 2021?", + "sql_context": "CREATE TABLE Donations (DonationID int, Country varchar(50), AmountDonated numeric(10,2), DonationDate date); INSERT INTO Donations (DonationID, Country, AmountDonated, DonationDate) VALUES (1, \u0027Mexico\u0027, 100.00, \u00272021-01-01\u0027), (2, \u0027Brazil\u0027, 150.00, \u00272021-04-30\u0027);", + "sql": "SELECT Country, AVG(AmountDonated) as AvgDonation FROM Donations WHERE DonationDate BETWEEN \u00272021-01-01\u0027 AND \u00272021-03-31\u0027 GROUP BY Country;", + "sql_explanation": "This SQL query calculates the average donation amount in each country in Q1 2021. It does this by selecting the Country and averaging the AmountDonated column for each country. The WHERE clause filters out any donations that were not made in Q1 2021, and the GROUP BY clause groups the results by country." 
+}, { + "id": "2319", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of volunteers who registered each month in 2023?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, registration_date DATE); INSERT INTO volunteers (volunteer_id, registration_date) VALUES (1, \u00272023-01-02\u0027), (2, \u00272023-04-15\u0027), (3, \u00272023-07-20\u0027), (4, \u00272023-09-10\u0027), (5, \u00272023-12-25\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM registration_date) as month, COUNT(*) as num_volunteers FROM volunteers GROUP BY month ORDER BY month;", + "sql_explanation": "This query calculates the number of volunteers who registered each month in 2023 by extracting the month from the registration date, grouping the results by month, and returning the count of volunteers for each month." 
+}, { + "id": "2355", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in each city in 2022?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID int, VolunteerName varchar(50), City varchar(50), VolunteerDate date); INSERT INTO Volunteers (VolunteerID, VolunteerName, City, VolunteerDate) VALUES (1, \u0027Bob Johnson\u0027, \u0027New York\u0027, \u00272022-01-01\u0027), (2, \u0027Sally Davis\u0027, \u0027Miami\u0027, \u00272022-05-15\u0027);", + "sql": "SELECT City, COUNT(*) as NumVolunteers FROM Volunteers WHERE VolunteerDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY City;", + "sql_explanation": "This SQL query calculates the number of volunteers who signed up in each city in 2022. It does this by selecting the City and counting the number of volunteers for each city. The WHERE clause filters out any volunteers who did not sign up in 2022, and the GROUP BY clause groups the results by city." 
+}, { + "id": "2543", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total amount donated by each program in the year 2019?", + "sql_context": "CREATE TABLE donations (id INT, name TEXT, program TEXT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, name, program, donation_amount, donation_date) VALUES (1, \u0027John Doe\u0027, \u0027Disaster Relief\u0027, 500.00, \u00272019-01-01\u0027); INSERT INTO donations (id, name, program, donation_amount, donation_date) VALUES (2, \u0027Jane Smith\u0027, \u0027Education\u0027, 600.00, \u00272019-05-15\u0027); INSERT INTO donations (id, name, program, donation_amount, donation_date) VALUES (3, \u0027Bob Williams\u0027, \u0027Healthcare\u0027, 400.00, \u00272018-08-10\u0027);", + "sql": "SELECT program, SUM(donation_amount) AS total_donations FROM donations WHERE YEAR(donation_date) \u003d 2019 GROUP BY program;", + "sql_explanation": "The SQL query calculates the total amount donated by each program in the year 2019 by using SELECT statement with GROUP BY clause on program column and SUM function on donation_amount column." 
+}, { + "id": "2608", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated per quarter, based on the \u0027donations\u0027 table?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2), donation_date DATE);", + "sql": "SELECT EXTRACT(QUARTER FROM donations.donation_date) AS quarter, SUM(donations.amount) FROM donations GROUP BY quarter;", + "sql_explanation": "This query calculates the total amount donated per quarter using the EXTRACT() function and groups the result by the \u0027quarter\u0027 column. It does not require a join since all the necessary information is available in the \u0027donations\u0027 table." 
+}, { + "id": "2742", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 countries with the highest total donations from the \u0027Donations\u0027 table, grouped by Country.", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, Amount DECIMAL(10, 2), Country VARCHAR(50));", + "sql": "SELECT Country, SUM(Amount) AS TotalDonations FROM Donations GROUP BY Country ORDER BY TotalDonations DESC LIMIT 5;", + "sql_explanation": "The SQL query calculates the total donations by country and orders the results in descending order. The query then limits the output to the top 5 countries with the highest total donations." 
+}, { + "id": "2898", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who was the top donor in the \u0027Southeast\u0027 region in the year 2020?", + "sql_context": "CREATE TABLE Donors (donor_id INT, region VARCHAR(20), amount DECIMAL(10,2), donation_year INT); INSERT INTO Donors (donor_id, region, amount, donation_year) VALUES (1, \u0027Southeast\u0027, 7000.00, 2020), (2, \u0027Southeast\u0027, 6000.00, 2020);", + "sql": "SELECT donor_id, MAX(amount) FROM Donors WHERE region \u003d \u0027Southeast\u0027 AND donation_year \u003d 2020 GROUP BY donor_id;", + "sql_explanation": "This SQL query identifies the top donor in the \u0027Southeast\u0027 region in the year 2020 by selecting the \u0027donor_id\u0027 and the maximum \u0027amount\u0027 where the \u0027region\u0027 is \u0027Southeast\u0027 and \u0027donation_year\u0027 is 2020, and grouping the results by \u0027donor_id\u0027." 
+}, { + "id": "2941", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum donation amount made by a repeat donor in Canada?", + "sql_context": "CREATE TABLE donors (donor_id int, donation_amount decimal(10,2), donation_date date, country varchar(50)); INSERT INTO donors (donor_id, donation_amount, donation_date, country) VALUES (1, 150.00, \u00272020-01-01\u0027, \u0027Canada\u0027), (1, 75.00, \u00272020-02-01\u0027, \u0027Canada\u0027), (2, 250.00, \u00272020-03-01\u0027, \u0027Canada\u0027);", + "sql": "SELECT MIN(donation_amount) FROM donors WHERE country \u003d \u0027Canada\u0027 GROUP BY donor_id HAVING COUNT(donor_id) \u003e 1;", + "sql_explanation": "The query first filters the data to only include donors from Canada. Then, it groups the data by donor_id and applies a HAVING clause to only include repeat donors. Lastly, it calculates the minimum donation amount for these donors." 
+}, { + "id": "2981", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total budget and total number of events for each event type, from the \u0027Event_Data\u0027 table, grouped by Event_Type.", + "sql_context": "CREATE TABLE Event_Data (EventID INT, Event_Type VARCHAR(50), Budget DECIMAL(10, 2));", + "sql": "SELECT Event_Type, SUM(Budget) AS Total_Budget, COUNT(*) AS Total_Events FROM Event_Data GROUP BY Event_Type;", + "sql_explanation": "The SQL query calculates the total budget and total number of events for each event type using the SUM(Budget) and COUNT(*) functions. The query groups the results based on the Event_Type column." 
+}, { + "id": "3797", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in each region for the \u0027Tree Planting\u0027 campaign?", + "sql_context": "CREATE TABLE Volunteers (volunteer_id INT, volunteer_name VARCHAR(50), region VARCHAR(50), campaign VARCHAR(50)); INSERT INTO Volunteers (volunteer_id, volunteer_name, region, campaign) VALUES (1, \u0027Alice Johnson\u0027, \u0027Northeast\u0027, \u0027Tree Planting\u0027), (2, \u0027Bob Brown\u0027, \u0027Southeast\u0027, \u0027Food Drive\u0027);", + "sql": "SELECT region, COUNT(*) FROM Volunteers WHERE campaign \u003d \u0027Tree Planting\u0027 GROUP BY region;", + "sql_explanation": "The SQL query counts the number of volunteers who signed up for the \u0027Tree Planting\u0027 campaign in each region by grouping the Volunteers table by region and filtering for the \u0027Tree Planting\u0027 campaign." 
+}, { + "id": "3830", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated to each program in the \u0027NonprofitDB\u0027 database?", + "sql_context": "CREATE TABLE Program (ID INT, Name VARCHAR(255)); INSERT INTO Program (ID, Name) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027); CREATE TABLE Donation (ID INT, Amount DECIMAL(10, 2), DonorID INT, ProgramID INT); INSERT INTO Donation (ID, Amount, DonorID, ProgramID) VALUES (1, 500.00, 1, 1), (2, 1000.00, 2, 1), (3, 250.00, 3, 1), (4, 750.00, 4, 2), (5, 300.00, 5, 2), (6, 800.00, 6, 3);", + "sql": "SELECT d.ProgramID, SUM(d.Amount) as TotalDonations FROM Donation d GROUP BY d.ProgramID;", + "sql_explanation": "This SQL query calculates the total amount donated to each program by grouping the Donation table by the ProgramID column and then calculating the sum of the Amount column for each group." 
+}, { + "id": "4999", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors have there been in total from each continent?", + "sql_context": "CREATE TABLE donor_continent(donor_id INT, donor_name TEXT, continent TEXT); INSERT INTO donor_continent(donor_id, donor_name, continent) VALUES (1, \u0027John Doe\u0027, \u0027North America\u0027), (2, \u0027Jane Smith\u0027, \u0027North America\u0027), (3, \u0027Alice Johnson\u0027, \u0027Africa\u0027);", + "sql": "SELECT continent, COUNT(*) FROM donor_continent GROUP BY continent;", + "sql_explanation": "This query reads the rows of the \u0027donor_continent\u0027 table and groups them by the continent column. Finally, it counts the number of donors in each group." 
+}, { + "id": "5071", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers are there in the volunteers table, grouped by their skill level?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, skill_level VARCHAR(25)); INSERT INTO volunteers (volunteer_id, skill_level) VALUES (1, \u0027Intermediate\u0027), (2, \u0027Beginner\u0027), (3, \u0027Advanced\u0027);", + "sql": "SELECT COUNT(*), skill_level FROM volunteers GROUP BY skill_level;", + "sql_explanation": "This query counts the number of volunteers in the volunteers table, grouped by their skill level. It uses the COUNT function to count the number of volunteers and the GROUP BY clause to group the results by skill_level." 
+}, { + "id": "2299", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of artifacts found per excavation site, in descending order?", + "sql_context": "CREATE TABLE ExcavationSitesArtifacts (SiteID INT, ArtifactCount INT); INSERT INTO ExcavationSitesArtifacts (SiteID, ArtifactCount) VALUES (1, 5), (2, 3), (3, 2); CREATE TABLE ExcavationSites (SiteID INT, Name VARCHAR(50)); INSERT INTO ExcavationSites (SiteID, Name) VALUES (1, \u0027Site A\u0027), (2, \u0027Site B\u0027), (3, \u0027Site C\u0027);", + "sql": "SELECT AVG(ArtifactCount) AS AvgArtifactsPerSite FROM ExcavationSitesArtifacts GROUP BY SiteID ORDER BY AvgArtifactsPerSite DESC;", + "sql_explanation": "This query calculates the average number of artifacts found per excavation site by grouping the ExcavationSitesArtifacts table by SiteID and calculating the average ArtifactCount. The result is ordered by the average number of artifacts per site in descending order." 
+}, { + "id": "2964", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the most excavation sites in our database, and how many sites are there in each?", + "sql_context": "CREATE TABLE excavation_sites(site_id INT, site_name TEXT, country TEXT, num_artifacts INT); INSERT INTO excavation_sites(site_id, site_name, country, num_artifacts) VALUES (1, \u0027Site A\u0027, \u0027Egypt\u0027, 500), (2, \u0027Site B\u0027, \u0027Italy\u0027, 300), (3, \u0027Site C\u0027, \u0027Egypt\u0027, 700), (4, \u0027Site D\u0027, \u0027Mexico\u0027, 600), (5, \u0027Site E\u0027, \u0027Italy\u0027, 400);", + "sql": "SELECT country, COUNT(*) as num_sites FROM excavation_sites GROUP BY country ORDER BY num_sites DESC LIMIT 3;", + "sql_explanation": "This query groups the excavation sites by country and counts the number of sites in each country. It then orders the results by the count in descending order and limits the output to the top 3 countries." 
+}, { + "id": "3213", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artifacts were found in more than one excavation site?", + "sql_context": "CREATE TABLE Artifacts (id INT, excavation_site VARCHAR(20), artifact_name VARCHAR(30), pieces INT); INSERT INTO Artifacts (id, excavation_site, artifact_name, pieces) VALUES (1, \u0027BronzeAge\u0027, \u0027Sword\u0027, 3000,), (2, \u0027AncientRome\u0027, \u0027Sword\u0027, 2500,);", + "sql": "SELECT artifact_name FROM Artifacts GROUP BY artifact_name HAVING COUNT(DISTINCT excavation_site) \u003e 1;", + "sql_explanation": "Group the Artifacts table by artifact_name and filter the result by counting the distinct excavation sites greater than 1." 
+}, { + "id": "4606", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artifacts are there in total for each period?", + "sql_context": "CREATE TABLE ExcavationSites (site_id INT, site_name TEXT, period TEXT); INSERT INTO ExcavationSites (site_id, site_name, period) VALUES (1, \u0027SiteA\u0027, \u0027Iron Age\u0027), (2, \u0027SiteB\u0027, \u0027Bronze Age\u0027); CREATE TABLE Artifacts (artifact_id INT, site_id INT, artifact_name TEXT, period TEXT); INSERT INTO Artifacts (artifact_id, site_id, artifact_name, period) VALUES (1, 1, \u0027Artifact1\u0027, \u0027Iron Age\u0027), (2, 1, \u0027Artifact2\u0027, \u0027Iron Age\u0027), (3, 2, \u0027Artifact3\u0027, \u0027Bronze Age\u0027);", + "sql": "SELECT period, COUNT(*) as total_artifacts FROM Artifacts GROUP BY period;", + "sql_explanation": "Group Artifacts by period and count the number of records per period." 
+}, { + "id": "1681", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of electric vehicles sold in urban areas compared to rural areas?", + "sql_context": "CREATE TABLE VehicleSales (Location VARCHAR(50), VehicleType VARCHAR(50), Sales INT); INSERT INTO VehicleSales (Location, VehicleType, Sales) VALUES (\u0027Urban\u0027, \u0027Electric\u0027, 75000), (\u0027Urban\u0027, \u0027Gasoline\u0027, 50000), (\u0027Rural\u0027, \u0027Electric\u0027, 25000), (\u0027Rural\u0027, \u0027Gasoline\u0027, 75000);", + "sql": "SELECT Location, (SUM(CASE WHEN VehicleType \u003d \u0027Electric\u0027 THEN Sales ELSE 0 END) / SUM(Sales)) * 100 as Percentage FROM VehicleSales GROUP BY Location;", + "sql_explanation": "We start by using a SELECT statement to get the total sales for electric and gasoline vehicles in urban and rural areas. We then use a CASE statement to calculate the percentage of electric vehicle sales for each location. Finally, we group by the Location column." 
+}, { + "id": "1879", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles were sold per month in the \u0027sales\u0027 table?", + "sql_context": "CREATE TABLE sales (id INT, sale_date DATE, vehicle_type VARCHAR(20));", + "sql": "SELECT DATE_TRUNC(\u0027month\u0027, sale_date) AS month, COUNT(*) FILTER (WHERE vehicle_type \u003d \u0027Electric\u0027) AS electric_sales FROM sales GROUP BY month;", + "sql_explanation": "Truncates dates to the month level, then counts the number of electric vehicle sales per month." +}, { + "id": "2014", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of unique vehicle makes and models in the \u0027vehicle_info\u0027 table, grouped by \u0027vehicle_type\u0027.", + "sql_context": "CREATE TABLE vehicle_info (id INT, vehicle_make VARCHAR, vehicle_model VARCHAR, vehicle_type VARCHAR);", + "sql": "SELECT vehicle_type, COUNT(DISTINCT CONCAT(vehicle_make, \u0027_\u0027, vehicle_model)) AS unique_vehicles FROM vehicle_info GROUP BY vehicle_type;", + "sql_explanation": "The query calculates the number of unique vehicle makes and models in the 
\u0027vehicle_info\u0027 table, grouped by \u0027vehicle_type\u0027." +}, { + "id": "2262", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicles sold by manufacturer \u0027XYZ\u0027?", + "sql_context": "CREATE TABLE sales_data (manufacturer VARCHAR(10), vehicle_type VARCHAR(10), quantity INT);", + "sql": "SELECT manufacturer, SUM(quantity) FROM sales_data WHERE vehicle_type \u003d \u0027Electric\u0027 AND manufacturer \u003d \u0027XYZ\u0027 GROUP BY manufacturer;", + "sql_explanation": "Calculate the total number of electric vehicles sold by manufacturer \u0027XYZ\u0027 by using the SUM window function, filtering vehicle_type with \u0027Electric\u0027 and manufacturer with \u0027XYZ\u0027, and grouping by manufacturer." 
+}, { + "id": "2507", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the percentage of electric vehicles sold in each country.", + "sql_context": "CREATE TABLE sales (id INT, country VARCHAR(20), vehicle_type VARCHAR(10)); INSERT INTO sales (id, country, vehicle_type) VALUES (1, \u0027USA\u0027, \u0027EV\u0027), (2, \u0027Canada\u0027, \u0027Hybrid\u0027), (3, \u0027Mexico\u0027, \u0027EV\u0027), (4, \u0027USA\u0027, \u0027Hybrid\u0027), (5, \u0027Canada\u0027, \u0027EV\u0027);", + "sql": "SELECT country, COUNT(*) FILTER (WHERE vehicle_type \u003d \u0027EV\u0027) * 100.0 / COUNT(*) AS pct_ev_sold FROM sales GROUP BY country;", + "sql_explanation": "Uses a SELECT statement with the GROUP BY clause to categorize sales by country, the COUNT function to count the number of electric vehicles sold, and the * and / operators to calculate the percentage of electric vehicles sold in each country." 
+}, { + "id": "2527", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of autonomous driving research projects for each country in the last 5 years?", + "sql_context": "CREATE TABLE AutonomousDrivingResearch (Id INT, Project VARCHAR(50), Country VARCHAR(50), StartDate DATE); INSERT INTO AutonomousDrivingResearch (Id, Project, Country, StartDate) VALUES (1, \u0027Project A\u0027, \u0027USA\u0027, \u00272017-05-15\u0027); INSERT INTO AutonomousDrivingResearch (Id, Project, Country, StartDate) VALUES (2, \u0027Project B\u0027, \u0027Germany\u0027, \u00272018-02-28\u0027); INSERT INTO AutonomousDrivingResearch (Id, Project, Country, StartDate) VALUES (3, \u0027Project C\u0027, \u0027China\u0027, \u00272019-11-11\u0027); INSERT INTO AutonomousDrivingResearch (Id, Project, Country, StartDate) VALUES (4, \u0027Project D\u0027, \u0027Japan\u0027, \u00272020-08-01\u0027);", + "sql": "SELECT Country, COUNT(*) FROM AutonomousDrivingResearch WHERE StartDate \u003e\u003d DATEADD(year, -5, GETDATE()) GROUP BY Country;", + "sql_explanation": "This query retrieves the total number of autonomous driving research projects for each country in the last 5 years by selecting the country and count of records from the AutonomousDrivingResearch table where the start date is greater than or equal to 5 years before the current date and grouping the results by country." 
+}, { + "id": "2790", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of autonomous driving research papers published per year", + "sql_context": "CREATE TABLE research_papers (id INT, title VARCHAR(100), publication_year INT, autonomous_driving BOOLEAN); INSERT INTO research_papers (id, title, publication_year, autonomous_driving) VALUES (1, \u0027Autonomous Driving and AI\u0027, 2020, true), (2, \u0027Hybrid Vehicle Efficiency\u0027, 2021, false), (3, \u0027EV Charging Infrastructure\u0027, 2021, false), (4, \u0027Sensors in Autonomous Vehicles\u0027, 2022, true);", + "sql": "SELECT publication_year, COUNT(*) FROM research_papers WHERE autonomous_driving \u003d true GROUP BY publication_year;", + "sql_explanation": "This query identifies the number of autonomous driving research papers published per year by counting the rows where the autonomous_driving column is true and grouping by publication_year." 
+}, { + "id": "3031", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of electric vehicles sold by each manufacturer", + "sql_context": "CREATE TABLE manufacturer_sales (id INT, manufacturer VARCHAR(50), vehicle_type VARCHAR(20), sale_year INT, quantity INT); INSERT INTO manufacturer_sales (id, manufacturer, vehicle_type, sale_year, quantity) VALUES (1, \u0027Tesla\u0027, \u0027EV\u0027, 2021, 30000), (2, \u0027Tesla\u0027, \u0027EV\u0027, 2022, 50000), (3, \u0027Toyota\u0027, \u0027Hybrid\u0027, 2021, 20000), (4, \u0027Toyota\u0027, \u0027Hybrid\u0027, 2022, 25000), (5, \u0027Ford\u0027, \u0027EV\u0027, 2022, 10000);", + "sql": "SELECT manufacturer, SUM(quantity) FROM manufacturer_sales WHERE vehicle_type \u003d \u0027EV\u0027 GROUP BY manufacturer;", + "sql_explanation": "This query lists the number of electric vehicles sold by each manufacturer by summing the quantity column where vehicle_type is \u0027EV\u0027 and grouping by manufacturer." 
+}, { + "id": "3169", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many autonomous driving research papers were published per month in 2021?", + "sql_context": "CREATE TABLE ResearchPapers (ID INT, Title TEXT, Author TEXT, PublicationDate DATE); INSERT INTO ResearchPapers (ID, Title, Author, PublicationDate) VALUES (1, \u0027Deep Learning for Autonomous Driving\u0027, \u0027John Doe\u0027, \u00272021-03-15\u0027); INSERT INTO ResearchPapers (ID, Title, Author, PublicationDate) VALUES (2, \u0027Reinforcement Learning in Autonomous Vehicles\u0027, \u0027Jane Smith\u0027, \u00272021-07-22\u0027);", + "sql": "SELECT COUNT(*) FROM ResearchPapers WHERE YEAR(PublicationDate) \u003d 2021 GROUP BY MONTH(PublicationDate);", + "sql_explanation": "This query counts the number of autonomous driving research papers published per month in 2021 by filtering the ResearchPapers table based on the PublicationDate, grouping the results by month, and then counting the number of rows in each group." 
+}, { + "id": "3304", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average horsepower of sports cars in the \u0027green_cars\u0027 table by manufacturer?", + "sql_context": "CREATE TABLE green_cars (id INT PRIMARY KEY, make VARCHAR(50), model VARCHAR(50), year INT, horsepower INT, is_electric BOOLEAN, is_sports BOOLEAN);", + "sql": "SELECT make, AVG(horsepower) as avg_horsepower FROM green_cars WHERE is_sports \u003d TRUE GROUP BY make;", + "sql_explanation": "The SQL query groups the \u0027green_cars\u0027 table by the \u0027make\u0027 column, filtering the rows where the \u0027is_sports\u0027 column is set to TRUE, and then calculates the average horsepower for sports cars by manufacturer using the AVG function." 
+}, { + "id": "3645", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of electric vehicles sold in the US by year.", + "sql_context": "CREATE TABLE EV_Sales (id INT, vehicle_type VARCHAR(50), year INT, quantity_sold INT);", + "sql": "SELECT year, SUM(quantity_sold) FROM EV_Sales WHERE vehicle_type \u003d \u0027Electric\u0027 GROUP BY year;", + "sql_explanation": "This query calculates the total number of electric vehicles sold in the US by grouping the EV_Sales table where the vehicle_type is Electric by year and then summing the quantity_sold column." 
+}, { + "id": "3736", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of electric vehicle models for each brand", + "sql_context": "CREATE TABLE electric_vehicles (id INT PRIMARY KEY, brand VARCHAR(255), model VARCHAR(255), production_year INT, range INT);", + "sql": "SELECT brand, COUNT(DISTINCT model) as total_models FROM electric_vehicles GROUP BY brand;", + "sql_explanation": "Selects all unique \u0027brand\u0027 values and counts the number of unique \u0027model\u0027 values for each brand, then groups the results by \u0027brand\u0027 column to show the total number of electric vehicle models for each brand." 
+}, { + "id": "4009", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of electric vehicles in the \u0027test_drives\u0027 table, grouped by \u0027vehicle_make\u0027?", + "sql_context": "CREATE TABLE test_drives (drive_id INT, vehicle_make VARCHAR(20), avg_speed FLOAT); INSERT INTO test_drives (drive_id, vehicle_make, avg_speed) VALUES (1, \u0027Tesla\u0027, 65.3), (2, \u0027Tesla\u0027, 68.1), (3, \u0027Rivian\u0027, 62.9), (4, \u0027Rivian\u0027, 64.5);", + "sql": "SELECT vehicle_make, AVG(avg_speed) avg_speed FROM test_drives GROUP BY vehicle_make;", + "sql_explanation": "Calculates the average speed for each vehicle_make by grouping the test_drives table based on vehicle_make and finding the average speed for each group." 
+}, { + "id": "4543", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average horsepower of vehicles by make?", + "sql_context": "CREATE TABLE Vehicles (Id INT, Make VARCHAR(255), Model VARCHAR(255), Year INT, Horsepower INT); INSERT INTO Vehicles (Id, Make, Model, Year, Horsepower) VALUES (1, \u0027Toyota\u0027, \u0027Corolla\u0027, 2020, 139); INSERT INTO Vehicles (Id, Make, Model, Year, Horsepower) VALUES (2, \u0027Toyota\u0027, \u0027Camry\u0027, 2020, 203); INSERT INTO Vehicles (Id, Make, Model, Year, Horsepower) VALUES (3, \u0027Honda\u0027, \u0027Civic\u0027, 2020, 158);", + "sql": "SELECT Make, AVG(Horsepower) AS Avg_Horsepower FROM Vehicles GROUP BY Make;", + "sql_explanation": "This SQL query calculates the average horsepower for each vehicle make in the Vehicles table. It does so by using the AVG() function along with the GROUP BY clause." 
+}, { + "id": "5284", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of electric vehicle models released in each year.", + "sql_context": "CREATE TABLE EVData (Id INT, Model VARCHAR(50), Year INT); INSERT INTO EVData (Id, Model, Year) VALUES (1, \u0027Tesla Model 3\u0027, 2017), (2, \u0027Tesla Model Y\u0027, 2018), (3, \u0027Chevy Bolt\u0027, 2016);", + "sql": "SELECT Year, COUNT(DISTINCT Model) FROM EVData GROUP BY Year;", + "sql_explanation": "Get the number of electric vehicle models released in each year by selecting the Year and the count of distinct Model from the EVData table, grouped by the Year column." 
+}, { + "id": "794", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of unique wallet addresses that have interacted with the \u0027Uniswap\u0027 dapp in the \u0027Ethereum\u0027 network.", + "sql_context": "CREATE TABLE ethereum_uniswap_interactions (wallet_address TEXT, interaction_date DATE, network_name TEXT);", + "sql": "SELECT wallet_address, COUNT(DISTINCT interaction_date) as interactions FROM ethereum_uniswap_interactions WHERE network_name \u003d \u0027Ethereum\u0027 AND wallet_address \u003d \u0027Uniswap\u0027 GROUP BY wallet_address;", + "sql_explanation": "The SQL query identifies the number of unique wallet addresses that have interacted with the \u0027Uniswap\u0027 dapp in the \u0027Ethereum\u0027 network by using the COUNT(DISTINCT) function to count the number of unique interaction dates for each wallet address in the \u0027ethereum_uniswap_interactions\u0027 table where the network name is \u0027Ethereum\u0027 and the wallet address is \u0027Uniswap\u0027." 
+}, { + "id": "973", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum transaction amount for each digital asset in the \u0027crypto_transactions\u0027 table, partitioned by day?", + "sql_context": "CREATE TABLE crypto_transactions (transaction_id INT, digital_asset VARCHAR(20), transaction_amount DECIMAL(10,2), transaction_time DATETIME);", + "sql": "SELECT digital_asset, MAX(transaction_amount) as max_transaction_amount, DATE_TRUNC(\u0027day\u0027, transaction_time) as day FROM crypto_transactions GROUP BY digital_asset, day ORDER BY day;", + "sql_explanation": "The SQL query uses the MAX function to find the maximum transaction amount for each digital asset and partitions the results by day using the DATE_TRUNC function. The results are then grouped by the digital_asset and day columns and ordered by day." 
+}, { + "id": "1897", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of digital assets issued in the United States and their respective asset classes.", + "sql_context": "CREATE TABLE digital_assets (asset_id INT, name VARCHAR(255), issue_country VARCHAR(50), asset_class VARCHAR(50)); INSERT INTO digital_assets (asset_id, name, issue_country, asset_class) VALUES (1, \u0027Asset1\u0027, \u0027USA\u0027, \u0027Equity\u0027), (2, \u0027Asset2\u0027, \u0027USA\u0027, \u0027Commodity\u0027), (3, \u0027Asset3\u0027, \u0027Canada\u0027, \u0027Currency\u0027);", + "sql": "SELECT issue_country, asset_class, COUNT(*) as num_assets FROM digital_assets WHERE issue_country \u003d \u0027USA\u0027 GROUP BY issue_country, asset_class;", + "sql_explanation": "This query filters the digital_assets table to only include rows where the issue_country is \u0027USA\u0027. Then, it groups the results by both issue_country and asset_class, and calculates the count of assets for each combination of these columns." 
+}, { + "id": "2530", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 countries with the most digital assets issued.", + "sql_context": "CREATE TABLE digital_assets (id INT, name VARCHAR, issue_country VARCHAR); INSERT INTO digital_assets (id, name, issue_country) VALUES (1, \u0027CryptoCoin\u0027, \u0027United States\u0027), (2, \u0027DigiToken\u0027, \u0027Japan\u0027), (3, \u0027BitAsset\u0027, \u0027China\u0027), (4, \u0027EtherCoin\u0027, \u0027China\u0027), (5, \u0027RippleToken\u0027, \u0027India\u0027), (6, \u0027LiteCoin\u0027, \u0027Canada\u0027), (7, \u0027MoneroCoin\u0027, \u0027Germany\u0027);", + "sql": "SELECT issue_country, COUNT(*) as num_assets FROM digital_assets GROUP BY issue_country ORDER BY num_assets DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the number of records in the digital_assets table for each issue_country using the GROUP BY clause. The query then orders the results in descending order by the num_assets column and returns the top 3 records using the LIMIT clause." 
+}, { + "id": "2602", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest daily trading volume for each digital asset category?", + "sql_context": "CREATE TABLE digital_asset_categories (id INT, name VARCHAR(255)); CREATE TABLE digital_assets (id INT, category_id INT, name VARCHAR(255), daily_trading_volume DECIMAL(10,2)); INSERT INTO digital_asset_categories (id, name) VALUES (1, \u0027CategoryA\u0027), (2, \u0027CategoryB\u0027), (3, \u0027CategoryC\u0027); INSERT INTO digital_assets (id, category_id, name, daily_trading_volume) VALUES (1, 1, \u0027Asset1\u0027, 5000), (2, 1, \u0027Asset2\u0027, 3000), (3, 2, \u0027Asset3\u0027, 2000), (4, 2, \u0027Asset4\u0027, 1000), (5, 3, \u0027Asset5\u0027, 500);", + "sql": "SELECT category_id, MAX(daily_trading_volume) AS Highest_Daily_Trading_Volume FROM digital_assets GROUP BY category_id;", + "sql_explanation": "This query groups the digital_assets table by category_id and calculates the highest daily trading volume for each digital asset category." 
+}, { + "id": "2631", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the digital assets that have had a transaction on the most number of distinct dates.", + "sql_context": "CREATE TABLE transactions (asset TEXT, tx_date DATE); INSERT INTO transactions (asset, tx_date) VALUES (\u0027Securitize\u0027, \u00272021-01-01\u0027), (\u0027Securitize\u0027, \u00272021-01-02\u0027), (\u0027Polymath\u0027, \u00272021-01-01\u0027), (\u0027Polymath\u0027, \u00272021-01-02\u0027), (\u0027Polymath\u0027, \u00272021-01-03\u0027);", + "sql": "SELECT asset, COUNT(DISTINCT tx_date) AS distinct_dates FROM transactions GROUP BY asset ORDER BY distinct_dates DESC;", + "sql_explanation": "This query uses the COUNT function with the DISTINCT keyword to count the number of distinct transaction dates for each digital asset. The results are then ordered by the number of distinct transaction dates in descending order." 
+}, { + "id": "2638", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the top 3 countries with the most regulatory actions in descending order", + "sql_context": "CREATE TABLE RegulatoryActions (country VARCHAR(255), action_date DATE); INSERT INTO RegulatoryActions (country, action_date) VALUES (\u0027USA\u0027, \u00272021-01-01\u0027), (\u0027USA\u0027, \u00272021-03-01\u0027), (\u0027China\u0027, \u00272021-02-01\u0027), (\u0027Japan\u0027, \u00272021-04-01\u0027), (\u0027India\u0027, \u00272021-05-01\u0027);", + "sql": "SELECT country, COUNT(*) as total_actions FROM RegulatoryActions GROUP BY country ORDER BY total_actions DESC LIMIT 3;", + "sql_explanation": "This query displays the top 3 countries with the most regulatory actions in descending order. The COUNT function counts the number of rows in the RegulatoryActions table for each country, the GROUP BY clause groups the results by country, and the ORDER BY clause sorts the results by the total_actions column in descending order. The LIMIT clause limits the number of results to 3." 
+}, { + "id": "2671", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of smart contracts created per month?", + "sql_context": "CREATE TABLE smart_contracts (id INT, name VARCHAR(255), creation_date DATE); INSERT INTO smart_contracts (id, name, creation_date) VALUES (1, \u0027SC1\u0027, \u00272022-01-01\u0027), (2, \u0027SC2\u0027, \u00272022-01-15\u0027), (3, \u0027SC3\u0027, \u00272022-02-03\u0027), (4, \u0027SC4\u0027, \u00272022-02-28\u0027), (5, \u0027SC5\u0027, \u00272022-03-10\u0027);", + "sql": "SELECT DATE_FORMAT(creation_date, \u0027%Y-%m\u0027) AS Month, COUNT(*) AS Total_Contracts FROM smart_contracts GROUP BY Month;", + "sql_explanation": "This query extracts the year and month from the creation_date column, groups by month, and calculates the total number of smart contracts created per month." 
+}, { + "id": "3178", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many smart contracts have been deployed on a specific blockchain platform?", + "sql_context": "CREATE TABLE blockchain_platforms (platform_id INT, name VARCHAR(255), smart_contract_count INT);", + "sql": "SELECT name, SUM(smart_contract_count) FROM blockchain_platforms WHERE name \u003d \u0027Ethereum\u0027 GROUP BY name;", + "sql_explanation": "The SQL query calculates the sum of the smart_contract_count column from the blockchain_platforms table where the name column matches the input value \u0027Ethereum\u0027, and groups the results by name." 
+}, { + "id": "3682", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique digital assets by network?", + "sql_context": "CREATE TABLE digital_assets (id INT, name VARCHAR(255), network VARCHAR(255)); INSERT INTO digital_assets (id, name, network) VALUES (1, \u0027Bitcoin\u0027, \u0027Bitcoin\u0027), (2, \u0027Ethereum\u0027, \u0027Ethereum\u0027), (3, \u0027USDT\u0027, \u0027Ethereum\u0027), (4, \u0027BNB\u0027, \u0027Binance Smart Chain\u0027), (5, \u0027ADA\u0027, \u0027Cardano\u0027), (6, \u0027SOL\u0027, \u0027Solana\u0027);", + "sql": "SELECT network, COUNT(DISTINCT name) as unique_assets FROM digital_assets GROUP BY network;", + "sql_explanation": "This SQL query groups the digital_assets table by the network column, calculates the count of unique assets in each group, and returns the total number of unique digital assets by network." 
+}, { + "id": "3914", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many regulatory violations have been reported for each country in the blockchain industry?", + "sql_context": "CREATE TABLE regulatory_violations (violation_id INT, country VARCHAR(50), violation_type VARCHAR(50), description TEXT); INSERT INTO regulatory_violations (violation_id, country, violation_type, description) VALUES (1, \u0027USA\u0027, \u0027Securities Fraud\u0027, \u0027Description 1\u0027), (2, \u0027Canada\u0027, \u0027Money Laundering\u0027, \u0027Description 2\u0027), (3, \u0027USA\u0027, \u0027Insider Trading\u0027, \u0027Description 3\u0027), (4, \u0027UK\u0027, \u0027Terrorism Financing\u0027, \u0027Description 4\u0027);", + "sql": "SELECT country, COUNT(*) as num_violations FROM regulatory_violations GROUP BY country;", + "sql_explanation": "This query groups the regulatory_violations table by country and calculates the count of rows for each group. This represents the number of regulatory violations that have been reported for each country in the blockchain industry." 
+}, { + "id": "4046", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of digital assets held by each exchange on the blockchain?", + "sql_context": "CREATE TABLE digital_assets (asset_id INT, name VARCHAR(50), exchange VARCHAR(50), quantity DECIMAL(10,2)); INSERT INTO digital_assets (asset_id, name, exchange, quantity) VALUES (1, \u0027BTC\u0027, \u0027Binance\u0027, 1000), (2, \u0027ETH\u0027, \u0027Binance\u0027, 2000), (3, \u0027BTC\u0027, \u0027Coinbase\u0027, 1500), (4, \u0027ETH\u0027, \u0027Coinbase\u0027, 2500);", + "sql": "SELECT exchange, SUM(quantity) as total_value FROM digital_assets GROUP BY exchange;", + "sql_explanation": "This query groups the digital_assets table by exchange and calculates the sum of the quantity column for each group. This represents the total value of digital assets held by each exchange on the blockchain." 
+}, { + "id": "4902", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average age of developers in each country", + "sql_context": "CREATE TABLE Developers (name VARCHAR(255), country VARCHAR(255), age INT); INSERT INTO Developers (name, country, age) VALUES (\u0027Dev1\u0027, \u0027USA\u0027, 30), (\u0027Dev2\u0027, \u0027USA\u0027, 35), (\u0027Dev3\u0027, \u0027China\u0027, 25);", + "sql": "SELECT country, AVG(age) as avg_age FROM Developers GROUP BY country;", + "sql_explanation": "This query calculates the average age of developers in each country. The GROUP BY clause groups the rows of the Developers table by country, and the AVG function calculates the average age within each group." 
+}, { + "id": "587", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of male and female employees in each department?", + "sql_context": "CREATE TABLE Employee (id INT, Name VARCHAR(50), DepartmentID INT, Salary FLOAT, Gender VARCHAR(10)); INSERT INTO Employee (id, Name, DepartmentID, Salary, Gender) VALUES (101, \u0027Employee1\u0027, 1, 50000, \u0027Female\u0027); INSERT INTO Employee (id, Name, DepartmentID, Salary, Gender) VALUES (102, \u0027Employee2\u0027, 1, 55000, \u0027Male\u0027); INSERT INTO Employee (id, Name, DepartmentID, Salary, Gender) VALUES (103, \u0027Employee3\u0027, 2, 60000, \u0027Female\u0027);", + "sql": "SELECT DepartmentID, AVG(CASE WHEN Gender \u003d \u0027Male\u0027 THEN Salary ELSE NULL END) AS AvgMaleSalary, AVG(CASE WHEN Gender \u003d \u0027Female\u0027 THEN Salary ELSE NULL END) AS AvgFemaleSalary FROM Employee GROUP BY DepartmentID;", + "sql_explanation": "This SQL query calculates the average salary for male and female employees in each department by using a conditional aggregation with the CASE statement." 
+}, { + "id": "640", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many traffic violations were issued in Chicago in the year 2020, and what was the most common type?", + "sql_context": "CREATE TABLE violations (id INT, city VARCHAR(255), date DATE, type VARCHAR(255), description TEXT); INSERT INTO violations (id, city, date, type, description) VALUES (1, \u0027Chicago\u0027, \u00272020-01-01\u0027, \u0027Speeding\u0027, \u0027Exceeding the speed limit\u0027), (2, \u0027Chicago\u0027, \u00272020-02-01\u0027, \u0027Parking\u0027, \u0027Parking in a no-parking zone\u0027);", + "sql": "SELECT COUNT(*) FROM violations WHERE city \u003d \u0027Chicago\u0027 AND YEAR(date) \u003d 2020; SELECT type, COUNT(*) FROM violations WHERE city \u003d \u0027Chicago\u0027 AND YEAR(date) \u003d 2020 GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "The first query counts the number of traffic violations issued in Chicago in the year 2020 by selecting all records where city is \u0027Chicago\u0027 and the year of date is 2020. The second query calculates the most common type of traffic violation in Chicago in the year 2020 by selecting the type column and counting the number of occurrences where city is \u0027Chicago\u0027 and the year of date is 2020, then grouping the results by type and ordering them in descending order by the count. The query returns the top result, which is the most common type of violation." 
+}, { + "id": "1513", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 contributors in \u0027campaign_contributions\u0027 table?", + "sql_context": "CREATE TABLE campaign_contributions (contributor_id INT, contributor_name VARCHAR(255), amount DECIMAL(10,2), contribution_date DATE);", + "sql": "SELECT contributor_name, SUM(amount) AS total_contributions FROM campaign_contributions GROUP BY contributor_name ORDER BY total_contributions DESC LIMIT 3;", + "sql_explanation": "This query calculates the total contributions for each contributor and orders them in descending order. It then selects the top 3 contributors." 
+}, { + "id": "3120", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospitals in the state of California, grouped by hospital type?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, state TEXT, hospital_type TEXT, num_beds INT); INSERT INTO hospitals (id, name, state, hospital_type, num_beds) VALUES (1, \u0027General Hospital\u0027, \u0027California\u0027, \u0027Acute Care\u0027, 300), (2, \u0027Specialty Hospital\u0027, \u0027California\u0027, \u0027Rehabilitation\u0027, 150), (3, \u0027Teaching Hospital\u0027, \u0027California\u0027, \u0027Acute Care\u0027, 500);", + "sql": "SELECT hospital_type, COUNT(*) as total FROM hospitals WHERE state \u003d \u0027California\u0027 GROUP BY hospital_type;", + "sql_explanation": "The SQL query selects the hospital_type and counts the number of hospitals for each type in the state of California. It uses the WHERE clause to filter hospitals by state and the GROUP BY clause to group the results by hospital_type." 
+}, { + "id": "3289", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common type of crime in Paris, and how many times did it occur?", + "sql_context": "CREATE TABLE crimes (id INT, city VARCHAR(255), date DATE, type VARCHAR(255), description TEXT); INSERT INTO crimes (id, city, date, type, description) VALUES (1, \u0027Paris\u0027, \u00272022-01-01\u0027, \u0027Theft\u0027, \u0027Bicycle theft\u0027), (2, \u0027Paris\u0027, \u00272022-02-01\u0027, \u0027Vandalism\u0027, \u0027Graffiti\u0027);", + "sql": "SELECT type, COUNT(*) FROM crimes WHERE city \u003d \u0027Paris\u0027 GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "This query calculates the most common type of crime in Paris by selecting the type column and counting the number of occurrences where city is \u0027Paris\u0027, then grouping the results by type and ordering them in descending order by the count. The query returns the top result, which is the most common type of crime." 
+}, { + "id": "3374", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average budget allocated to each department in the current fiscal year", + "sql_context": "CREATE TABLE Budget (BudgetID INT, Department TEXT, Amount DECIMAL(10,2), FiscalYear INT); INSERT INTO Budget (BudgetID, Department, Amount, FiscalYear) VALUES (1, \u0027Police\u0027, 5000000, 2023), (2, \u0027Education\u0027, 7000000, 2023), (3, \u0027Health\u0027, 8000000, 2023);", + "sql": "SELECT Department, AVG(Amount) FROM Budget WHERE FiscalYear \u003d YEAR(GETDATE()) GROUP BY Department;", + "sql_explanation": "This SQL query calculates the average budget allocated to each department in the current fiscal year by filtering the Budget table where FiscalYear is equal to the current year with the YEAR function, and then grouping the results by Department and calculating the average amount with the AVG function." 
+}, { + "id": "5121", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of criminal cases registered in each state?", + "sql_context": "CREATE TABLE states (id INT, state_name TEXT);CREATE TABLE criminal_cases (id INT, case_number INT, state_id INT, case_status TEXT);INSERT INTO states (id, state_name) VALUES (1, \u0027StateA\u0027), (2, \u0027StateB\u0027), (3, \u0027StateC\u0027);INSERT INTO criminal_cases (id, case_number, state_id, case_status) VALUES (1, 1001, 1, \u0027Open\u0027), (2, 1002, 2, \u0027Closed\u0027), (3, 1003, 3, \u0027Open\u0027);", + "sql": "SELECT state_id, COUNT(*) FROM criminal_cases GROUP BY state_id;", + "sql_explanation": "The number of criminal cases is calculated for each state using the COUNT function and GROUP BY clause." 
+}, { + "id": "5440", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in each federal agency?", + "sql_context": "CREATE TABLE agency (name VARCHAR(255), employees INT); CREATE TABLE employee (agency VARCHAR(255), salary DECIMAL(10,2)); INSERT INTO agency (name, employees) VALUES (\u0027Department of Defense\u0027, 750000), (\u0027Department of Veterans Affairs\u0027, 400000), (\u0027Department of Health and Human Services\u0027, 650000), (\u0027Department of Justice\u0027, 120000), (\u0027Department of State\u0027, 80000); INSERT INTO employee (agency, salary) VALUES (\u0027Department of Defense\u0027, 75000), (\u0027Department of Defense\u0027, 80000), (\u0027Department of Veterans Affairs\u0027, 50000), (\u0027Department of Veterans Affairs\u0027, 55000), (\u0027Department of Health and Human Services\u0027, 60000);", + "sql": "SELECT agency, AVG(salary) FROM employee GROUP BY agency;", + "sql_explanation": "This query calculates the average salary of employees in each federal agency. It groups the data by agency and then calculates the average value of the salary column, which will give the average salary for each agency." 
+}, { + "id": "5556", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of residents in each borough?", + "sql_context": "CREATE TABLE borough (name VARCHAR(255), population INT, avg_age DECIMAL(5,2)); INSERT INTO borough (name, population, avg_age) VALUES (\u0027Manhattan\u0027, 1644518, 36.78), (\u0027Brooklyn\u0027, 2592600, 34.56), (\u0027Queens\u0027, 2272771, 37.92), (\u0027Bronx\u0027, 1385108, 32.68), (\u0027Staten Island\u0027, 468730, 39.51);", + "sql": "SELECT name, AVG(avg_age) FROM borough GROUP BY name;", + "sql_explanation": "This query calculates the average age of residents in each borough. It groups the data by borough name and then calculates the average age for each group." 
+}, { + "id": "176", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of health equity metrics met by each community health worker in the West region for 2020 and 2021.", + "sql_context": "CREATE TABLE health_equity_metrics_worker (id INT, worker_id INT, region VARCHAR(50), year INT, metric1 BOOLEAN, metric2 BOOLEAN, metric3 BOOLEAN); INSERT INTO health_equity_metrics_worker (id, worker_id, region, year, metric1, metric2, metric3) VALUES (1, 1, \u0027West\u0027, 2020, true, true, false), (2, 2, \u0027West\u0027, 2020, true, false, true), (3, 3, \u0027West\u0027, 2020, false, true, true), (4, 1, \u0027West\u0027, 2021, true, true, true), (5, 2, \u0027West\u0027, 2021, true, true, false), (6, 3, \u0027West\u0027, 2021, true, false, true);", + "sql": "SELECT worker_id, (SUM(CASE WHEN metric1 THEN 1 ELSE 0 END) + SUM(CASE WHEN metric2 THEN 1 ELSE 0 END) + SUM(CASE WHEN metric3 THEN 1 ELSE 0 END)) * 100.0 / 3 as percentage_met FROM health_equity_metrics_worker WHERE region \u003d \u0027West\u0027 AND year IN (2020, 2021) GROUP BY worker_id;", + "sql_explanation": "The SQL query calculates the percentage of health equity metrics met by each community health worker in the West region for 2020 and 2021. It sums the number of metrics met for each worker, divides it by the total number of metrics (3) and multiplies by 100 to calculate the percentage. It groups the data by worker_id and filters it for the West region and the years 2020 and 2021." 
+}, { + "id": "2245", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of mental health parity violations by region in the past year?", + "sql_context": "CREATE TABLE mental_health_parity (violation_id INT, violation_date DATE, region VARCHAR(255)); INSERT INTO mental_health_parity (violation_id, violation_date, region) VALUES (1, \u00272021-01-01\u0027, \u0027Northeast\u0027), (2, \u00272021-02-01\u0027, \u0027Southeast\u0027), (3, \u00272021-03-01\u0027, \u0027Northeast\u0027);", + "sql": "SELECT region, COUNT(violation_id) FROM mental_health_parity WHERE violation_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY region;", + "sql_explanation": "This query calculates the number of mental health parity violations by region in the past year. It filters the data using the WHERE clause to only include records from the past year and groups the data by the \u0027region\u0027 column. It then counts the number of records in each group using the COUNT function." 
+}, { + "id": "2623", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the cultural competency score for each hospital in the northeast region?", + "sql_context": "CREATE TABLE Hospitals (HospitalID INT, Name VARCHAR(255), Region VARCHAR(25), CulturalCompetencyScore INT); INSERT INTO Hospitals (HospitalID, Name, Region, CulturalCompetencyScore) VALUES (1, \u0027Hospital A\u0027, \u0027Northeast\u0027, 85), (2, \u0027Hospital B\u0027, \u0027Northeast\u0027, 90), (3, \u0027Hospital C\u0027, \u0027South\u0027, 75), (4, \u0027Hospital D\u0027, \u0027Midwest\u0027, 80);", + "sql": "SELECT Region, AVG(CulturalCompetencyScore) as AverageScore FROM Hospitals WHERE Region \u003d \u0027Northeast\u0027 GROUP BY Region;", + "sql_explanation": "This query calculates the average cultural competency score for hospitals in the northeast region. It filters the data by the Region column to only include records from the northeast, then groups the data by the Region column and calculates the average cultural competency score for each group." 
+}, { + "id": "2764", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of mental health parity incidents in each state for the last 6 months.", + "sql_context": "CREATE TABLE MentalHealthParity (IncidentID INT, IncidentDate DATE, State VARCHAR(255)); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, State) VALUES (1, \u00272022-01-01\u0027, \u0027California\u0027); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, State) VALUES (2, \u00272022-02-15\u0027, \u0027New York\u0027); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, State) VALUES (3, \u00272022-03-05\u0027, \u0027Texas\u0027); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, State) VALUES (4, \u00272022-04-10\u0027, \u0027Florida\u0027);", + "sql": "SELECT State, COUNT(*) FROM MentalHealthParity WHERE IncidentDate \u003e\u003d DATEADD(month, -6, GETDATE()) GROUP BY State;", + "sql_explanation": "The SQL query lists the number of mental health parity incidents in each state for the last 6 months. It filters the records based on the IncidentDate using the WHERE clause and the DATEADD function. Then, it groups the records by the State column and calculates the count using the COUNT function." 
+}, { + "id": "3162", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community health workers have been added to the database each month?", + "sql_context": "CREATE TABLE community_health_workers (id INT, added_date DATE); INSERT INTO community_health_workers (id, added_date) VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-01-05\u0027), (3, \u00272022-02-03\u0027), (4, \u00272022-02-10\u0027), (5, \u00272022-03-01\u0027);", + "sql": "SELECT DATE_FORMAT(added_date, \u0027%Y-%m\u0027) AS month, COUNT(*) FROM community_health_workers GROUP BY month;", + "sql_explanation": "The SQL query calculates the number of community health workers added to the database each month. It groups the records by month (year and month) and calculates the count of records for each month." 
+}, { + "id": "3202", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total health equity metric for each region in 2022?", + "sql_context": "CREATE TABLE HealthEquityMetrics (MetricID INT, Region VARCHAR(255), MetricValue INT, ReportDate DATE); INSERT INTO HealthEquityMetrics (MetricID, Region, MetricValue, ReportDate) VALUES (1, \u0027Northeast\u0027, 85, \u00272022-01-01\u0027); INSERT INTO HealthEquityMetrics (MetricID, Region, MetricValue, ReportDate) VALUES (2, \u0027Southeast\u0027, 78, \u00272022-02-15\u0027); INSERT INTO HealthEquityMetrics (MetricID, Region, MetricValue, ReportDate) VALUES (3, \u0027Midwest\u0027, 92, \u00272022-03-05\u0027); INSERT INTO HealthEquityMetrics (MetricID, Region, MetricValue, ReportDate) VALUES (4, \u0027West\u0027, 64, \u00272022-04-10\u0027);", + "sql": "SELECT Region, SUM(MetricValue) FROM HealthEquityMetrics WHERE YEAR(ReportDate) \u003d 2022 GROUP BY Region;", + "sql_explanation": "The SQL query calculates the total health equity metric for each region in 2022. It filters the records based on the ReportDate using the YEAR function. Then, it groups the records by the Region column and calculates the sum using the SUM function." 
+}, { + "id": "3216", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of community health workers who identify as Indigenous?", + "sql_context": "CREATE TABLE CommunityHealthWorkers (WorkerID INT, Age INT, Identity VARCHAR(255)); INSERT INTO CommunityHealthWorkers VALUES (1, 35, \u0027Indigenous\u0027); INSERT INTO CommunityHealthWorkers VALUES (2, 40, \u0027Non-Indigenous\u0027);", + "sql": "SELECT Identity, AVG(Age) FROM CommunityHealthWorkers WHERE Identity \u003d \u0027Indigenous\u0027 GROUP BY Identity;", + "sql_explanation": "The SQL query calculates the average age of community health workers who identify as Indigenous. It filters the data by the Identity column and calculates the average age using the AVG() function." 
+}, { + "id": "3313", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cultural competency training hours by community health worker?", + "sql_context": "CREATE TABLE CulturalCompetency (WorkerID INT, TrainingHours INT); INSERT INTO CulturalCompetency (WorkerID, TrainingHours) VALUES (1, 20), (2, 25), (3, 30), (4, 35), (5, 40);", + "sql": "SELECT WorkerID, SUM(TrainingHours) as TotalTrainingHours FROM CulturalCompetency GROUP BY WorkerID;", + "sql_explanation": "This query calculates the total number of cultural competency training hours for each community health worker. It does this by using the SUM function on the TrainingHours column, grouped by the WorkerID column." 
+}, { + "id": "3665", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of mental health providers by county and race?", + "sql_context": "CREATE TABLE mental_health_providers (provider_id INT, age INT, county VARCHAR(255), race VARCHAR(255)); INSERT INTO mental_health_providers (provider_id, age, county, race) VALUES (1, 45, \u0027Orange County\u0027, \u0027Asian\u0027); INSERT INTO mental_health_providers (provider_id, age, county, race) VALUES (2, 50, \u0027Los Angeles County\u0027, \u0027African American\u0027); INSERT INTO mental_health_providers (provider_id, age, county, race) VALUES (3, 35, \u0027Orange County\u0027, \u0027Hispanic\u0027);", + "sql": "SELECT county, race, AVG(age) as avg_age FROM mental_health_providers GROUP BY county, race;", + "sql_explanation": "The SQL query calculates the average age of mental health providers for each unique combination of county and race by using the AVG function on the age column and grouping by the county and race columns." 
+}, { + "id": "3795", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cultural competency score for mental health providers in each state?", + "sql_context": "CREATE TABLE mental_health_providers (id INT, name VARCHAR(50), state VARCHAR(50), cultural_competency_score DECIMAL(3,2)); INSERT INTO mental_health_providers (id, name, state, cultural_competency_score) VALUES (1, \u0027Dr. Sarah Johnson\u0027, \u0027California\u0027, 4.75), (2, \u0027Dr. Michael Davis\u0027, \u0027Texas\u0027, 4.50), (3, \u0027Dr. Emily Garcia\u0027, \u0027Florida\u0027, 4.25);", + "sql": "SELECT state, AVG(cultural_competency_score) FROM mental_health_providers GROUP BY state;", + "sql_explanation": "The SQL query calculates the average cultural competency score for mental health providers in each state. It groups the records by state and calculates the average cultural competency score using the AVG() function." 
+}, { + "id": "3863", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of community health workers who have completed cultural competency training in each state?", + "sql_context": "CREATE TABLE community_health_workers (worker_id INT, name VARCHAR(50), state VARCHAR(2), completed_training BOOLEAN);", + "sql": "SELECT state, AVG(completed_training::INT) FROM community_health_workers GROUP BY state;", + "sql_explanation": "This query calculates the percentage of community health workers who have completed cultural competency training in each state from the community_health_workers table by grouping the records by state and calculating the average value of the completed_training column, which is 1 for true and 0 for false. The completed_training column is cast to an integer so that the average can be calculated." 
+}, { + "id": "4209", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum mental health parity violation fine in each region?", + "sql_context": "CREATE TABLE MentalHealthParityFines (FineID INT, Region VARCHAR(255), Fine INT); INSERT INTO MentalHealthParityFines (FineID, Region, Fine) VALUES (1, \u0027Northeast\u0027, 50000), (2, \u0027Southeast\u0027, 75000), (3, \u0027Midwest\u0027, 60000), (4, \u0027Southwest\u0027, 80000), (5, \u0027West\u0027, 100000);", + "sql": "SELECT Region, MAX(Fine) as MaxFine FROM MentalHealthParityFines GROUP BY Region;", + "sql_explanation": "The SQL query calculates the maximum mental health parity violation fine in each region. It groups the data by the \u0027Region\u0027 column and then finds the maximum fine for each group using the MAX() function." 
+}, { + "id": "4250", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural competency trainings have been conducted for each region?", + "sql_context": "CREATE TABLE cultural_competency_trainings (region VARCHAR(50), trainings INT); INSERT INTO cultural_competency_trainings (region, trainings) VALUES (\u0027Northeast\u0027, 300), (\u0027Southeast\u0027, 250), (\u0027Midwest\u0027, 200), (\u0027Southwest\u0027, 180), (\u0027West\u0027, 350);", + "sql": "SELECT region, SUM(trainings) FROM cultural_competency_trainings GROUP BY region;", + "sql_explanation": "The SQL query calculates the total number of cultural competency trainings conducted for each region by summing up the trainings column for each unique region." 
+}, { + "id": "4521", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community health workers by ethnicity?", + "sql_context": "CREATE TABLE community_health_workers (id INT, name TEXT, ethnicity TEXT, zipcode INT);", + "sql": "SELECT ethnicity, COUNT(*) FROM community_health_workers GROUP BY ethnicity;", + "sql_explanation": "This query groups community health workers by their ethnicity and counts the number of occurrences for each group." +}, { + "id": "4584", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community health workers are there in total, categorized by their ethnicity?", + "sql_context": "CREATE TABLE community_health_worker (id INT, name TEXT, ethnicity TEXT); INSERT INTO community_health_worker (id, name, ethnicity) VALUES (1, \u0027Ana Garcia\u0027, \u0027Latino\u0027), (2, \u0027Hiroshi Tanaka\u0027, \u0027Asian\u0027), (3, \u0027Sara Johnson\u0027, \u0027African American\u0027), (4, \u0027Peter Brown\u0027, \u0027Caucasian\u0027);", + "sql": "SELECT COUNT(*), ethnicity FROM community_health_worker GROUP BY ethnicity;", + "sql_explanation": "This query counts the number of community 
health workers by ethnicity, providing a breakdown of the total count." +}, { + "id": "4650", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health parity case count per region?", + "sql_context": "CREATE TABLE mental_health_parity (region VARCHAR(20), case_count INT); INSERT INTO mental_health_parity (region, case_count) VALUES (\u0027Northeast\u0027, 200), (\u0027Southeast\u0027, 150), (\u0027Midwest\u0027, 180), (\u0027Southwest\u0027, 250), (\u0027West\u0027, 220);", + "sql": "SELECT region, AVG(case_count) FROM mental_health_parity GROUP BY region;", + "sql_explanation": "The SQL query calculates the average mental health parity case count for each region by using the AVG function and grouping by the region column." 
+}, { + "id": "4751", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average health equity metric score for each state?", + "sql_context": "CREATE TABLE state_health_equity (state VARCHAR(20), metric_score INT); INSERT INTO state_health_equity (state, metric_score) VALUES (\u0027California\u0027, 82), (\u0027Texas\u0027, 88), (\u0027New York\u0027, 85);", + "sql": "SELECT state, AVG(metric_score) FROM state_health_equity GROUP BY state;", + "sql_explanation": "This query calculates the average health equity metric score for each state. It groups the data by state and calculates the average metric score for each state." 
+}, { + "id": "4824", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum cultural competency score by race?", + "sql_context": "CREATE TABLE if not exists cultural_competency_scores (score INT, race VARCHAR(255)); INSERT INTO cultural_competency_scores (score, race) VALUES (90, \u0027Hispanic\u0027), (85, \u0027African American\u0027), (95, \u0027Asian\u0027);", + "sql": "SELECT MAX(score), race FROM cultural_competency_scores GROUP BY race;", + "sql_explanation": "This query calculates the maximum cultural competency score by race. It uses the MAX function to find the highest score and the GROUP BY clause to group the results by race." 
+}, { + "id": "4869", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total salary of community health workers by race?", + "sql_context": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), race VARCHAR(50), salary DECIMAL(10,2)); INSERT INTO community_health_workers (id, name, race, salary) VALUES (1, \u0027John Doe\u0027, \u0027White\u0027, 60000.00), (2, \u0027Jane Smith\u0027, \u0027Black\u0027, 55000.00), (3, \u0027Jim Brown\u0027, \u0027Hispanic\u0027, 72000.00);", + "sql": "SELECT race, SUM(salary) FROM community_health_workers GROUP BY race;", + "sql_explanation": "The SQL query calculates the total salary of community health workers by race. It groups the records by race and calculates the total salary using the SUM() function." 
+}, { + "id": "5010", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of community health workers in each state?", + "sql_context": "CREATE TABLE community_health_workers (worker_id INT, age INT, state VARCHAR(20)); INSERT INTO community_health_workers (worker_id, age, state) VALUES (1, 45, \u0027California\u0027), (2, 35, \u0027Texas\u0027), (3, 50, \u0027California\u0027), (4, 40, \u0027Texas\u0027);", + "sql": "SELECT state, AVG(age) FROM community_health_workers GROUP BY state", + "sql_explanation": "This query calculates the average age of community health workers in each state by using the AVG function on the \u0027age\u0027 column, and the GROUP BY keyword on the \u0027state\u0027 column." 
+}, { + "id": "878", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many articles were published by \u0027The Denver Daily\u0027 in each month of the last year, including months without any articles?", + "sql_context": "CREATE TABLE the_denver_daily (publication_date DATE);", + "sql": "SELECT to_char(publication_date, \u0027Month\u0027) as month, COUNT(*) as articles FROM the_denver_daily WHERE publication_date \u003e DATE(\u0027now\u0027,\u0027-1 year\u0027) GROUP BY month ORDER BY MIN(publication_date);", + "sql_explanation": "This query first selects all publication dates from \u0027The Denver Daily\u0027 within the last year. It then groups the publication dates by month and counts the number of articles in each month. Finally, it orders the months by date." 
+}, { + "id": "945", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of articles published in each state of the USA for the current year?", + "sql_context": "CREATE TABLE articles (article_id INT, publication_date DATE, state VARCHAR(255)); INSERT INTO articles (article_id, publication_date, state) VALUES (1, \u00272022-01-01\u0027, \u0027California\u0027), (2, \u00272022-01-02\u0027, \u0027Texas\u0027), (3, \u00272022-01-03\u0027, \u0027Florida\u0027);", + "sql": "SELECT state, COUNT(article_id) FROM articles WHERE YEAR(publication_date) \u003d YEAR(GETDATE()) AND state IN (\u0027California\u0027, \u0027Texas\u0027, \u0027Florida\u0027, \u0027New York\u0027, \u0027Pennsylvania\u0027) GROUP BY state;", + "sql_explanation": "The SQL query counts the number of articles published in each state of the USA for the current year. It uses the WHERE clause to filter the results to only include articles published in the current year and in the specified states. It then uses the GROUP BY clause to group the results by state." 
+}, { + "id": "1052", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of articles published in the \"articles\" table for each month in the year 2022?", + "sql_context": "CREATE TABLE articles (id INT PRIMARY KEY, title TEXT, category TEXT, publication_date DATE, word_count INT, author_id INT);", + "sql": "SELECT EXTRACT(MONTH FROM publication_date) AS month, COUNT(*) AS articles_per_month FROM articles WHERE EXTRACT(YEAR FROM publication_date) \u003d 2022 GROUP BY month ORDER BY month;", + "sql_explanation": "This SQL query retrieves the total number of articles published for each month in the year 2022 by extracting the month from the \"publication_date\" column, filtering the results to only include rows where the year of the \"publication_date\" is 2022, grouping the results by month, and counting the number of articles for each month." 
+}, { + "id": "1234", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries with the highest number of articles related to human rights in 2022.", + "sql_context": "CREATE TABLE articles (id INT, title TEXT, category TEXT, publish_date DATE, country TEXT); INSERT INTO articles (id, title, category, publish_date, country) VALUES (1, \u0027Human Rights Abuses Exposed\u0027, \u0027human_rights\u0027, \u00272022-01-01\u0027, \u0027Mexico\u0027), (2, \u0027Technology and Human Rights\u0027, \u0027technology\u0027, \u00272022-06-05\u0027, \u0027South Korea\u0027);", + "sql": "SELECT country, COUNT(*) as num_articles FROM articles WHERE category \u003d \u0027human_rights\u0027 AND YEAR(publish_date) \u003d 2022 GROUP BY country ORDER BY num_articles DESC LIMIT 3;", + "sql_explanation": "Identify the top 3 countries with the highest number of articles related to human rights in 2022." 
+}, { + "id": "1332", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average word count of articles published on politics in the last month, grouped by week?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(255), word_count INT, publish_date DATE, topic VARCHAR(255)); INSERT INTO articles (id, title, word_count, publish_date, topic) VALUES (1, \u0027Article 1\u0027, 500, \u00272022-01-01\u0027, \u0027politics\u0027), (2, \u0027Article 2\u0027, 700, \u00272022-01-05\u0027, \u0027politics\u0027);", + "sql": "SELECT AVG(word_count), WEEKOFYEAR(publish_date) AS Week FROM articles WHERE topic \u003d \u0027politics\u0027 AND publish_date \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH) GROUP BY Week;", + "sql_explanation": "The SQL query calculates the average word count of articles published on politics in the last month, grouped by week by using the AVG function, filtering the data where the topic is politics and the publish_date is within the last month, and grouping the result by the week of the publish date." 
+}, { + "id": "1484", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of articles published in the first quarter of each year by \u0027The New York Times\u0027.", + "sql_context": "CREATE TABLE articles (id INT, title TEXT, publication_date DATE, publisher TEXT);", + "sql": "SELECT YEAR(publication_date) AS year, COUNT(*) AS count FROM articles WHERE publisher \u003d \u0027The New York Times\u0027 AND MONTH(publication_date) \u003c\u003d 3 GROUP BY year;", + "sql_explanation": "The SQL query selects the year and a count of articles from the articles table where the publisher is \u0027The New York Times\u0027 and the month of the publication date is less than or equal to 3. It then groups the results by year." 
+}, { + "id": "2023", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of articles published per month and the average word count of those articles.", + "sql_context": "CREATE TABLE news_articles (id INT, title VARCHAR(100), content TEXT, publication_date DATE, word_count INT);", + "sql": "SELECT MONTH(publication_date) AS month, AVG(word_count) AS avg_word_count, COUNT(*) AS articles_count FROM news_articles GROUP BY month;", + "sql_explanation": "This SQL query retrieves the number of articles published per month and the average word count of those articles by extracting the month from the \u0027publication_date\u0027 column using the MONTH() function, calculating the average word count using the AVG() function on the \u0027word_count\u0027 column, and then using the GROUP BY clause to group the result by the month." 
+}, { + "id": "2474", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles were published in the \"news_articles\" table in each month of 2022?", + "sql_context": "CREATE TABLE news_articles (id INT, title VARCHAR(100), author_id INT, published_date DATE); INSERT INTO news_articles (id, title, author_id, published_date) VALUES (1, \u0027News Article 1\u0027, 1, \u00272022-01-01\u0027), (2, \u0027News Article 2\u0027, 2, \u00272022-02-01\u0027);", + "sql": "SELECT MONTH(published_date), COUNT(*) FROM news_articles WHERE YEAR(published_date) \u003d 2022 GROUP BY MONTH(published_date);", + "sql_explanation": "This query counts the number of news articles published in each month of 2022. It does this by selecting the MONTH and COUNT (*) of all rows in the news_articles table, but only for rows where the YEAR of the published_date is 2022. It then groups the results by the MONTH of the published_date." 
+}, { + "id": "2498", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 authors with the most articles in the \u0027media_ethics\u0027 table?", + "sql_context": "CREATE TABLE media_ethics (article_id INT, author VARCHAR(50), title VARCHAR(100), published_date DATE, category VARCHAR(30)); INSERT INTO media_ethics (article_id, author, title, published_date, category) VALUES (1, \u0027John Doe\u0027, \u0027Article 5\u0027, \u00272021-01-05\u0027, \u0027Ethics\u0027), (2, \u0027Jane Smith\u0027, \u0027Article 6\u0027, \u00272021-01-06\u0027, \u0027Ethics\u0027);", + "sql": "SELECT author, COUNT(article_id) AS total_articles FROM media_ethics GROUP BY author ORDER BY total_articles DESC LIMIT 3;", + "sql_explanation": "This query groups the \u0027media_ethics\u0027 table by \u0027author\u0027 and counts the number of \u0027article_id\u0027 for each author. The query then orders the results by \u0027total_articles\u0027 in descending order and limits the results to the top 3 authors with the most articles." 
+}, { + "id": "2590", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total word count for articles published by \u0027John Doe\u0027 in the \u0027media\u0027 schema, grouped by month?", + "sql_context": "CREATE TABLE media.articles (article_id INT, title VARCHAR(100), author VARCHAR(100), publish_date DATE, word_count INT); INSERT INTO media.articles (article_id, title, author, publish_date, word_count) VALUES (1, \u0027Article 1\u0027, \u0027John Doe\u0027, \u00272021-01-01\u0027, 500), (2, \u0027Article 2\u0027, \u0027John Doe\u0027, \u00272021-02-01\u0027, 600);", + "sql": "SELECT MONTH(publish_date), SUM(word_count) FROM media.articles WHERE author \u003d \u0027John Doe\u0027 GROUP BY MONTH(publish_date);", + "sql_explanation": "This query calculates the total word count for articles published by \u0027John Doe\u0027 in the \u0027media\u0027 schema, grouped by month. It does this by summing (SUM) the \u0027word_count\u0027 column and grouping (GROUP BY) the \u0027publish_date\u0027 column by month (MONTH), but only for rows where the \u0027author\u0027 column is equal to \u0027John Doe\u0027." 
+}, { + "id": "2687", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many news articles were published per month in 2021 in the \u0027news\u0027 schema?", + "sql_context": "CREATE TABLE news.articles (article_id INT, title VARCHAR(100), publish_date DATE); INSERT INTO news.articles (article_id, title, publish_date) VALUES (1, \u0027Article 1\u0027, \u00272021-01-01\u0027), (2, \u0027Article 2\u0027, \u00272021-02-01\u0027);", + "sql": "SELECT MONTH(publish_date), COUNT(*) FROM news.articles WHERE YEAR(publish_date) \u003d 2021 GROUP BY MONTH(publish_date);", + "sql_explanation": "This query calculates the number of articles published per month in 2021. It does this by grouping (GROUP BY) the \u0027publish_date\u0027 column by month (MONTH) and then counting (COUNT*) the number of rows in each group. The WHERE clause is used to filter the data to only include rows from the year 2021." 
+}, { + "id": "2829", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of articles published per month in 2022", + "sql_context": "CREATE TABLE articles (id INT PRIMARY KEY, date DATE, is_published BOOLEAN); INSERT INTO articles (id, date, is_published) VALUES (1, \u00272022-01-01\u0027, true), (2, \u00272022-02-01\u0027, false), (3, \u00272022-03-01\u0027, true), (4, \u00272022-04-01\u0027, true), (5, \u00272023-01-01\u0027, false);", + "sql": "SELECT MONTH(date), COUNT(*) FROM articles WHERE YEAR(date) \u003d 2022 AND is_published \u003d true GROUP BY MONTH(date);", + "sql_explanation": "This SQL query groups the \u0027articles\u0027 table by the month of the \u0027date\u0027 column and calculates the count of rows where \u0027is_published\u0027 is true and the year is 2022 using the MONTH() and YEAR() functions." 
+}, { + "id": "3020", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 cities with the most news articles published about them in the \"news_articles\" table, and their corresponding article counts?", + "sql_context": "CREATE TABLE news_articles (article_id INT, city VARCHAR(255));", + "sql": "SELECT city, COUNT(*) AS article_count FROM news_articles GROUP BY city ORDER BY article_count DESC LIMIT 3;", + "sql_explanation": "This SQL query groups the \"news_articles\" table by city and calculates the count of articles for each city. Then, it orders the results in descending order by article count and limits the output to the top 3 cities." +}, { + "id": "3089", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of news articles published in the \"articles\" table by year?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(100), publication_date DATE);", + "sql": "SELECT EXTRACT(YEAR FROM publication_date) AS year, COUNT(*) AS num_articles FROM articles GROUP BY year;", + "sql_explanation": "The SQL query calculates the total number of news articles published in the \"articles\" table by year. 
It extracts the year from the publication date using the EXTRACT function. Then, it groups the table by the year. Finally, it uses the COUNT function to count the number of articles for each group." +}, { + "id": "3248", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of articles published by each author in the authors table, ordered by the number of articles in descending order?", + "sql_context": "CREATE TABLE authors (id INT PRIMARY KEY, name VARCHAR(50), articles INT); INSERT INTO authors (id, name, articles) VALUES (1, \u0027John Doe\u0027, 10), (2, \u0027Jane Smith\u0027, 15), (3, \u0027Bob Johnson\u0027, 12);", + "sql": "SELECT name, SUM(articles) AS total_articles FROM authors GROUP BY name ORDER BY total_articles DESC;", + "sql_explanation": "The SQL query sums the articles column in the authors table and groups the results by name. It then returns the name and total_articles for each group, ordered by total_articles in descending order." 
+}, { + "id": "3999", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many articles were published by each author in the \u0027reports\u0027 table, broken down by topic?", + "sql_context": "CREATE TABLE reports (id INT, author VARCHAR(255), title VARCHAR(255), published_date DATE, topic VARCHAR(255));", + "sql": "SELECT author, topic, COUNT(*) as articles_count FROM reports GROUP BY author, topic;", + "sql_explanation": "This SQL query groups the \u0027reports\u0027 table by the \u0027author\u0027 and \u0027topic\u0027 columns and calculates the count of rows for each group, which represents the number of articles published by each author for each topic." 
+}, { + "id": "4255", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of article topics by language?", + "sql_context": "CREATE TABLE articles (id INT PRIMARY KEY, title TEXT, topic VARCHAR(255), language VARCHAR(50), agency_id INT, FOREIGN KEY (agency_id) REFERENCES news_agency(id));", + "sql": "SELECT language, topic, COUNT(*) as total FROM articles GROUP BY language, topic;", + "sql_explanation": "This query groups the articles table by both language and topic, and then calculates the count of articles for each combination of language and topic." +}, { + "id": "4752", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of articles published by each author in the authors table?", + "sql_context": "CREATE TABLE authors (id INT PRIMARY KEY, name VARCHAR(50), articles INT); INSERT INTO authors (id, name, articles) VALUES (1, \u0027John Doe\u0027, 10), (2, \u0027Jane Smith\u0027, 15), (3, \u0027Bob Johnson\u0027, 12);", + "sql": "SELECT name, SUM(articles) AS total_articles FROM authors GROUP BY name;", + "sql_explanation": "The SQL query sums the articles column in the authors table and groups the results by 
name. It then returns the name and total_articles for each group." +}, { + "id": "4924", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of article topics in the \"articles\" table?", + "sql_context": "CREATE TABLE articles (id INT, title VARCHAR(100), topic VARCHAR(50));", + "sql": "SELECT topic, COUNT(*) AS num_articles FROM articles GROUP BY topic;", + "sql_explanation": "The SQL query groups the articles table by the topic of the article. It uses the GROUP BY clause to group the table by the topic. Then, it uses the COUNT function to count the number of articles for each group." +}, { + "id": "5122", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of readers in each country?", + "sql_context": "CREATE TABLE readers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), country VARCHAR(50));", + "sql": "SELECT country, COUNT(*) as count FROM readers GROUP BY country;", + "sql_explanation": "The SQL query groups the \u0027readers\u0027 table by the \u0027country\u0027 column, counts the number of records in each group using the COUNT() function, and returns the results for each group." 
+}, { + "id": "5353", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "find the earliest published date for each unique news topic", + "sql_context": "CREATE TABLE News (id INT, topic VARCHAR(50), published_date DATE); INSERT INTO News (id, topic, published_date) VALUES (1, \u0027Politics\u0027, \u00272022-01-01\u0027); INSERT INTO News (id, topic, published_date) VALUES (2, \u0027Sports\u0027, \u00272022-01-02\u0027); INSERT INTO News (id, topic, published_date) VALUES (3, \u0027Entertainment\u0027, \u00272022-01-03\u0027); INSERT INTO News (id, topic, published_date) VALUES (4, \u0027Politics\u0027, \u00272022-01-04\u0027);", + "sql": "SELECT topic, MIN(published_date) FROM News GROUP BY topic;", + "sql_explanation": "This query selects the minimum published date for each unique topic in the News table using the GROUP BY clause." 
+}, { + "id": "289", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "What is the total number of volunteers who signed up in each quarter of the last two years?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, SignUpDate DATE); INSERT INTO Volunteers (VolunteerID, SignUpDate) VALUES (1, \u00272022-01-15\u0027), (2, \u00272022-02-20\u0027), (3, \u00272022-03-05\u0027), (4, \u00272021-12-31\u0027), (5, \u00272021-06-01\u0027);", + "sql": "SELECT EXTRACT(QUARTER FROM SignUpDate) as Quarter, COUNT(*) as NumVolunteers FROM Volunteers WHERE SignUpDate \u003e\u003d DATE_TRUNC(\u0027year\u0027, CURRENT_DATE - INTERVAL \u00272 years\u0027) AND SignUpDate \u003c DATE_TRUNC(\u0027year\u0027, CURRENT_DATE) GROUP BY Quarter ORDER BY Quarter;", + "sql_explanation": "This query extracts the quarter from the SignUpDate field in the Volunteers table for all records in the last two years, groups them by quarter, and returns the count of volunteers for each quarter. It uses the EXTRACT function to extract the quarter and the GROUP BY clause to group the records by quarter." 
+}, { + "id": "845", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average donation amount in 2021 by quarter?", + "sql_context": "CREATE TABLE donations (id INT, amount DECIMAL, donation_date DATE);", + "sql": "SELECT DATE_FORMAT(donation_date, \u0027%Y-%V\u0027) as quarter, AVG(amount) as avg_donation_amount FROM donations WHERE donation_date \u003e\u003d \u00272021-01-01\u0027 AND donation_date \u003c \u00272022-01-01\u0027 GROUP BY quarter;", + "sql_explanation": "The SQL query extracts the year and quarter from the donation_date column and calculates the average donation amount by grouping by the quarter column and averaging the amount column." 
+}, { + "id": "913", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs had the most donations in Q3 2021?", + "sql_context": "CREATE TABLE program_donations (id INT, program TEXT, amount DECIMAL, donation_date DATE);", + "sql": "SELECT program, SUM(amount) as total_donations FROM program_donations WHERE donation_date \u003e\u003d \u00272021-07-01\u0027 AND donation_date \u003c \u00272021-10-01\u0027 GROUP BY program ORDER BY total_donations DESC;", + "sql_explanation": "The SQL query filters the program_donations table for Q3 2021 and calculates the total donations for each program by grouping by the program column and summing the amount column. The results are then ordered in descending order by total donations." 
+}, { + "id": "1018", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in each month of 2023?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, volunteer_date DATE); INSERT INTO volunteers (volunteer_id, volunteer_name, volunteer_date) VALUES (1, \u0027Sana Patel\u0027, \u00272023-01-15\u0027), (2, \u0027Mateo Alvarez\u0027, \u00272023-02-20\u0027), (3, \u0027Xiao Wang\u0027, \u00272023-03-05\u0027);", + "sql": "SELECT DATE_PART(\u0027month\u0027, volunteer_date) as month, COUNT(volunteer_id) as num_volunteers FROM volunteers WHERE volunteer_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-12-31\u0027 GROUP BY month;", + "sql_explanation": "This query extracts the month from the volunteer_date column, groups the records by month, and calculates the number of volunteers for each month." 
+}, { + "id": "1020", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in each month of 2021?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, volunteer_date DATE);", + "sql": "SELECT DATE_PART(\u0027month\u0027, volunteer_date) as month, COUNT(volunteer_id) as num_volunteers FROM volunteers WHERE volunteer_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY month;", + "sql_explanation": "This query extracts the month from the volunteer_date column, groups the records by month, and calculates the number of volunteers for each month." 
+}, { + "id": "1475", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in Q3 2021, categorized by their signup location?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, signup_location TEXT, signup_date DATE); INSERT INTO volunteers (volunteer_id, volunteer_name, signup_location, signup_date) VALUES (1, \u0027Alice\u0027, \u0027San Francisco\u0027, \u00272021-07-01\u0027); INSERT INTO volunteers (volunteer_id, volunteer_name, signup_location, signup_date) VALUES (2, \u0027Bob\u0027, \u0027New York\u0027, \u00272021-09-02\u0027);", + "sql": "SELECT signup_location, COUNT(volunteer_id) as num_volunteers FROM volunteers WHERE EXTRACT(MONTH FROM signup_date) BETWEEN 7 AND 9 GROUP BY signup_location;", + "sql_explanation": "This query filters volunteers who signed up in Q3 2021 (July, August, and September), groups them by their signup location, and counts the number of volunteers for each location." 
+}, { + "id": "1625", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours volunteered by each volunteer in the last month?", + "sql_context": "CREATE TABLE volunteer_hours (hour_id INT, volunteer_id INT, hours_volunteered DECIMAL(10,2), volunteer_date DATE);", + "sql": "SELECT volunteer_id, SUM(hours_volunteered) FROM volunteer_hours WHERE volunteer_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY volunteer_id;", + "sql_explanation": "The query calculates the sum of hours_volunteered from the volunteer_hours table where the volunteer_date is within the last one month and groups the results by volunteer_id." 
+}, { + "id": "1800", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up for each program in Q1 of 2022?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, program TEXT, volunteer_date DATE); INSERT INTO volunteers (volunteer_id, volunteer_name, program, volunteer_date) VALUES (1, \u0027Alice\u0027, \u0027Feeding the Homeless\u0027, \u00272022-01-05\u0027); INSERT INTO volunteers (volunteer_id, volunteer_name, program, volunteer_date) VALUES (2, \u0027Bob\u0027, \u0027Tree Planting\u0027, \u00272022-03-20\u0027);", + "sql": "SELECT program, COUNT(volunteer_name) as num_volunteers FROM volunteers WHERE EXTRACT(MONTH FROM volunteer_date) BETWEEN 1 AND 3 GROUP BY program;", + "sql_explanation": "The SQL query extracts the month from the volunteer_date and filters the records to only include volunteers who signed up in Q1 of 2022. It then groups the records by program and calculates the count of volunteer_name for each group." 
+}, { + "id": "1987", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which donors have contributed the most to a specific program?", + "sql_context": "CREATE TABLE donations (id INT, donor_name VARCHAR, donation_amount DECIMAL, donation_date DATE, program VARCHAR); INSERT INTO donations (id, donor_name, donation_amount, donation_date, program) VALUES (1, \u0027John Doe\u0027, 100, \u00272021-01-01\u0027, \u0027Education\u0027);", + "sql": "SELECT donor_name, SUM(donation_amount) FROM donations WHERE program \u003d \u0027Education\u0027 GROUP BY donor_name ORDER BY SUM(donation_amount) DESC;", + "sql_explanation": "This query calculates the total donation amount for each donor to a specific program, and returns the donors with the highest total donation amounts." 
+}, { + "id": "2142", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors from each country have made a donation in the last month?", + "sql_context": "CREATE TABLE Donor (DonorID int, DonorName varchar(50), Country varchar(50), DonationDate date);", + "sql": "SELECT Country, COUNT(DISTINCT DonorID) as NumDonors FROM Donor WHERE DonationDate \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY Country;", + "sql_explanation": "The SQL query filters for donations made in the last month using the WHERE clause and the GETDATE() function, and calculates the number of distinct donors from each country using the COUNT function and the DISTINCT keyword, and the GROUP BY clause." 
+}, { + "id": "2160", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which volunteers have contributed the most hours to a specific program, and what is the total number of hours they have contributed?", + "sql_context": "CREATE TABLE VolunteerHours (VolunteerID INT, VolunteerName TEXT, Program TEXT, Hours DECIMAL(5,2)); INSERT INTO VolunteerHours (VolunteerID, VolunteerName, Program, Hours) VALUES (1, \u0027Alice\u0027, \u0027ProgramA\u0027, 25.00), (2, \u0027Bob\u0027, \u0027ProgramB\u0027, 30.00), (3, \u0027Charlie\u0027, \u0027ProgramA\u0027, 40.00);", + "sql": "SELECT VolunteerName, Program, SUM(Hours) AS TotalHours FROM VolunteerHours GROUP BY VolunteerName, Program ORDER BY TotalHours DESC;", + "sql_explanation": "This SQL query calculates the total number of hours each volunteer has contributed to each program by using the SUM function on the Hours column and grouping by VolunteerName and Program. The results are then ordered in descending order based on the total number of hours." 
+}, { + "id": "2537", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total donation amount by age group in 2021?", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_age INT, donation_amount DECIMAL(10,2), donation_date DATE);", + "sql": "SELECT donor_age, SUM(donation_amount) as total_donation FROM donors WHERE YEAR(donation_date) \u003d 2021 GROUP BY donor_age;", + "sql_explanation": "The SQL query calculates the total donation amount by age group in 2021. It groups the data by donor_age and calculates the total donation amount for each group." +}, { + "id": "2557", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average program impact score for each program in Asia?", + "sql_context": "CREATE TABLE Program (ProgramID int, ProgramName varchar(50), ProgramLocation varchar(50), ImpactScore int);", + "sql": "SELECT ProgramName, AVG(ImpactScore) as AvgImpactScore FROM Program WHERE ProgramLocation \u003d \u0027Asia\u0027 GROUP BY ProgramName;", + "sql_explanation": "The SQL query filters for programs in Asia using the WHERE clause, and calculates the average impact score for each program using the AVG function and the GROUP BY clause." 
+}, { + "id": "2591", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 donors by total donation amount?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 150.00);", + "sql": "SELECT DonorID, SUM(DonationAmount) AS TotalDonated FROM Donations GROUP BY DonorID ORDER BY TotalDonated DESC LIMIT 5;", + "sql_explanation": "This query calculates the total donation amount for each donor by grouping the Donations table by DonorID and calculating the sum of DonationAmount, then ordering the results by total donated in descending order and limiting to the top 5 donors." 
+}, { + "id": "2791", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 donors based on the total amount donated?", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(100), donation_amount DECIMAL(10,2)); INSERT INTO donors (id, name, donation_amount) VALUES (1, \u0027Jane Smith\u0027, 1500.00), (2, \u0027John Doe\u0027, 800.00), (3, \u0027Bob Brown\u0027, 1200.00), (4, \u0027Charlie Green\u0027, 900.00);", + "sql": "SELECT name, SUM(donation_amount) AS total_donated FROM donors GROUP BY name ORDER BY total_donated DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 donors based on the total amount donated by grouping the donors table by the name column and calculating the sum of the donation_amount column for each group. The results are then sorted by the total_donated column in descending order and limited to the top 3 rows." 
+}, { + "id": "3105", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor, and which donors have donated more than $1000 in total?", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(100), donation_amount DECIMAL(10,2)); INSERT INTO donors (id, name, donation_amount) VALUES (1, \u0027Jane Smith\u0027, 1500.00), (2, \u0027John Doe\u0027, 800.00), (3, \u0027Bob Brown\u0027, 1200.00);", + "sql": "SELECT name, SUM(donation_amount) AS total_donated FROM donors GROUP BY name HAVING total_donated \u003e 1000;", + "sql_explanation": "This query calculates the total amount donated by each donor and lists only the donors who have donated more than $1000 by grouping the donors table by the name column and calculating the sum of the donation_amount column for each group. The HAVING clause filters the results to only include groups where the total_donated is greater than $1000." 
+}, { + "id": "3576", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers are there in each region who joined in 2020?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName VARCHAR(50), Region VARCHAR(50), JoinDate DATE);", + "sql": "SELECT Region, COUNT(VolunteerID) FROM Volunteers WHERE YEAR(JoinDate) \u003d 2020 GROUP BY Region;", + "sql_explanation": "This query counts the number of volunteers in each region who joined in 2020 by using the COUNT function on the VolunteerID column, filtering the Volunteers table to only include rows where the YEAR of JoinDate is 2020, and grouping the results by the Region column." 
+}, { + "id": "4501", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique categories of programs and their respective total budgets?", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Category TEXT, Budget DECIMAL(10,2)); INSERT INTO Programs (ProgramID, ProgramName, Category, Budget) VALUES (1, \u0027Arts Education\u0027, \u0027Education\u0027, 3000.00), (2, \u0027Health Awareness\u0027, \u0027Health\u0027, 7000.00);", + "sql": "SELECT Category, SUM(Budget) AS TotalBudget FROM Programs GROUP BY Category;", + "sql_explanation": "The SQL query calculates the total budget for each category by grouping the Programs table using the Category column and summing the Budget column values." 
+}, { + "id": "4545", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers are there in each region?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName TEXT, Region TEXT); INSERT INTO Volunteers (VolunteerID, VolunteerName, Region) VALUES (1, \u0027Alex Brown\u0027, \u0027North\u0027), (2, \u0027Bella Johnson\u0027, \u0027South\u0027), (3, \u0027Charlie Davis\u0027, \u0027East\u0027), (4, \u0027David White\u0027, \u0027West\u0027), (5, \u0027Eva Green\u0027, \u0027North\u0027);", + "sql": "SELECT Region, COUNT(*) as TotalVolunteers FROM Volunteers GROUP BY Region;", + "sql_explanation": "This SQL query groups the volunteers by their region and calculates the total number of volunteers in each region." +}, { + "id": "5420", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up in each program?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT, Program TEXT);", + "sql": "SELECT Program, COUNT(*) FROM Volunteers GROUP BY Program;", + "sql_explanation": "This SQL query groups the Volunteers table by the Program column and counts the number of volunteers for each program." 
+}, { + "id": "383", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 donors from each country in December 2022?", + "sql_context": "CREATE TABLE DonorCountry (DonorID INT, DonorName TEXT, DonationAmount DECIMAL(10,2), Country TEXT, DonationDate DATE); INSERT INTO DonorCountry (DonorID, DonorName, DonationAmount, Country, DonationDate) VALUES (1, \u0027Sophia Lee\u0027, 1500.00, \u0027South Korea\u0027, \u00272022-12-15\u0027);", + "sql": "SELECT DonorID, DonorName, SUM(DonationAmount) FROM DonorCountry WHERE DonationDate BETWEEN \u00272022-12-01\u0027 AND \u00272022-12-31\u0027 GROUP BY DonorID, DonorName, Country HAVING COUNT(DISTINCT Country) \u003d 1 ORDER BY SUM(DonationAmount) DESC LIMIT 2;", + "sql_explanation": "The SQL query lists the top 2 donors from each country in December 2022 by selecting the DonorID, DonorName, and sum of DonationAmount from the DonorCountry table, grouped by DonorID, DonorName, and Country. It then filters the records having a count of distinct Country equal to 1 to ensure that the result is for each country, and orders the result by the sum of DonationAmount in descending order. The query limits the result to 2 records." 
+}, { + "id": "938", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total amount donated by each top 5 donors in 2021?", + "sql_context": "CREATE TABLE Donations (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO Donations (donor_id, donation_amount, donation_date) VALUES (1, 5000, \u00272021-01-01\u0027), (2, 3500, \u00272021-02-01\u0027), (3, 7000, \u00272021-03-01\u0027), (4, 2800, \u00272021-04-01\u0027), (5, 6000, \u00272021-05-01\u0027);", + "sql": "SELECT donor_id, SUM(donation_amount) as total_donation FROM Donations WHERE donation_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY donor_id ORDER BY total_donation DESC LIMIT 5;", + "sql_explanation": "The SQL query groups donations by donor_id and calculates the sum of donation_amount for each donor in 2021. It then orders the results by total_donation in descending order and limits the output to the top 5 donors." 
+}, { + "id": "1045", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who volunteered the most hours in program B in 2022?", + "sql_context": "CREATE TABLE Volunteers (id INT, user_id INT, program VARCHAR(50), hours DECIMAL(10, 2), volunteer_date DATE); INSERT INTO Volunteers (id, user_id, program, hours, volunteer_date) VALUES (1, 201, \u0027program A\u0027, 3.00, \u00272021-02-01\u0027); INSERT INTO Volunteers (id, user_id, program, hours, volunteer_date) VALUES (2, 202, \u0027program B\u0027, 2.50, \u00272021-03-05\u0027);", + "sql": "SELECT user_id, SUM(hours) FROM Volunteers WHERE program \u003d \u0027program B\u0027 AND volunteer_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY user_id ORDER BY SUM(hours) DESC LIMIT 1;", + "sql_explanation": "The SQL query identifies the user who volunteered the most hours in program B in 2022 by summing the \u0027hours\u0027 column where the \u0027program\u0027 is \u0027program B\u0027 and \u0027volunteer_date\u0027 falls between \u00272022-01-01\u0027 and \u00272022-12-31\u0027, grouping by \u0027user_id\u0027, and ordering in descending order. The query then returns the user with the highest summed hours." 
+}, { + "id": "1429", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total amount donated in each region, along with the corresponding number of donors, in the year 2021?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonorRegion TEXT, DonationAmount FLOAT); INSERT INTO Donors (DonorID, DonorName, DonorRegion, DonationAmount) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027, 5000.00), (2, \u0027Jane Smith\u0027, \u0027South\u0027, 3500.00);", + "sql": "SELECT DonorRegion, SUM(DonationAmount) as TotalDonated, COUNT(DISTINCT DonorID) as DonorCount FROM Donors WHERE YEAR(DonationDate) \u003d 2021 GROUP BY DonorRegion;", + "sql_explanation": "The SQL query retrieves the DonorRegion, TotalDonated, and DonorCount columns by filtering rows based on the YEAR() function for the DonationDate column and grouping by the DonorRegion column. The SUM() function is used to calculate the total donation amount for each region, and the COUNT(DISTINCT DonorID) function is used to count the number of unique donors for each region. The results display the total amount donated in each region, along with the corresponding number of donors, in the year 2021." 
+}, { + "id": "1504", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 volunteers with the highest total hours contributed, including partial hours and across all programs?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName TEXT, TotalHours DECIMAL(5,2));CREATE TABLE VolunteerHours (VolunteerHoursID INT, VolunteerID INT, Program TEXT, Hours DECIMAL(5,2), Partial BOOLEAN);", + "sql": "SELECT V.VolunteerName, SUM(VH.Hours) as TotalHours FROM VolunteerHours VH WHERE VH.Partial \u003d true GROUP BY VH.VolunteerID ORDER BY TotalHours DESC LIMIT 5;", + "sql_explanation": "This query retrieves the top 5 volunteers with the highest total hours contributed, including partial hours, by summing the \u0027Hours\u0027 field in the \u0027VolunteerHours\u0027 table, grouped by \u0027VolunteerID\u0027 and ordered by \u0027TotalHours\u0027 in descending order." 
+}, { + "id": "1643", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers in ProgramA and ProgramB, and what is the average hours they have volunteered in these programs?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID int, Name varchar(50), Program varchar(50), Hours numeric(5,2)); INSERT INTO Volunteers (VolunteerID, Name, Program, Hours) VALUES (1, \u0027Alice\u0027, \u0027ProgramA\u0027, 20.00), (2, \u0027Bob\u0027, \u0027ProgramB\u0027, 30.00), (3, \u0027Charlie\u0027, \u0027ProgramA\u0027, 25.00), (4, \u0027David\u0027, \u0027ProgramB\u0027, 15.00);", + "sql": "SELECT Program, COUNT(VolunteerID) AS NumVolunteers, AVG(Hours) AS AvgHours FROM Volunteers WHERE Program IN (\u0027ProgramA\u0027, \u0027ProgramB\u0027) GROUP BY Program;", + "sql_explanation": "The SQL query calculates the total number of volunteers in ProgramA and ProgramB and the average hours they have volunteered in these programs. The COUNT function is used to calculate the number of volunteers, and the AVG function is used to calculate the average hours volunteered. The GROUP BY clause is used to group the result set by the Program column. The WHERE clause filters the result set to include only ProgramA and ProgramB. The query does not use the ORDER BY clause, so the result set is not sorted." 
+}, { + "id": "1751", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each organization in Q1 of 2023?", + "sql_context": "CREATE TABLE Donations (DonationID INT, Organization VARCHAR(50), Amount DECIMAL(10,2), DonationDate DATE); INSERT INTO Donations (DonationID, Organization, Amount, DonationDate) VALUES (1, \u0027ABC Corp\u0027, 5000, \u00272023-01-05\u0027), (2, \u0027XYZ Foundation\u0027, 8000, \u00272023-01-12\u0027), (3, \u0027DEF Inc\u0027, 3000, \u00272023-03-20\u0027), (4, \u0027ABC Corp\u0027, 7000, \u00272023-03-31\u0027), (5, \u0027GHI Org\u0027, 6000, \u00272023-01-01\u0027);", + "sql": "SELECT Organization, SUM(Amount) as TotalDonated FROM Donations WHERE YEAR(DonationDate) \u003d 2023 AND MONTH(DonationDate) \u003c\u003d 3 GROUP BY Organization;", + "sql_explanation": "This query calculates the total amount donated by each organization in Q1 of 2023. It sums the \u0027Amount\u0027 for each \u0027Organization\u0027 where the \u0027DonationDate\u0027 is in 2023 and the month is less than or equal to 3 (Q1), and groups the result by \u0027Organization\u0027." 
+}, { + "id": "2070", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor in 2023, ranked in ascending order?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), DonationDate DATE, Amount DECIMAL(10,2)); INSERT INTO Donors (DonorID, DonorName, DonationDate, Amount) VALUES (1, \u0027Jane Smith\u0027, \u00272023-01-01\u0027, 50.00), (2, \u0027John Doe\u0027, \u00272023-02-01\u0027, 100.00), (3, \u0027Alice Johnson\u0027, \u00272023-01-15\u0027, 75.00);", + "sql": "SELECT DonorName, SUM(Amount) AS TotalDonated FROM Donors WHERE YEAR(DonationDate) \u003d 2023 GROUP BY DonorName ORDER BY TotalDonated ASC;", + "sql_explanation": "The SQL query selects the DonorName and calculates the sum of the Amount for each donor who donated in 2023, groups the result by DonorName, and orders the result by the total donated in ascending order." 
+}, { + "id": "2312", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs had the highest total donations, excluding those associated with the \u0027Arts\u0027 category?", + "sql_context": "CREATE TABLE program_donations (program_category VARCHAR(20), donation_amount INT);INSERT INTO program_donations VALUES (\u0027Arts\u0027, 15000), (\u0027Education\u0027, 20000), (\u0027Health\u0027, 30000), (\u0027Arts\u0027, 10000), (\u0027Education\u0027, 25000);", + "sql": "SELECT program_category, SUM(donation_amount) FROM program_donations WHERE program_category !\u003d \u0027Arts\u0027 GROUP BY program_category;", + "sql_explanation": "The SQL query calculates the total donation amount for each program category, excluding the \u0027Arts\u0027 category, by using the WHERE clause to filter out unwanted records and the SUM and GROUP BY functions to aggregate the data." 
+}, { + "id": "2848", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average donation amount for each quarter", + "sql_context": "CREATE TABLE DonationAmounts (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL(10,2));", + "sql": "SELECT DATE_FORMAT(DonationDate, \u0027%Y-%m\u0027) AS Quarter, AVG(DonationAmount) FROM DonationAmounts GROUP BY Quarter;", + "sql_explanation": "This query groups the records in the \u0027DonationAmounts\u0027 table by the quarter of the DonationDate and calculates the average DonationAmount for each group." +}, { + "id": "3580", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the programs with more than 75% success rate in 2021?", + "sql_context": "CREATE TABLE programs_2021 (id INT, program_name VARCHAR(50), participants INT, success INT); INSERT INTO programs_2021 (id, program_name, participants, success) VALUES (1, \u0027Program G\u0027, 20, 15), (2, \u0027Program H\u0027, 25, 22), (3, \u0027Program I\u0027, 30, 27), (4, \u0027Program J\u0027, 35, 32), (5, \u0027Program K\u0027, 40, 37), (6, \u0027Program L\u0027, 45, 42);", + "sql": "SELECT program_name FROM 
programs_2021 WHERE success * 4 \u003e participants GROUP BY program_name;", + "sql_explanation": "The SQL query lists the programs with more than 75% success rate in 2021 by filtering records where success multiplied by 4 is greater than the participants." +}, { + "id": "4235", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per zip code?", + "sql_context": "CREATE TABLE Donors (id INT, donor_name VARCHAR(50), donation_amount DECIMAL(10,2), donation_date DATE, zip VARCHAR(10)); INSERT INTO Donors (id, donor_name, donation_amount, donation_date, zip) VALUES (1, \u0027Alex Brown\u0027, 200.00, \u00272021-01-01\u0027, \u002710001\u0027);", + "sql": "SELECT zip, AVG(donation_amount) as avg_donation_amount FROM Donors GROUP BY zip;", + "sql_explanation": "This query groups donations by zip code and calculates the average donation amount for each zip code." 
+}, { + "id": "4537", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor in the \u0027Donations\u0027 table?", + "sql_context": "CREATE TABLE Donations (DonorID INT, DonationDate DATE, Amount DECIMAL(10, 2)); INSERT INTO Donations (DonorID, DonationDate, Amount) VALUES (1, \u00272020-01-01\u0027, 50.00), (2, \u00272019-12-31\u0027, 100.00);", + "sql": "SELECT DonorID, SUM(Amount) as TotalDonated FROM Donations GROUP BY DonorID;", + "sql_explanation": "This query calculates the total amount donated by each donor in the \u0027Donations\u0027 table. It uses the SUM aggregate function to add up all donations for each unique DonorID and groups them accordingly." 
+}, { + "id": "4641", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total donation amount for each donor", + "sql_context": "CREATE TABLE DonationAmounts (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL(10,2));", + "sql": "SELECT DonorID, SUM(DonationAmount) FROM DonationAmounts GROUP BY DonorID;", + "sql_explanation": "This query groups the records in the \u0027DonationAmounts\u0027 table by DonorID and calculates the sum of the DonationAmount for each group." +}, { + "id": "5164", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total donation amount per country?", + "sql_context": "CREATE TABLE Donors (DonorID INT, Name TEXT, Country TEXT, TotalDonation FLOAT); INSERT INTO Donors (DonorID, Name, Country, TotalDonation) VALUES (1, \u0027John Smith\u0027, \u0027USA\u0027, 500.00), (2, \u0027Jane Doe\u0027, \u0027Canada\u0027, 350.00);", + "sql": "SELECT Country, SUM(TotalDonation) FROM Donors GROUP BY Country;", + "sql_explanation": "This query calculates the total donation amount per country by grouping the data using the Country column and then applying the SUM function on the TotalDonation 
column." +}, { + "id": "1835", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which social good technology organizations in the environmental sector have received the least funding in the past 3 years?", + "sql_context": "CREATE TABLE org_funding_env (org_name TEXT, funding_amount INT, funding_year INT, sector TEXT); INSERT INTO org_funding_env (org_name, funding_amount, funding_year, sector) VALUES (\u0027SocialTech6\u0027, 50000, 2020, \u0027environment\u0027), (\u0027SocialTech7\u0027, 70000, 2019, \u0027environment\u0027), (\u0027SocialTech8\u0027, 60000, 2018, \u0027environment\u0027), (\u0027SocialTech9\u0027, 80000, 2021, \u0027environment\u0027), (\u0027SocialTech10\u0027, 90000, 2017, \u0027environment\u0027);", + "sql": "SELECT org_name, MIN(funding_amount) FROM org_funding_env WHERE sector \u003d \u0027environment\u0027 AND funding_year BETWEEN 2018 AND 2020 GROUP BY org_name;", + "sql_explanation": "This SQL query finds the social good technology organizations in the environmental sector that have received the least funding in the past 3 years. It does this by using the MIN function to find the lowest funding amount for each organization in the org_funding_env table, and filtering the results to only include rows where the sector column is equal to \u0027environment\u0027 and the funding_year column is between 2018 and 2020. It then groups the results by org_name, so that the organizations with the least funding are identified." 
+}, { + "id": "2192", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many accessible technology projects were launched in each year in Asia?", + "sql_context": "CREATE TABLE AccessibleTech (project_id INT, launch_date DATE, location VARCHAR(20)); INSERT INTO AccessibleTech (project_id, launch_date, location) VALUES (1, \u00272005-02-17\u0027, \u0027Asia\u0027), (2, \u00272007-11-09\u0027, \u0027Asia\u0027), (3, \u00272009-06-23\u0027, \u0027Asia\u0027), (4, \u00272011-08-04\u0027, \u0027Asia\u0027), (5, \u00272013-01-15\u0027, \u0027Asia\u0027), (6, \u00272015-07-01\u0027, \u0027Asia\u0027), (7, \u00272017-02-20\u0027, \u0027Asia\u0027), (8, \u00272019-09-01\u0027, \u0027Asia\u0027);", + "sql": "SELECT YEAR(launch_date) AS year, COUNT(*) AS project_count FROM AccessibleTech WHERE location \u003d \u0027Asia\u0027 GROUP BY year ORDER BY year;", + "sql_explanation": "This query groups the accessible technology projects launched in Asia by launch year and counts the number of projects for each year by selecting the YEAR function of the launch_date and grouping by that value, then counting the number of records in each group. The results are ordered by year." 
+}, { + "id": "2273", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of ethical AI research papers by publication year?", + "sql_context": "CREATE TABLE ethical_ai_research (id INT, publication_year INT, is_ethical BOOLEAN);", + "sql": "SELECT publication_year, COUNT(*) as num_publications FROM ethical_ai_research WHERE is_ethical \u003d TRUE GROUP BY publication_year;", + "sql_explanation": "This SQL query groups the ethical AI research papers by publication year and counts the number of publications for each year." +}, { + "id": "2661", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many technology for social good initiatives have been implemented in each continent?", + "sql_context": "CREATE TABLE social_good_initiatives_continent (id INT, initiative_name VARCHAR(255), location VARCHAR(255)); CREATE VIEW social_good_initiatives_by_continent AS SELECT location, COUNT(*) FROM social_good_initiatives_continent GROUP BY location; ALTER TABLE social_good_initiatives_continent ADD COLUMN initiative_type VARCHAR(255);", + "sql": "SELECT location, initiative_type, COUNT(*) FROM social_good_initiatives_continent GROUP BY location, 
initiative_type;", + "sql_explanation": "This query calculates the number of technology for social good initiatives in each continent by adding the initiative_type column to the social_good_initiatives_continent table, then grouping the results by location and initiative_type and calculating the count." +}, { + "id": "2798", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum digital divide index for each region?", + "sql_context": "CREATE TABLE RegionDigitalDivide (RegionID INT PRIMARY KEY, RegionName VARCHAR(100), DigitalDivideIndex DECIMAL(5,2)); INSERT INTO RegionDigitalDivide (RegionID, RegionName, DigitalDivideIndex) VALUES (1, \u0027North America\u0027, 0.35), (2, \u0027South America\u0027, 0.45), (3, \u0027Europe\u0027, 0.25), (4, \u0027Asia\u0027, 0.15), (5, \u0027Africa\u0027, 0.55);", + "sql": "SELECT RegionName, MIN(DigitalDivideIndex) as MinDigitalDivideIndex FROM RegionDigitalDivide GROUP BY RegionName;", + "sql_explanation": "This query calculates the minimum digital divide index for each region. It groups the data by region name and then calculates the minimum digital divide index for each group." 
+}, { + "id": "2842", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of social good technology initiatives by technology category?", + "sql_context": "CREATE TABLE CategorySocialGood (CategoryID INT PRIMARY KEY, CategoryName VARCHAR(100), InitiativeCount INT); INSERT INTO CategorySocialGood (CategoryID, CategoryName, InitiativeCount) VALUES (1, \u0027AI\u0027, 2000), (2, \u0027Data Science\u0027, 1500), (3, \u0027Cloud Computing\u0027, 3000);", + "sql": "SELECT CategoryName, SUM(InitiativeCount) as TotalInitiativeCount FROM CategorySocialGood GROUP BY CategoryName;", + "sql_explanation": "This query calculates the total number of social good technology initiatives by technology category. It groups the data by technology category and then calculates the total number of initiatives for each group." 
+}, { + "id": "2972", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show a pivot table of the number of accessible technology initiatives by type and region in the \u0027accessibility_data\u0027 table", + "sql_context": "CREATE TABLE accessibility_data (id INT PRIMARY KEY, initiative_name VARCHAR(50), type VARCHAR(50), region VARCHAR(50));", + "sql": "SELECT type, region, COUNT(*) as num_initiatives FROM accessibility_data GROUP BY type, region ORDER BY type;", + "sql_explanation": "This query creates a pivot table showing the number of accessible technology initiatives by type and region from the \u0027accessibility_data\u0027 table." 
+}, { + "id": "3850", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of developers in the \"tech4good\" company, grouped by their department and only for those departments with more than 10 developers?", + "sql_context": "CREATE TABLE developers (id INT, name VARCHAR(50), salary FLOAT, department VARCHAR(50)); INSERT INTO developers (id, name, salary, department) VALUES (1, \u0027Alice\u0027, 70000, \u0027AI Research\u0027), (2, \u0027Bob\u0027, 75000, \u0027AI Research\u0027), (3, \u0027Charlie\u0027, 80000, \u0027Accessibility\u0027), (4, \u0027Dave\u0027, 85000, \u0027Accessibility\u0027), (5, \u0027Eve\u0027, 90000, \u0027Social Good\u0027);", + "sql": "SELECT department, AVG(salary) FROM developers GROUP BY department HAVING COUNT(*) \u003e 10;", + "sql_explanation": "The SQL query calculates the average salary for each department by grouping the records using the GROUP BY clause. The HAVING clause is used to filter the groups that have more than 10 developers (COUNT(*) \u003e 10). The AVG function calculates the average salary for each group." 
+}, { + "id": "4613", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of accessible technology products per company?", + "sql_context": "CREATE TABLE Accessible_Tech (company VARCHAR(50), product VARCHAR(50)); INSERT INTO Accessible_Tech (company, product) VALUES (\u0027Google\u0027, \u0027Screen Reader\u0027), (\u0027Microsoft\u0027, \u0027Adaptive Keyboard\u0027), (\u0027Apple\u0027, \u0027Voice Control\u0027), (\u0027IBM\u0027, \u0027Accessibility Checker\u0027);", + "sql": "SELECT company, AVG(COUNT(product)) FROM Accessible_Tech GROUP BY company;", + "sql_explanation": "This query calculates the average number of accessible technology products per company by grouping the data by company and finding the average count of products for each group." 
+}, { + "id": "5228", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of technology for social good projects in each region?", + "sql_context": "CREATE TABLE Social_Good (region VARCHAR(50), projects INT); INSERT INTO Social_Good (region, projects) VALUES (\u0027Asia\u0027, 1000), (\u0027Africa\u0027, 700), (\u0027Europe\u0027, 1500), (\u0027South America\u0027, 800);", + "sql": "SELECT region, SUM(projects) FROM Social_Good GROUP BY region;", + "sql_explanation": "This query calculates the total number of technology for social good projects in each region by grouping the data by region and finding the sum of projects for each group." 
+}, { + "id": "620", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in temperature between the maximum and minimum temperature for each day in July 2020?", + "sql_context": "CREATE TABLE Temperature (id INT, timestamp DATE, temperature REAL);", + "sql": "SELECT EXTRACT(DAY FROM timestamp) as day, MAX(temperature) - MIN(temperature) as temp_difference FROM Temperature WHERE EXTRACT(MONTH FROM timestamp) \u003d 7 AND EXTRACT(YEAR FROM timestamp) \u003d 2020 GROUP BY day;", + "sql_explanation": "Calculates the difference between the maximum and minimum temperature for each day in July 2020 by extracting the day, calculating the maximum and minimum temperature, and subtracting the minimum from the maximum temperature." 
+}, { + "id": "1088", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the difference in temperature between the highest and lowest temperature for each month in 2019?", + "sql_context": "CREATE TABLE Temperature (id INT, timestamp DATE, temperature REAL);", + "sql": "SELECT EXTRACT(MONTH FROM timestamp) as month, MAX(temperature) - MIN(temperature) as temp_difference FROM Temperature WHERE EXTRACT(YEAR FROM timestamp) \u003d 2019 GROUP BY month;", + "sql_explanation": "Calculates the difference between the highest and lowest temperature for each month in 2019 by extracting the month, calculating the maximum and minimum temperature, and subtracting the minimum from the maximum temperature." 
+}, { + "id": "2205", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 countries with the most organic farms?", + "sql_context": "CREATE TABLE OrganicFarms (id INT, name TEXT, country TEXT); INSERT INTO OrganicFarms (id, name, country) VALUES (1, \u0027Farm1\u0027, \u0027Country1\u0027), (2, \u0027Farm2\u0027, \u0027Country2\u0027), (3, \u0027Farm3\u0027, \u0027Country3\u0027), (4, \u0027Farm4\u0027, \u0027Country1\u0027), (5, \u0027Farm5\u0027, \u0027Country2\u0027), (6, \u0027Farm6\u0027, \u0027Country4\u0027);", + "sql": "SELECT country, COUNT(DISTINCT id) as num_organic_farms FROM OrganicFarms GROUP BY country ORDER BY num_organic_farms DESC LIMIT 3;", + "sql_explanation": "Finds the top 3 countries with the most organic farms by grouping by country, counting the number of distinct farms, and ordering by number of organic farms in descending order." 
+}, { + "id": "3083", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many urban agriculture projects were initiated per year since 2015?", + "sql_context": "CREATE TABLE urban_agriculture_projects (project_name TEXT, initiation_year NUMERIC); INSERT INTO urban_agriculture_projects (project_name, initiation_year) VALUES (\u0027Urban Farm 1\u0027, 2015), (\u0027Urban Garden 1\u0027, 2016), (\u0027Urban Orchard 1\u0027, 2017), (\u0027Urban Farm 2\u0027, 2018), (\u0027Urban Garden 2\u0027, 2019), (\u0027Urban Orchard 2\u0027, 2020);", + "sql": "SELECT initiation_year, COUNT(*) as num_projects FROM urban_agriculture_projects GROUP BY initiation_year;", + "sql_explanation": "The SQL query calculates the number of urban agriculture projects initiated per year since 2015 using the COUNT() aggregate function and the GROUP BY clause. The final result will include the initiation year and the corresponding number of projects." 
+}, { + "id": "3717", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average yield of crops for each farm type, ranked by the highest average yield?", + "sql_context": "CREATE TABLE Farm (FarmID int, FarmType varchar(20), Yield int); INSERT INTO Farm (FarmID, FarmType, Yield) VALUES (1, \u0027Organic\u0027, 150), (2, \u0027Conventional\u0027, 200), (3, \u0027Urban\u0027, 100);", + "sql": "SELECT FarmType, AVG(Yield) as AvgYield FROM Farm GROUP BY FarmType ORDER BY AvgYield DESC;", + "sql_explanation": "The SQL query calculates the average yield for each farm type by using the AVG function and grouping by FarmType. It then orders the results by the average yield in descending order." 
+}, { + "id": "3876", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which crop has the highest yield by farmer in \u0027crop_comparison\u0027 table?", + "sql_context": "CREATE TABLE crop_comparison (farmer VARCHAR(50), crop VARCHAR(50), yield INT); INSERT INTO crop_comparison (farmer, crop, yield) VALUES (\u0027FarmerA\u0027, \u0027corn\u0027, 100), (\u0027FarmerA\u0027, \u0027wheat\u0027, 80), (\u0027FarmerB\u0027, \u0027corn\u0027, 110), (\u0027FarmerB\u0027, \u0027wheat\u0027, 90), (\u0027FarmerC\u0027, \u0027corn\u0027, 95), (\u0027FarmerC\u0027, \u0027wheat\u0027, 75);", + "sql": "SELECT farmer, crop, MAX(yield) as max_yield FROM crop_comparison GROUP BY farmer, crop;", + "sql_explanation": "The SQL query identifies the crop with the highest yield by farmer by grouping the records by farmer and crop and calculating the maximum yield using the MAX() function." 
+}, { + "id": "4488", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total crop yield by crop in \u0027crop_distribution\u0027 table?", + "sql_context": "CREATE TABLE crop_distribution (country VARCHAR(50), crop VARCHAR(50), yield INT); INSERT INTO crop_distribution (country, crop, yield) VALUES (\u0027Canada\u0027, \u0027corn\u0027, 1000), (\u0027Canada\u0027, \u0027wheat\u0027, 2000), (\u0027USA\u0027, \u0027corn\u0027, 3000), (\u0027USA\u0027, \u0027wheat\u0027, 4000), (\u0027Mexico\u0027, \u0027corn\u0027, 2500), (\u0027Mexico\u0027, \u0027wheat\u0027, 1500);", + "sql": "SELECT crop, SUM(yield) as total_yield FROM crop_distribution GROUP BY crop;", + "sql_explanation": "The SQL query calculates the total crop yield by crop by grouping the records by crop and calculating the sum of yield using the SUM() function." 
+}, { + "id": "4738", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production of crops in a given year?", + "sql_context": "CREATE TABLE crops (id INT, name VARCHAR(50), location VARCHAR(50), year INT, production INT); INSERT INTO crops (id, name, location, year, production) VALUES (1, \u0027Corn\u0027, \u0027US\u0027, 2020, 5000), (2, \u0027Wheat\u0027, \u0027US\u0027, 2020, 7000), (3, \u0027Soybean\u0027, \u0027Canada\u0027, 2020, 3000), (4, \u0027Barley\u0027, \u0027Canada\u0027, 2020, 4000), (5, \u0027Corn\u0027, \u0027US\u0027, 2019, 6000), (6, \u0027Wheat\u0027, \u0027US\u0027, 2019, 8000), (7, \u0027Soybean\u0027, \u0027Canada\u0027, 2019, 4000), (8, \u0027Barley\u0027, \u0027Canada\u0027, 2019, 5000);", + "sql": "SELECT year, AVG(production) as avg_production FROM crops GROUP BY year;", + "sql_explanation": "This query groups crops by year and then calculates the average production of each year using the AVG() function. It does this by using the GROUP BY clause on the year column." 
+}, { + "id": "5144", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average yield of crops for each farm in \u0027farming\u0027 table?", + "sql_context": "CREATE TABLE farming (id INT, name TEXT, location TEXT, crop TEXT, yield INT); INSERT INTO farming VALUES (1, \u0027Smith Farm\u0027, \u0027Colorado\u0027, \u0027Corn\u0027, 120), (2, \u0027Brown Farm\u0027, \u0027Nebraska\u0027, \u0027Soybeans\u0027, 45), (3, \u0027Jones Farm\u0027, \u0027Iowa\u0027, \u0027Wheat\u0027, 80);", + "sql": "SELECT crop, AVG(yield) as avg_yield FROM farming GROUP BY crop;", + "sql_explanation": "This query calculates the average yield for each crop by grouping the farming table by the \u0027crop\u0027 column and then calculating the average yield for each group." 
+}, { + "id": "5602", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of crops by family across all regions?", + "sql_context": "CREATE TABLE crops (id INT PRIMARY KEY, name VARCHAR(50), scientific_name VARCHAR(50), growth_season VARCHAR(20), family VARCHAR(25), region VARCHAR(25)); INSERT INTO crops (id, name, scientific_name, growth_season, family, region) VALUES (1, \u0027Wheat\u0027, \u0027Triticum aestivum\u0027, \u0027Cool\u0027, \u0027Poaceae\u0027, \u0027Northwest\u0027); INSERT INTO crops (id, name, scientific_name, growth_season, family, region) VALUES (2, \u0027Rice\u0027, \u0027Oryza sativa\u0027, \u0027Warm\u0027, \u0027Poaceae\u0027, \u0027Southeast\u0027);", + "sql": "SELECT family, COUNT(*) FROM crops GROUP BY family;", + "sql_explanation": "This SQL query groups the crops table by family and counts the number of crops in each family." 
+}, { + "id": "1790", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average oxygen level for each month in 2021 for a specific fish farm?", + "sql_context": "CREATE TABLE fish_farms (id INT, name VARCHAR(50), location VARCHAR(50)); CREATE TABLE water_temperatures (id INT, fish_farm_id INT, date DATE, temperature DECIMAL(5,2)); CREATE TABLE water_oxygen (id INT, fish_farm_id INT, date DATE, oxygen DECIMAL(5,2)); INSERT INTO fish_farms (id, name, location) VALUES (1, \u0027Farm A\u0027, \u0027Country X\u0027); INSERT INTO water_temperatures (id, fish_farm_id, date, temperature) VALUES (1, 1, \u00272021-01-01\u0027, 12.5), (2, 1, \u00272021-01-02\u0027, 12.7); INSERT INTO water_oxygen (id, fish_farm_id, date, oxygen) VALUES (1, 1, \u00272021-01-01\u0027, 8.0), (2, 1, \u00272021-01-02\u0027, 8.2);", + "sql": "SELECT EXTRACT(MONTH FROM date) AS month, AVG(oxygen) AS avg_oxygen FROM water_oxygen WHERE YEAR(date) \u003d 2021 AND fish_farm_id \u003d 1 GROUP BY month;", + "sql_explanation": "This query calculates the average oxygen level for each month in 2021 for a specific fish farm. It extracts the month from the date, groups the records by month, filters the records by fish_farm_id, and calculates the average oxygen level for each group." 
+}, { + "id": "1825", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum dissolved oxygen level for aquaculture sites located in Asia, partitioned by farm type?", + "sql_context": "CREATE TABLE aquaculture_sites (site_id INT, region VARCHAR(50), farm_type VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO aquaculture_sites VALUES (1, \u0027Asia\u0027, \u0027Freshwater\u0027, 8.5), (2, \u0027Asia\u0027, \u0027Marine\u0027, 9.2), (3, \u0027Africa\u0027, \u0027Freshwater\u0027, 7.8), (4, \u0027Europe\u0027, \u0027Marine\u0027, 7.5);", + "sql": "SELECT region, farm_type, MAX(dissolved_oxygen) AS max_dissolved_oxygen FROM aquaculture_sites WHERE region \u003d \u0027Asia\u0027 GROUP BY region, farm_type;", + "sql_explanation": "Calculate the maximum dissolved oxygen level for each unique combination of region (Asia) and farm type." 
+}, { + "id": "2184", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average dissolved oxygen level for each monitoring station in the past month?", + "sql_context": "CREATE TABLE monitoring_stations (id INT, name TEXT, location TEXT); INSERT INTO monitoring_stations (id, name, location) VALUES (1, \u0027Station A\u0027, \u0027Coast of California\u0027), (2, \u0027Station B\u0027, \u0027Seattle Coast\u0027); CREATE TABLE oxygen_readings (id INT, station_id INT, reading DATE, level DECIMAL(5,2)); INSERT INTO oxygen_readings (id, station_id, reading, level) VALUES (1, 1, \u00272022-06-01\u0027, 7.5), (2, 1, \u00272022-06-15\u0027, 7.3), (3, 2, \u00272022-06-05\u0027, 7.8), (4, 2, \u00272022-06-20\u0027, 7.6);", + "sql": "SELECT AVG(level) avg_oxygen, station_id FROM oxygen_readings WHERE reading \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY station_id;", + "sql_explanation": "Calculate the average dissolved oxygen level for each monitoring station in the past month." 
+}, { + "id": "2396", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many aquaculture farms are located in each country and their total production in metric tons?", + "sql_context": "CREATE TABLE Farm (farm_id INT, country VARCHAR(50), production DECIMAL(10,2)); INSERT INTO Farm (farm_id, country, production) VALUES (1, \u0027Norway\u0027, 5000.5), (2, \u0027Chile\u0027, 3500.3), (3, \u0027Canada\u0027, 2000.0);", + "sql": "SELECT Farm.country, COUNT(Farm.farm_id) as NumFarms, SUM(Farm.production) as TotalProduction FROM Farm GROUP BY Farm.country;", + "sql_explanation": "This query groups the records in the Farm table based on the country column and calculates the number of farms and total production in each country by using the COUNT and SUM functions respectively." 
+}, { + "id": "2684", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average feed conversion ratio for fish in recirculating aquaculture systems, grouped by species?", + "sql_context": "CREATE TABLE feed_conversion (farm_id INT, system_type VARCHAR(20), species VARCHAR(20), feed_conversion_ratio FLOAT); INSERT INTO feed_conversion VALUES (1, \u0027Recirculating\u0027, \u0027Salmon\u0027, 1.5), (2, \u0027Pond\u0027, \u0027Tilapia\u0027, 2.0), (3, \u0027Recirculating\u0027, \u0027Trout\u0027, 1.8), (4, \u0027Cage\u0027, \u0027Carp\u0027, 2.5);", + "sql": "SELECT species, AVG(feed_conversion_ratio) FROM feed_conversion WHERE system_type \u003d \u0027Recirculating\u0027 GROUP BY species;", + "sql_explanation": "This SQL query calculates the average feed conversion ratio for fish in recirculating aquaculture systems, grouped by species. It first filters the data to only include recirculating aquaculture systems using the WHERE clause. Then, it uses the AVG function to calculate the average feed conversion ratio for each unique species. Finally, it groups the results by species using the GROUP BY clause." 
+}, { + "id": "2701", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many farms in Region3 have a harvest yield above the average for that region?", + "sql_context": "CREATE TABLE FarmsRegion (farm_id INT, region VARCHAR(10), harvest_yield FLOAT); INSERT INTO FarmsRegion (farm_id, region, harvest_yield) VALUES (1, \u0027Region3\u0027, 900), (2, \u0027Region3\u0027, 850), (3, \u0027Region3\u0027, 950), (4, \u0027Region3\u0027, 700);", + "sql": "SELECT farm_id FROM FarmsRegion WHERE region \u003d \u0027Region3\u0027 GROUP BY farm_id HAVING AVG(harvest_yield) \u003c harvest_yield;", + "sql_explanation": "The SQL query calculates the average harvest yield for farms in Region3 using the AVG function and the HAVING clause to filter farms in Region3 that have a harvest yield above the average for that region." 
+}, { + "id": "3011", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature in each region for the past week?", + "sql_context": "CREATE TABLE water_temps (id INT, region TEXT, date DATE, temp FLOAT); INSERT INTO water_temps (id, region, date, temp) VALUES (1, \u0027North\u0027, \u00272022-01-01\u0027, 12.3), (2, \u0027South\u0027, \u00272022-01-01\u0027, 14.2), (3, \u0027North\u0027, \u00272022-01-02\u0027, 12.4), (4, \u0027South\u0027, \u00272022-01-02\u0027, 14.1);", + "sql": "SELECT region, AVG(temp) FROM water_temps WHERE date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 7 DAY) GROUP BY region;", + "sql_explanation": "This query calculates the average water temperature for each region for the past week. It uses the AVG function to find the average temperature for each region and the GROUP BY clause to group the results by region. The WHERE clause filters the results to show only the records from the past 7 days." 
+}, { + "id": "3119", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view that lists fish species and their average biomass", + "sql_context": "CREATE TABLE fish_stock (fish_id INT PRIMARY KEY, species VARCHAR(50), location VARCHAR(50), biomass FLOAT); INSERT INTO fish_stock (fish_id, species, location, biomass) VALUES (1, \u0027Tuna\u0027, \u0027Pacific\u0027, 250.5), (2, \u0027Salmon\u0027, \u0027Atlantic\u0027, 180.3), (3, \u0027Cod\u0027, \u0027Baltic\u0027, 120.0);", + "sql": "CREATE VIEW fish_biomass AS SELECT species, AVG(biomass) as avg_biomass FROM fish_stock GROUP BY species;", + "sql_explanation": "Create a view named \u0027fish_biomass\u0027 that displays the average \u0027biomass\u0027 for each species in the \u0027fish_stock\u0027 table." 
+}, { + "id": "3263", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find countries with no sustainable seafood certifications.", + "sql_context": "CREATE TABLE seafood_certifications (id INT, country VARCHAR(50), certification VARCHAR(50)); INSERT INTO seafood_certifications (id, country, certification) VALUES (1, \u0027Norway\u0027, \u0027MSC\u0027), (2, \u0027Norway\u0027, \u0027ASC\u0027), (3, \u0027Canada\u0027, \u0027MSC\u0027);", + "sql": "SELECT country FROM seafood_certifications GROUP BY country HAVING COUNT(DISTINCT certification) \u003d 0;", + "sql_explanation": "This query groups seafood_certifications by country and returns countries with no sustainable seafood certifications by checking for a count of distinct certifications equal to zero." 
+}, { + "id": "3284", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the farms with \u0027Salmon\u0027 species and their total biomass.", + "sql_context": "CREATE TABLE Farm (id INT, farm_name TEXT, species TEXT, weight FLOAT, age INT); INSERT INTO Farm (id, farm_name, species, weight, age) VALUES (1, \u0027OceanPacific\u0027, \u0027Tilapia\u0027, 500.3, 2), (2, \u0027SeaBreeze\u0027, \u0027Salmon\u0027, 300.1, 1), (3, \u0027OceanPacific\u0027, \u0027Tilapia\u0027, 600.5, 3), (4, \u0027FarmX\u0027, \u0027Salmon\u0027, 700.2, 4);", + "sql": "SELECT farm_name, SUM(weight) as total_biomass FROM Farm WHERE species \u003d \u0027Salmon\u0027 GROUP BY farm_name;", + "sql_explanation": "This SQL query lists the farms with the Salmon species and their total biomass by using the SUM function on the weight column, while filtering the data with the WHERE clause to only consider rows with the species \u0027Salmon\u0027, and grouping the data by farm_name to get the total biomass per farm." 
+}, { + "id": "3353", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average biomass of fish in all farms, grouped by location, where the biomass is greater than 3000 tons?", + "sql_context": "CREATE TABLE farm_biomass_by_location (farm_id INT, location VARCHAR(255), biomass INT); INSERT INTO farm_biomass_by_location (farm_id, location, biomass) VALUES (1, \u0027Pacific Ocean\u0027, 4000), (2, \u0027Atlantic Ocean\u0027, 5000), (3, \u0027Mediterranean Sea\u0027, 3000), (4, \u0027Pacific Ocean\u0027, 6000), (5, \u0027Atlantic Ocean\u0027, 4000);", + "sql": "SELECT location, AVG(biomass) FROM farm_biomass_by_location WHERE biomass \u003e 3000 GROUP BY location;", + "sql_explanation": "This SQL query calculates the average biomass of fish in all farms, grouped by location, where the biomass is greater than 3000 tons. It filters out any records where the biomass is less than or equal to 3000 tons using the WHERE clause. Then, it uses the GROUP BY clause to group the results by location. Finally, it calculates the average biomass for each location using the AVG function." 
+}, { + "id": "3359", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum stocking density of fish for each species in the \u0027fish_stock\u0027 table, where the total biomass exceeds 50000?", + "sql_context": "CREATE TABLE fish_stock (id INT, species VARCHAR(255), stocking_density FLOAT, biomass FLOAT); INSERT INTO fish_stock (id, species, stocking_density, biomass) VALUES (1, \u0027Salmon\u0027, 50.0, 60000.0), (2, \u0027Salmon\u0027, 45.0, 45000.0), (3, \u0027Tilapia\u0027, 30.0, 30000.0), (4, \u0027Tilapia\u0027, 35.0, 55000.0);", + "sql": "SELECT species, MIN(stocking_density) FROM fish_stock GROUP BY species HAVING SUM(biomass) \u003e 50000;", + "sql_explanation": "The SQL query finds the minimum stocking density for each species in the \u0027fish_stock\u0027 table where the total biomass exceeds 50000. It groups the records by \u0027species\u0027 and uses the MIN() function to find the minimum \u0027stocking_density\u0027 value for each group. The HAVING clause filters the groups based on the total biomass." 
+}, { + "id": "3710", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average weight of fish, grouped by farm name and species.", + "sql_context": "CREATE TABLE Farm (id INT, farm_name TEXT, species TEXT, weight FLOAT, age INT); INSERT INTO Farm (id, farm_name, species, weight, age) VALUES (1, \u0027OceanPacific\u0027, \u0027Tilapia\u0027, 500.3, 2), (2, \u0027SeaBreeze\u0027, \u0027Salmon\u0027, 300.1, 1), (3, \u0027OceanPacific\u0027, \u0027Tilapia\u0027, 600.5, 3), (4, \u0027FarmX\u0027, \u0027Salmon\u0027, 700.2, 4), (5, \u0027OceanPacific\u0027, \u0027Salmon\u0027, 800.1, 5);", + "sql": "SELECT farm_name, species, AVG(weight) as avg_weight FROM Farm GROUP BY farm_name, species;", + "sql_explanation": "This SQL query calculates the average weight of fish, grouped by farm name and species, by using the AVG function on the weight column, and grouping the data by farm_name and species to get the average weight per farm and species." 
+}, { + "id": "4081", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature for each species in the \u0027fish_tanks\u0027 table?", + "sql_context": "CREATE TABLE fish_tanks (tank_id INT, species VARCHAR(255), water_temperature DECIMAL(5,2)); INSERT INTO fish_tanks (tank_id, species, water_temperature) VALUES (1, \u0027Tilapia\u0027, 26.5), (2, \u0027Salmon\u0027, 12.0), (3, \u0027Tilapia\u0027, 27.3), (4, \u0027Catfish\u0027, 24.6), (5, \u0027Salmon\u0027, 12.5);", + "sql": "SELECT species, AVG(water_temperature) as avg_temp FROM fish_tanks GROUP BY species;", + "sql_explanation": "The SQL query calculates the average water temperature for each species in the \u0027fish_tanks\u0027 table by grouping records by \u0027species\u0027 and calculating the average \u0027water_temperature\u0027 for each group." 
+}, { + "id": "4146", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average biomass of fish farmed in each country, excluding fish from Norway?", + "sql_context": "CREATE TABLE FarmF (country VARCHAR(20), species VARCHAR(20), biomass FLOAT); INSERT INTO FarmF (country, species, biomass) VALUES (\u0027Norway\u0027, \u0027Salmon\u0027, 5000); INSERT INTO FarmF (country, species, biomass) VALUES (\u0027Norway\u0027, \u0027Cod\u0027, 3000); INSERT INTO FarmF (country, species, biomass) VALUES (\u0027Scotland\u0027, \u0027Herring\u0027, 2000); INSERT INTO FarmF (country, species, biomass) VALUES (\u0027Scotland\u0027, \u0027Mackerel\u0027, 1000); INSERT INTO FarmF (country, species, biomass) VALUES (\u0027Canada\u0027, \u0027Halibut\u0027, 4000);", + "sql": "SELECT country, AVG(biomass) FROM FarmF WHERE country !\u003d \u0027Norway\u0027 GROUP BY country;", + "sql_explanation": "This SQL query calculates the average biomass of fish farmed in each country, excluding fish from Norway, by selecting the country and average of biomass where the country is not Norway, grouped by country." 
+}, { + "id": "4220", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average feed conversion ratio for each fish species in the \u0027feeds\u0027 table?", + "sql_context": "CREATE TABLE feeds (id INT, fish_species TEXT, feed_conversion_ratio FLOAT); INSERT INTO feeds (id, fish_species, feed_conversion_ratio) VALUES (1, \u0027Salmon\u0027, 1.2); INSERT INTO feeds (id, fish_species, feed_conversion_ratio) VALUES (2, \u0027Tilapia\u0027, 1.5); INSERT INTO feeds (id, fish_species, feed_conversion_ratio) VALUES (3, \u0027Salmon\u0027, 1.3); INSERT INTO feeds (id, fish_species, feed_conversion_ratio) VALUES (4, \u0027Tilapia\u0027, 1.4); INSERT INTO feeds (id, fish_species, feed_conversion_ratio) VALUES (5, \u0027Salmon\u0027, 1.1);", + "sql": "SELECT fish_species, AVG(feed_conversion_ratio) FROM feeds GROUP BY fish_species;", + "sql_explanation": "This query calculates the average feed conversion ratio for each fish species in the \u0027feeds\u0027 table by using the AVG function on the feed_conversion_ratio column, and grouping by the fish_species column." 
+}, { + "id": "4356", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each product in the \u0027sales_data\u0027 table?", + "sql_context": "CREATE TABLE sales_data (salesperson VARCHAR(255), product VARCHAR(255), revenue DECIMAL(8,2), quantity INT); INSERT INTO sales_data (salesperson, product, revenue, quantity) VALUES (\u0027John\u0027, \u0027Tilapia\u0027, 1500.00, 200), (\u0027Jane\u0027, \u0027Salmon\u0027, 3500.00, 350), (\u0027John\u0027, \u0027Catfish\u0027, 750.00, 150), (\u0027Mike\u0027, \u0027Tilapia\u0027, 2000.00, 250), (\u0027Jane\u0027, \u0027Catfish\u0027, 500.00, 100), (\u0027Mike\u0027, \u0027Salmon\u0027, 1500.00, 300);", + "sql": "SELECT product, SUM(revenue) as total_revenue FROM sales_data GROUP BY product;", + "sql_explanation": "The SQL query calculates the total revenue for each product in the \u0027sales_data\u0027 table by grouping records by \u0027product\u0027 and calculating the sum of \u0027revenue\u0027 for each group." 
+}, { + "id": "4778", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum dissolved oxygen level for each species in the fish_stock table?", + "sql_context": "CREATE TABLE fish_stock (species VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO fish_stock (species, dissolved_oxygen) VALUES (\u0027Tilapia\u0027, 6.5), (\u0027Tilapia\u0027, 8.0), (\u0027Salmon\u0027, 7.5);", + "sql": "SELECT species, MIN(dissolved_oxygen) FROM fish_stock GROUP BY species;", + "sql_explanation": "The SQL query calculates the minimum dissolved oxygen level for each species in the fish_stock table. It does this by using the MIN() function on the dissolved_oxygen column and grouping the results by the species column." 
+}, { + "id": "4853", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many farms of each type are there, grouped by farm type?", + "sql_context": "CREATE TABLE farm_count_by_type (farm_id INT, farm_type VARCHAR(255)); INSERT INTO farm_count_by_type (farm_id, farm_type) VALUES (1, \u0027Pond\u0027), (2, \u0027Cage\u0027), (3, \u0027Recirculating\u0027), (4, \u0027Pond\u0027), (5, \u0027Cage\u0027), (6, \u0027Pond\u0027), (7, \u0027Cage\u0027), (8, \u0027Recirculating\u0027);", + "sql": "SELECT farm_type, COUNT(*) FROM farm_count_by_type GROUP BY farm_type;", + "sql_explanation": "This SQL query calculates how many farms of each type there are, grouped by farm type. It uses the GROUP BY clause to group the results by farm type. Then, it uses the COUNT function to count the number of records for each farm type." 
+}, { + "id": "4966", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fish in each region?", + "sql_context": "CREATE TABLE Fish_Farms (Farm_ID INT, Farm_Name TEXT, Region TEXT, Number_of_Fish INT); INSERT INTO Fish_Farms (Farm_ID, Farm_Name, Region, Number_of_Fish) VALUES (1, \u0027Farm S\u0027, \u0027Northern\u0027, 5000); INSERT INTO Fish_Farms (Farm_ID, Farm_Name, Region, Number_of_Fish) VALUES (2, \u0027Farm T\u0027, \u0027Southern\u0027, 6000); INSERT INTO Fish_Farms (Farm_ID, Farm_Name, Region, Number_of_Fish) VALUES (3, \u0027Farm U\u0027, \u0027Eastern\u0027, 7000); INSERT INTO Fish_Farms (Farm_ID, Farm_Name, Region, Number_of_Fish) VALUES (4, \u0027Farm V\u0027, \u0027Western\u0027, 8000);", + "sql": "SELECT Region, SUM(Number_of_Fish) FROM Fish_Farms GROUP BY Region;", + "sql_explanation": "This query calculates the total number of fish in each region by using the SUM function on the Number_of_Fish column of the Fish_Farms table and grouping the results using the GROUP BY clause to combine the results by region." 
+}, { + "id": "5241", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum biomass of fish for each species?", + "sql_context": "CREATE TABLE fish_stock (id INT, species VARCHAR, biomass FLOAT); INSERT INTO fish_stock (id, species, biomass) VALUES (1, \u0027Tilapia\u0027, 500.0), (2, \u0027Salmon\u0027, 800.0), (3, \u0027Trout\u0027, 300.0), (4, \u0027Bass\u0027, 700.0), (5, \u0027Tilapia\u0027, 600.0);", + "sql": "SELECT species, MAX(biomass) FROM fish_stock GROUP BY species;", + "sql_explanation": "This query calculates the maximum biomass of fish for each species by grouping the records by the species column and finding the maximum biomass value for each group." 
+}, { + "id": "2168", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the local economic impact of sustainable tourism in the Asia-Pacific region?", + "sql_context": "CREATE TABLE sustainable_tourism (country VARCHAR(50), region VARCHAR(50), economic_impact FLOAT); INSERT INTO sustainable_tourism (country, region, economic_impact) VALUES (\u0027Japan\u0027, \u0027Asia-Pacific\u0027, 1000000), (\u0027Australia\u0027, \u0027Asia-Pacific\u0027, 1200000), (\u0027New Zealand\u0027, \u0027Asia-Pacific\u0027, 800000);", + "sql": "SELECT region, SUM(economic_impact) AS total_economic_impact FROM sustainable_tourism WHERE region \u003d \u0027Asia-Pacific\u0027 GROUP BY region;", + "sql_explanation": "The SQL query calculates the total economic impact of sustainable tourism in the Asia-Pacific region by summing the economic impact for all countries in the region." 
+}, { + "id": "2237", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of sustainable transportation projects in each country?", + "sql_context": "CREATE TABLE sustainable_transportation (project_id INT, project_name TEXT, country TEXT, project_type TEXT); INSERT INTO sustainable_transportation (project_id, project_name, country, project_type) VALUES (1, \u0027Bicycle Lanes Initiative\u0027, \u0027USA\u0027, \u0027Transportation\u0027), (2, \u0027Electric Bus Program\u0027, \u0027Canada\u0027, \u0027Transportation\u0027);", + "sql": "SELECT country, COUNT(*) AS total_projects FROM sustainable_transportation WHERE project_type \u003d \u0027Transportation\u0027 GROUP BY country;", + "sql_explanation": "Select the country and total number of sustainable transportation projects by using a SELECT statement with a WHERE clause that filters for transportation projects. Group the result by country." 
+}, { + "id": "2587", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries contributing to cultural heritage site revenue.", + "sql_context": "CREATE TABLE heritage_sites (site_id INT, country VARCHAR(50), revenue FLOAT);", + "sql": "SELECT country, SUM(revenue) AS total_revenue FROM heritage_sites GROUP BY country ORDER BY total_revenue DESC LIMIT 3;", + "sql_explanation": "The query calculates the total revenue for each country with cultural heritage sites and orders the results in descending order. It then limits the output to the top 3 countries with the highest revenue." +}, { + "id": "3307", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cultural heritage sites have more than 1000 reviews?", + "sql_context": "CREATE TABLE attractions (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), num_reviews INT);", + "sql": "SELECT name, type FROM attractions WHERE num_reviews \u003e 1000 GROUP BY name, type HAVING COUNT(*) \u003e 1;", + "sql_explanation": "This query selects the names and types of cultural heritage sites with more than 1000 reviews by grouping by name and type, and filtering." 
+}, { + "id": "3316", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total revenue generated by eco-friendly hotels in Germany and Italy.", + "sql_context": "CREATE TABLE eco_hotels (hotel_id INT, name TEXT, country TEXT, revenue FLOAT); INSERT INTO eco_hotels VALUES (1, \u0027Green Hotel\u0027, \u0027Germany\u0027, 250000), (2, \u0027Eco Lodge\u0027, \u0027Italy\u0027, 300000), (3, \u0027Sustainable Resort\u0027, \u0027Germany\u0027, 400000);", + "sql": "SELECT country, SUM(revenue) FROM eco_hotels WHERE country IN (\u0027Germany\u0027, \u0027Italy\u0027) GROUP BY country;", + "sql_explanation": "This query identifies the total revenue generated by eco-friendly hotels in Germany and Italy by filtering the results to only show records with country \u0027Germany\u0027 or \u0027Italy\u0027 and then grouping the results by the country. The SUM function is used to calculate the total revenue for each group." 
+}, { + "id": "3379", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average virtual tour rating for each cultural heritage site?", + "sql_context": "CREATE TABLE cultural_heritage (id INT, name TEXT, location TEXT, virtual_tour_id INT, rating INT); INSERT INTO cultural_heritage (id, name, location, virtual_tour_id, rating) VALUES (1, \u0027Acropolis\u0027, \u0027Greece\u0027, 1, 4); INSERT INTO cultural_heritage (id, name, location, virtual_tour_id, rating) VALUES (2, \u0027Machu Picchu\u0027, \u0027Peru\u0027, 2, 5);", + "sql": "SELECT virtual_tour_id, AVG(rating) as avg_rating FROM cultural_heritage GROUP BY virtual_tour_id;", + "sql_explanation": "We calculate the average rating of virtual tours for each cultural heritage site. We group the records by the virtual_tour_id and apply the AVG function to the rating column." 
+}, { + "id": "3994", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each hotel in 2021?", + "sql_context": "CREATE TABLE hotel_revenue(revenue_id INT, hotel_id INT, year INT, revenue DECIMAL);", + "sql": "SELECT hotel_id, SUM(revenue) FROM hotel_revenue WHERE year \u003d 2021 GROUP BY hotel_id;", + "sql_explanation": "The SQL query calculates the total revenue for each hotel in 2021 by filtering for 2021 revenue and grouping by hotel ID." +}, { + "id": "4586", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by sustainable tourism in each continent?", + "sql_context": "CREATE TABLE sustainable_tourism (tour_id INT, continent VARCHAR(20), revenue DECIMAL(5,2)); INSERT INTO sustainable_tourism (tour_id, continent, revenue) VALUES (1, \u0027Asia\u0027, 7000.00), (2, \u0027Asia\u0027, 8000.00), (3, \u0027Africa\u0027, 6000.00), (4, \u0027Africa\u0027, 5000.00);", + "sql": "SELECT continent, SUM(revenue) FROM sustainable_tourism GROUP BY continent;", + "sql_explanation": "This query calculates the total revenue generated by sustainable tourism in each continent by grouping the 
results by continent and then summing the revenue for each group." +}, { + "id": "4670", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural heritage sites are there in each country?", + "sql_context": "CREATE TABLE Cultural_Heritage (country TEXT, site_name TEXT); INSERT INTO Cultural_Heritage (country, site_name) VALUES (\u0027Italy\u0027, \u0027Colosseum\u0027), (\u0027France\u0027, \u0027Eiffel Tower\u0027), (\u0027Spain\u0027, \u0027Alhambra\u0027), (\u0027Greece\u0027, \u0027Acropolis\u0027);", + "sql": "SELECT country, COUNT(site_name) FROM Cultural_Heritage GROUP BY country;", + "sql_explanation": "The SQL query calculates the number of cultural heritage sites in each country by grouping the records based on country and then counting the number of site_names for each group." 
+}, { + "id": "288", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 gas-producing wells in the Gulf of Mexico in H1 2019, ordered by average daily gas production.", + "sql_context": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), oil_production FLOAT, gas_production FLOAT, location VARCHAR(50), timestamp TIMESTAMP); INSERT INTO wells (well_id, well_name, oil_production, gas_production, location, timestamp) VALUES (1, \u0027Well C\u0027, 1500, 2500, \u0027Gulf of Mexico\u0027, \u00272019-01-01 00:00:00\u0027), (2, \u0027Well D\u0027, 1800, 2200, \u0027Gulf of Mexico\u0027, \u00272019-01-02 00:00:00\u0027);", + "sql": "SELECT well_id, well_name, AVG(gas_production) FROM wells WHERE location \u003d \u0027Gulf of Mexico\u0027 AND EXTRACT(MONTH FROM timestamp) BETWEEN 1 AND 6 AND EXTRACT(YEAR FROM timestamp) \u003d 2019 GROUP BY well_id, well_name ORDER BY AVG(gas_production) DESC LIMIT 3;", + "sql_explanation": "The SQL query identifies the top 3 gas-producing wells in the Gulf of Mexico in H1 2019 by calculating the average daily gas production, grouping the data by well_id and well_name, and ordering the results by the average gas production in descending order." 
+}, { + "id": "1617", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Retrieve the total length and diameter of pipeline segments with an end location in Houston.", + "sql_context": "CREATE TABLE PipelineSegments (SegmentID INT, SegmentName VARCHAR(50), Length DECIMAL(10,2), Diameter DECIMAL(10,2), StartLocation VARCHAR(50), EndLocation VARCHAR(50)); INSERT INTO PipelineSegments (SegmentID, SegmentName, Length, Diameter, StartLocation, EndLocation) VALUES (1, \u0027Alaska Pipeline Segment 1\u0027, 12.34, 34.56, \u0027Prudhoe Bay\u0027, \u0027Houston\u0027); INSERT INTO PipelineSegments (SegmentID, SegmentName, Length, Diameter, StartLocation, EndLocation) VALUES (2, \u0027Alaska Pipeline Segment 2\u0027, 15.67, 45.67, \u0027Valdez\u0027, \u0027Anchorage\u0027); INSERT INTO PipelineSegments (SegmentID, SegmentName, Length, Diameter, StartLocation, EndLocation) VALUES (3, \u0027Gulf Coast Pipeline\u0027, 50.00, 75.00, \u0027Houston\u0027, \u0027New Orleans\u0027);", + "sql": "SELECT EndLocation, SUM(Length) AS Total_Length, AVG(Diameter) AS Avg_Diameter FROM PipelineSegments WHERE EndLocation \u003d \u0027Houston\u0027 GROUP BY EndLocation;", + "sql_explanation": "This query aggregates the Length and Diameter columns for pipeline segments with an end location in Houston, returning the Total_Length and Avg_Diameter for each record along with the EndLocation column." 
+}, { + "id": "1911", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which company has the most active wells in the \u0027Alberta\u0027 region?", + "sql_context": "CREATE TABLE wells (well_id INT, company VARCHAR(50), region VARCHAR(50), active BOOLEAN); INSERT INTO wells VALUES (1, \u0027Company A\u0027, \u0027Alberta\u0027, TRUE); INSERT INTO wells VALUES (2, \u0027Company B\u0027, \u0027Alberta\u0027, FALSE); INSERT INTO wells VALUES (3, \u0027Company A\u0027, \u0027Gulf of Mexico\u0027, TRUE);", + "sql": "SELECT company, COUNT(*) as num_wells FROM wells WHERE region \u003d \u0027Alberta\u0027 AND active \u003d TRUE GROUP BY company ORDER BY num_wells DESC LIMIT 1;", + "sql_explanation": "1. Filter the wells table to only include rows where the region is \u0027Alberta\u0027 and the well is active. 2. Group the resulting rows by company. 3. Select the company and number of wells columns and limit the output to the top 1 company based on the number of wells." 
+}, { + "id": "2233", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total production of oil and gas for all fields in the North Sea in 2019.", + "sql_context": "CREATE TABLE north_sea_fields (field_id INT, field_name VARCHAR(50), oil_production FLOAT, gas_production FLOAT, datetime DATETIME); INSERT INTO north_sea_fields (field_id, field_name, oil_production, gas_production, datetime) VALUES (1, \u0027North Sea Field A\u0027, 1500000, 2000000, \u00272019-01-01 00:00:00\u0027), (2, \u0027North Sea Field B\u0027, 1800000, 2500000, \u00272019-01-01 00:00:00\u0027);", + "sql": "SELECT field_name, SUM(oil_production) + SUM(gas_production) FROM north_sea_fields WHERE YEAR(datetime) \u003d 2019 GROUP BY field_name;", + "sql_explanation": "This query calculates the sum of oil and gas production for each field in the North Sea during 2019 by summing up the oil_production and gas_production column values where the year is 2019, grouped by the field_name." 
+}, { + "id": "2831", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total daily gas production for each platform", + "sql_context": "CREATE TABLE platforms (platform_id INT, daily_gas_production FLOAT); INSERT INTO platforms (platform_id, daily_gas_production) VALUES (1, 2000000), (2, 3000000), (3, 1000000), (4, 4000000);", + "sql": "SELECT platform_id, SUM(daily_gas_production) as total_daily_gas_production FROM platforms GROUP BY platform_id;", + "sql_explanation": "This query groups the platforms table by platform_id and calculates the total daily gas production for each platform." +}, { + "id": "2925", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total oil production figures for the United States, broken down by state, for the year 2018?", + "sql_context": "CREATE TABLE us_oil_production (state VARCHAR(255), oil_production DECIMAL(10,2), year INT);", + "sql": "SELECT usp.state, SUM(usp.oil_production) FROM us_oil_production usp WHERE usp.year \u003d 2018 GROUP BY usp.state;", + "sql_explanation": "This query calculates the total oil production figures for the United States, broken down by state, for the year 2018. 
It does this by selecting the oil_production column from the us_oil_production table, grouping the results by state, and calculating the total oil production for each group using the SUM function." +}, { + "id": "2966", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of wells drilled in each country and the total production for each well", + "sql_context": "CREATE TABLE wells (well_id INT, well_name TEXT, country TEXT, production FLOAT); INSERT INTO wells (well_id, well_name, country, production) VALUES (1, \u0027Well A\u0027, \u0027USA\u0027, 1500000); INSERT INTO wells (well_id, well_name, country, production) VALUES (2, \u0027Well B\u0027, \u0027Canada\u0027, 1200000);", + "sql": "SELECT country, COUNT(well_id) AS num_wells, SUM(production) AS total_production FROM wells GROUP BY country;", + "sql_explanation": "This query groups the wells by country and calculates the number of wells and total production for each country." 
+}, { + "id": "3158", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Summarize production data for a reservoir", + "sql_context": "CREATE TABLE production_data (reservoir_id INT, year INT, production FLOAT); INSERT INTO production_data (reservoir_id, year, production) VALUES (1, 2015, 50), (1, 2016, 55), (2, 2015, 100), (2, 2016, 120);", + "sql": "SELECT reservoir_id, SUM(production) FROM production_data GROUP BY reservoir_id HAVING reservoir_id \u003d 1;", + "sql_explanation": "This query calculates the total production for the Girassol reservoir (reservoir_id 1) by summing the production column values for the corresponding records, returning a total of 105." 
+}, { + "id": "3450", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total gas production in the Gulf of Mexico for each year?", + "sql_context": "CREATE TABLE gas_production (year INT, region VARCHAR(255), gas_quantity INT); INSERT INTO gas_production (year, region, gas_quantity) VALUES (2015, \u0027Gulf of Mexico\u0027, 1230000), (2016, \u0027Gulf of Mexico\u0027, 1500000), (2017, \u0027Gulf of Mexico\u0027, 1750000), (2018, \u0027Gulf of Mexico\u0027, 1900000), (2019, \u0027Gulf of Mexico\u0027, 2100000);", + "sql": "SELECT year, SUM(gas_quantity) FROM gas_production WHERE region \u003d \u0027Gulf of Mexico\u0027 GROUP BY year;", + "sql_explanation": "This query calculates the total gas production in the Gulf of Mexico for each year by summing up the gas_quantity values in the gas_production table where the region is Gulf of Mexico, grouped by year." 
+}, { + "id": "3708", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wells were drilled in each country in the year 2020?", + "sql_context": "CREATE TABLE wells (well_id INT, well_name TEXT, drill_year INT, country TEXT); INSERT INTO wells (well_id, well_name, drill_year, country) VALUES (1, \u0027Well A\u0027, 2020, \u0027USA\u0027), (2, \u0027Well B\u0027, 2019, \u0027Canada\u0027), (3, \u0027Well C\u0027, 2020, \u0027Mexico\u0027);", + "sql": "SELECT country, COUNT(*) as well_count FROM wells WHERE drill_year \u003d 2020 GROUP BY country;", + "sql_explanation": "This query selects the country column and the count of wells for each country from the wells table where the drill_year is 2020. It then groups the results by country and calculates the count of wells for each group using the COUNT function." 
+}, { + "id": "4435", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many active oil wells are there in each state in the USA?", + "sql_context": "CREATE TABLE oil_wells (state text, status text, depth real); INSERT INTO oil_wells (state, status, depth) VALUES (\u0027Texas\u0027, \u0027active\u0027, 12000), (\u0027Texas\u0027, \u0027inactive\u0027, 15000), (\u0027North Dakota\u0027, \u0027active\u0027, 9000), (\u0027Alaska\u0027, \u0027inactive\u0027, 8000), (\u0027California\u0027, \u0027active\u0027, 14000), (\u0027California\u0027, \u0027inactive\u0027, 11000), (\u0027Louisiana\u0027, \u0027active\u0027, 10000);", + "sql": "SELECT state, COUNT(*) FROM oil_wells WHERE status \u003d \u0027active\u0027 GROUP BY state;", + "sql_explanation": "This query selects the state and counts the number of rows for each state in the oil_wells table where the status is \u0027active\u0027, groups the results by state to find the number of active oil wells in each state in the USA." 
+}, { + "id": "4990", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production rate of wells drilled in the Gulf of Mexico and the North Sea?", + "sql_context": "CREATE TABLE wells (id INT, driller VARCHAR(255), well VARCHAR(255), location VARCHAR(255), production_rate FLOAT); INSERT INTO wells (id, driller, well, location, production_rate) VALUES (1, \u0027DrillerA\u0027, \u0027WellA\u0027, \u0027Gulf of Mexico\u0027, 1000), (2, \u0027DrillerB\u0027, \u0027WellB\u0027, \u0027North Sea\u0027, 1500), (3, \u0027DrillerA\u0027, \u0027WellC\u0027, \u0027Gulf of Mexico\u0027, 1200), (4, \u0027DrillerC\u0027, \u0027WellD\u0027, \u0027North Sea\u0027, 1500);", + "sql": "SELECT location, AVG(production_rate) FROM wells GROUP BY location;", + "sql_explanation": "Calculate the average production rate of wells drilled in the Gulf of Mexico and the North Sea by averaging the production_rate values in the wells table where the location is either \"Gulf of Mexico\" or \"North Sea\"." 
+}, { + "id": "459", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum sustainable urbanism score for properties in the \"EcoCity\" schema, grouped by property type?", + "sql_context": "CREATE TABLE Property (id INT, property_type VARCHAR(20), sustainable_score INT, city VARCHAR(20)); INSERT INTO Property (id, property_type, sustainable_score, city) VALUES (1, \u0027Apartment\u0027, 85, \u0027EcoCity\u0027), (2, \u0027House\u0027, 70, \u0027EcoCity\u0027), (3, \u0027Condo\u0027, 90, \u0027EcoCity\u0027);", + "sql": "SELECT Property.property_type, MAX(Property.sustainable_score) AS max_sustainable_score, MIN(Property.sustainable_score) AS min_sustainable_score FROM Property WHERE Property.city \u003d \u0027EcoCity\u0027 GROUP BY Property.property_type;", + "sql_explanation": "This query performs a group by on the Property table by property type and calculates the maximum and minimum sustainable urbanism score for each property type in the \"EcoCity\" schema." 
+}, { + "id": "2698", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum number of bedrooms for properties in each property type category?", + "sql_context": "CREATE TABLE Property_Types (name VARCHAR(50), min_bedrooms INT, max_bedrooms INT); INSERT INTO Property_Types (name, min_bedrooms, max_bedrooms) VALUES (\u0027Apartment\u0027, 1, 3), (\u0027Townhouse\u0027, 3, 5), (\u0027Single-Family Home\u0027, 3, 6);", + "sql": "SELECT name, MIN(min_bedrooms) AS min_bedrooms, MAX(max_bedrooms) AS max_bedrooms FROM Property_Types GROUP BY name;", + "sql_explanation": "Calculate the minimum and maximum number of bedrooms for properties in each property type category by grouping by the name and applying the MIN and MAX functions to the min_bedrooms and max_bedrooms columns, respectively." 
+}, { + "id": "3116", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many properties are there in each city with inclusive housing policies?", + "sql_context": "CREATE TABLE properties (property_id INT, city VARCHAR(20), inclusive_policy BOOLEAN); INSERT INTO properties (property_id, city, inclusive_policy) VALUES (1, \u0027New York\u0027, true), (2, \u0027Chicago\u0027, false), (3, \u0027New York\u0027, true);", + "sql": "SELECT city, COUNT(*) as count_of_properties FROM properties WHERE inclusive_policy \u003d true GROUP BY city;", + "sql_explanation": "Use the COUNT function and GROUP BY clause to find the number of properties in each city with inclusive housing policies. Filter the rows with the inclusive_policy column set to true." 
+}, { + "id": "3816", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of inclusive housing policies in each city?", + "sql_context": "CREATE TABLE inclusive_housing (id INT, city VARCHAR(20), policy VARCHAR(50), start_date DATE); INSERT INTO inclusive_housing (id, city, policy, start_date) VALUES (1, \u0027Boston\u0027, \u0027Accessible Housing Regulations\u0027, \u00272018-01-01\u0027), (2, \u0027Boston\u0027, \u0027Affordable Housing Requirements\u0027, \u00272019-05-01\u0027), (3, \u0027Chicago\u0027, \u0027Fair Housing Ordinance\u0027, \u00272017-12-15\u0027);", + "sql": "SELECT city, COUNT(DISTINCT policy) as num_policies FROM inclusive_housing GROUP BY city;", + "sql_explanation": "This query counts the total number of inclusive housing policies in each city by selecting the city and COUNT(DISTINCT policy) columns from the inclusive_housing table and then grouping the results by city." 
+}, { + "id": "3934", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum property size for each city?", + "sql_context": "CREATE TABLE property (id INT, size FLOAT, city VARCHAR(20)); INSERT INTO property (id, size, city) VALUES (1, 1500, \u0027Denver\u0027), (2, 2000, \u0027Portland\u0027), (3, 1000, \u0027NYC\u0027), (4, 2500, \u0027Austin\u0027);", + "sql": "SELECT city, MAX(size) as max_size, MIN(size) as min_size FROM property GROUP BY city;", + "sql_explanation": "This SQL query calculates the maximum and minimum property size for each city by grouping the property table by city, then using the MAX and MIN functions to find the maximum and minimum size for each city." 
+}, { + "id": "3943", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of properties in each city in the database, ordered by the number of properties in descending order.", + "sql_context": "CREATE TABLE city_properties (city VARCHAR(50), property_id INT);", + "sql": "SELECT city, COUNT(*) AS count FROM city_properties GROUP BY city ORDER BY count DESC;", + "sql_explanation": "This query lists the number of properties in each city in the database, ordered by the number of properties in descending order. It does this by using the COUNT function to count the number of properties, grouped by city, and then ordering the results in descending order by the count." 
+}, { + "id": "4270", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum property price in each location category for co-ownership properties?", + "sql_context": "CREATE TABLE co_ownership_prices (property_id INT, property_price FLOAT, location VARCHAR(50)); INSERT INTO co_ownership_prices (property_id, property_price, location) VALUES (101, 500000.00, \u0027Urban\u0027), (102, 400000.00, \u0027Suburban\u0027), (103, 300000.00, \u0027Rural\u0027), (104, 700000.00, \u0027Urban\u0027), (105, 600000.00, \u0027Urban\u0027);", + "sql": "SELECT location, MIN(property_price) FROM co_ownership_prices GROUP BY location;", + "sql_explanation": "This query finds the minimum property price in each location category for co-ownership properties by grouping the co_ownership_prices table by location and selecting the minimum property_price in each group." 
+}, { + "id": "4396", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average price of properties in each city, grouped by the number of owners?", + "sql_context": "CREATE TABLE properties (property_id INT, price DECIMAL(10,2), size INT, city VARCHAR(50), num_owners INT); INSERT INTO properties (property_id, price, size, city, num_owners) VALUES (1, 500000, 2000, \u0027Oakland\u0027, 1), (2, 600000, 2500, \u0027San Francisco\u0027, 2), (3, 450000, 1000, \u0027Oakland\u0027, 1);", + "sql": "SELECT city, num_owners, AVG(price) FROM properties GROUP BY city, num_owners;", + "sql_explanation": "The SQL query returns the average price of properties in each city, grouped by the number of owners. It does this by using the GROUP BY clause to group the rows by the city and num_owners columns, and then calculating the average price for each group using the AVG aggregate function." 
+}, { + "id": "4978", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of sustainable urban properties in each city?", + "sql_context": "CREATE TABLE sustainable_cities (id INT, city VARCHAR(20), properties INT); INSERT INTO sustainable_cities (id, city, properties) VALUES (1, \u0027Buenos Aires\u0027, 500), (2, \u0027Rio de Janeiro\u0027, 300), (3, \u0027Santiago\u0027, 400), (4, \u0027Bogota\u0027, 600);", + "sql": "SELECT city, AVG(properties) FROM sustainable_cities GROUP BY city;", + "sql_explanation": "This query calculates the average number of sustainable urban properties in each city by selecting the city column and using the AVG function while grouping the results by the city column." 
+}, { + "id": "657", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view named \"top_five_artists_by_donations\" that displays the top 5 artists with the highest total donations to museums.", + "sql_context": "CREATE TABLE museum_donations (donor_artist_id INT, museum_id INT, donation_value DECIMAL(10,2));", + "sql": "CREATE VIEW top_five_artists_by_donations AS SELECT donor_artist_id, SUM(donation_value) AS total_donated FROM museum_donations GROUP BY donor_artist_id ORDER BY total_donated DESC FETCH FIRST 5 ROWS ONLY;", + "sql_explanation": "The query creates a view named \"top_five_artists_by_donations\" that shows the top 5 artists with the highest total donations to museums." 
+}, { + "id": "1556", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has the \u0027Las Meninas\u0027 painting been displayed in the last 2 years?", + "sql_context": "CREATE TABLE artworks (id INT, name TEXT, museum_id INT, display_date DATE); INSERT INTO artworks (id, name, museum_id, display_date) VALUES (1, \u0027Las Meninas\u0027, 1, \u00272020-01-01\u0027), (2, \u0027Mona Lisa\u0027, 1, \u00272020-02-01\u0027), (3, \u0027Guernica\u0027, 2, \u00272022-03-15\u0027), (4, \u0027Las Meninas\u0027, 1, \u00272022-04-01\u0027), (5, \u0027The Persistence of Memory\u0027, 3, \u00272022-05-01\u0027);", + "sql": "SELECT name, COUNT(*) AS display_count FROM artworks WHERE name \u003d \u0027Las Meninas\u0027 AND display_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR) GROUP BY name;", + "sql_explanation": "The SQL query calculates the number of times the \u0027Las Meninas\u0027 painting has been displayed in the last 2 years using the COUNT function and GROUP BY statement. It filters the data for the specified painting and date range using the WHERE clause and the DATE_SUB and CURRENT_DATE functions." 
+}, { + "id": "1691", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artist from the \u0027Cubism\u0027 movement has the most art pieces in the collection?", + "sql_context": "CREATE TABLE art_pieces (piece_id INT, artist_name VARCHAR(50), artist_gender VARCHAR(10), artist_ethnicity VARCHAR(20), movement VARCHAR(20)); INSERT INTO art_pieces (piece_id, artist_name, artist_gender, artist_ethnicity, movement) VALUES (1, \u0027Pablo Picasso\u0027, \u0027Male\u0027, \u0027Spanish\u0027, \u0027Cubism\u0027); INSERT INTO art_pieces (piece_id, artist_name, artist_gender, artist_ethnicity, movement) VALUES (2, \u0027Georges Braque\u0027, \u0027Male\u0027, \u0027French\u0027, \u0027Cubism\u0027);", + "sql": "SELECT artist_name, COUNT(*) as art_piece_count FROM art_pieces WHERE movement \u003d \u0027Cubism\u0027 GROUP BY artist_name ORDER BY art_piece_count DESC LIMIT 1;", + "sql_explanation": "This query identifies the Cubism artist with the most art pieces in the collection by selecting all records with a movement of \u0027Cubism\u0027, grouping the results by artist_name, and then returning the artist with the highest count of art pieces." 
+}, { + "id": "2132", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 artists with the most pieces in the modern art collection?", + "sql_context": "CREATE TABLE ArtPieces (id INT, artist VARCHAR(50), collection VARCHAR(50)); INSERT INTO ArtPieces (id, artist, collection) VALUES (1, \u0027Artist C\u0027, \u0027Modern Art\u0027), (2, \u0027Artist D\u0027, \u0027Modern Art\u0027), (3, \u0027Artist E\u0027, \u0027Modern Art\u0027), (4, \u0027Artist C\u0027, \u0027Classic Art\u0027), (5, \u0027Artist F\u0027, \u0027Modern Art\u0027);", + "sql": "SELECT artist, COUNT(*) AS num_pieces FROM ArtPieces WHERE collection \u003d \u0027Modern Art\u0027 GROUP BY artist ORDER BY num_pieces DESC LIMIT 3;", + "sql_explanation": "This SQL query finds the top 3 artists with the most pieces in the modern art collection by using the COUNT function with an asterisk (*), grouping the results by the artist column, and ordering the results by the num_pieces column in descending order with the ORDER BY clause. The LIMIT clause is used to only return three rows." 
+}, { + "id": "2419", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average attendance at dance performances in New York and Chicago?", + "sql_context": "CREATE TABLE performances (id INT, city TEXT, category TEXT, attendance INT); INSERT INTO performances (id, city, category, attendance) VALUES (1, \u0027New York\u0027, \u0027Dance\u0027, 100), (2, \u0027New York\u0027, \u0027Theater\u0027, 200), (3, \u0027Chicago\u0027, \u0027Dance\u0027, 300), (4, \u0027Chicago\u0027, \u0027Theater\u0027, 150);", + "sql": "SELECT city, AVG(attendance) FROM performances WHERE category \u003d \u0027Dance\u0027 GROUP BY city HAVING city IN (\u0027New York\u0027, \u0027Chicago\u0027);", + "sql_explanation": "The SQL query first filters the \u0027performances\u0027 table to only include rows with a category of \u0027Dance\u0027. It then groups the table by city and uses the AVG() function to calculate the average value of the \u0027attendance\u0027 column for the filtered table. The HAVING clause is used to only include rows with cities as \u0027New York\u0027 or \u0027Chicago\u0027. The result is the average attendance at dance performances in New York and Chicago." 
+}, { + "id": "3902", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of artworks by artist and medium, pivoted to display the artist and medium in separate columns?", + "sql_context": "CREATE TABLE artworks (id INT, artwork VARCHAR(50), artist VARCHAR(50), medium VARCHAR(50), value INT); INSERT INTO artworks (id, artwork, artist, medium, value) VALUES (1, \u0027Painting\u0027, \u0027John Smith\u0027, \u0027Painting\u0027, 10000), (2, \u0027Sculpture\u0027, \u0027Maria Rodriguez\u0027, \u0027Sculpture\u0027, 15000), (3, \u0027Print\u0027, \u0027Jacques Leclerc\u0027, \u0027Print\u0027, 5000);", + "sql": "SELECT artist, medium, SUM(value) as total_value FROM artworks GROUP BY artist, medium;", + "sql_explanation": "This SQL query calculates the total value of artworks by artist and medium, pivoted to display the artist and medium in separate columns. It groups the records in the artworks table by the artist and medium columns and then calculates the sum of the value column for each group using the SUM() function." 
+}, { + "id": "4202", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each art exhibition in each city?", + "sql_context": "CREATE TABLE Exhibitions (id INT, name VARCHAR(255), city VARCHAR(255), start_date DATE, end_date DATE, revenue FLOAT);", + "sql": "SELECT e.city, e.name, SUM(e.revenue) FROM Exhibitions e GROUP BY e.city, e.name;", + "sql_explanation": "This query calculates the total revenue for each art exhibition in each city by summing up the revenue for each exhibition and grouping them by their respective city and name." +}, { + "id": "4237", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of art collections by medium in New York?", + "sql_context": "CREATE TABLE Collections (city VARCHAR(20), medium VARCHAR(20), pieces INT); INSERT INTO Collections (city, medium, pieces) VALUES (\u0027New York\u0027, \u0027Painting\u0027, 500), (\u0027New York\u0027, \u0027Sculpture\u0027, 300), (\u0027New York\u0027, \u0027Print\u0027, 200), (\u0027Los Angeles\u0027, \u0027Painting\u0027, 700);", + "sql": "SELECT medium, COUNT(*) FROM Collections WHERE city \u003d 
\u0027New York\u0027 GROUP BY medium;", + "sql_explanation": "This query calculates the distribution of art collections by medium in New York, by using the COUNT function on the pieces column, and grouping the result by medium." +}, { + "id": "4562", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of artworks in the \u0027Artworks\u0027 table, grouped by art category?", + "sql_context": "CREATE TABLE Artworks (id INT, art_category VARCHAR(255), artist_name VARCHAR(255), year INT, art_medium VARCHAR(255));", + "sql": "SELECT art_category, COUNT(*) as total FROM Artworks GROUP BY art_category;", + "sql_explanation": "This SQL query groups the Artworks table by the art_category column and calculates the count of records for each group." 
+}, { + "id": "5366", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of artworks in the \u0027Artworks\u0027 table for each year?", + "sql_context": "CREATE TABLE Artworks (id INT, art_category VARCHAR(255), artist_name VARCHAR(255), year INT, art_medium VARCHAR(255), price DECIMAL(10,2));", + "sql": "SELECT year, COUNT(*) as total FROM Artworks GROUP BY year;", + "sql_explanation": "This SQL query groups the Artworks table by the year column and calculates the count of records for each group." +}, { + "id": "5530", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors to museums in Tokyo and Seoul?", + "sql_context": "CREATE TABLE Museums (city VARCHAR(20), name VARCHAR(30), visitors INT); INSERT INTO Museums (city, name, visitors) VALUES (\u0027Tokyo\u0027, \u0027Tokyo National Museum\u0027, 2000000), (\u0027Tokyo\u0027, \u0027Edo-Tokyo Museum\u0027, 1500000), (\u0027Seoul\u0027, \u0027National Museum of Korea\u0027, 3000000), (\u0027Seoul\u0027, \u0027Gyeongbokgung Palace\u0027, 2500000);", + "sql": "SELECT city, SUM(visitors) FROM Museums GROUP BY city;", + 
"sql_explanation": "The SQL query calculates the total number of visitors to museums in Tokyo and Seoul by using the SUM function on the visitors column, and grouping the result by city with the GROUP BY clause." +}, { + "id": "883", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total construction costs and average project timelines for companies that have worked on both government-funded and privately-funded projects in the state of Washington, grouped by sustainability status?", + "sql_context": "CREATE TABLE Company_Projects_WA (Company TEXT, Project_ID INT, Funding TEXT, Sustainable BOOLEAN, Cost FLOAT, Timeline INT); INSERT INTO Company_Projects_WA (Company, Project_ID, Funding, Sustainable, Cost, Timeline) VALUES (\u0027Miller \u0026 Sons\u0027, 1, \u0027Government\u0027, true, 1500000, 365), (\u0027Miller \u0026 Sons\u0027, 2, \u0027Private\u0027, true, 2000000, 420), (\u0027Smith Constructors\u0027, 3, \u0027Government\u0027, true, 1200000, 450), (\u0027Smith Constructors\u0027, 4, \u0027Private\u0027, false, 1800000, 500), (\u0027Eco Builders\u0027, 5, \u0027Government\u0027, true, 900000, 400), (\u0027Eco Builders\u0027, 6, \u0027Private\u0027, true, 1300000, 440), (\u0027Green \u0026 Co.\u0027, 7, \u0027Government\u0027, true, 1000000, 380), (\u0027Green \u0026 Co.\u0027, 8, \u0027Government\u0027, true, 1400000, 425), (\u0027Green \u0026 Co.\u0027, 9, \u0027Private\u0027, false, 1100000, 475);", + "sql": "SELECT cp.Sustainable, cp.Company, AVG(cp.Cost), AVG(cp.Timeline) FROM Company_Projects_WA cp WHERE cp.Funding \u003d 
\u0027Government\u0027 OR cp.Funding \u003d \u0027Private\u0027 GROUP BY cp.Sustainable, cp.Company;", + "sql_explanation": "This query calculates the total construction costs and average project timelines for companies that have worked on both government-funded and privately-funded projects in the state of Washington, grouped by sustainability status. It does this by grouping the results by sustainability status, company, and calculating the average cost and timeline for each group." +}, { + "id": "1113", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many construction laborers were employed in Texas in 2021, by gender?", + "sql_context": "CREATE TABLE labor (labor_id INT, state VARCHAR(50), employed_date DATE, gender VARCHAR(50), profession VARCHAR(50)); INSERT INTO labor (labor_id, state, employed_date, gender, profession) VALUES (1, \u0027Texas\u0027, \u00272021-01-01\u0027, \u0027Male\u0027, \u0027Construction\u0027); INSERT INTO labor (labor_id, state, employed_date, gender, profession) VALUES (2, \u0027Texas\u0027, \u00272021-01-10\u0027, \u0027Female\u0027, \u0027Construction\u0027);", + "sql": "SELECT state, gender, COUNT(*) FROM labor WHERE profession \u003d \u0027Construction\u0027 AND employed_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 AND state \u003d \u0027Texas\u0027 GROUP BY state, gender;", + "sql_explanation": "Counts the number of construction laborers in Texas in 2021, grouped by gender" +}, { + "id": "1674", + "domain": "construction", + "domain_description": "Building permit data, construction labor 
statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average construction cost for non-sustainable building projects in Florida per month in 2022?", + "sql_context": "CREATE TABLE Projects (project_id INT, state VARCHAR(255), is_sustainable BOOLEAN, cost FLOAT, start_date DATE); INSERT INTO Projects (project_id, state, is_sustainable, cost, start_date) VALUES (1, \u0027Florida\u0027, false, 500000, \u00272022-01-01\u0027), (2, \u0027Florida\u0027, true, 700000, \u00272022-02-01\u0027);", + "sql": "SELECT AVG(cost) FROM Projects WHERE state \u003d \u0027Florida\u0027 AND is_sustainable \u003d false AND YEAR(start_date) \u003d 2022 GROUP BY EXTRACT(MONTH FROM start_date);", + "sql_explanation": "This query calculates the average construction cost for non-sustainable building projects in Florida per month in 2022. It filters the data based on the state, is_sustainable, and the year of the start date, then groups the results by month, and finally calculates the average value of the cost for each group." 
+}, { + "id": "2193", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total construction labor hours per week in California, for each building type", + "sql_context": "CREATE TABLE labor_hours_3 (worker_id INT, state VARCHAR(20), building_type VARCHAR(20), hours_per_week DECIMAL(5,2)); INSERT INTO labor_hours_3 (worker_id, state, building_type, hours_per_week) VALUES (1, \u0027CA\u0027, \u0027Residential\u0027, 25.00), (2, \u0027CA\u0027, \u0027Commercial\u0027, 35.00), (3, \u0027CA\u0027, \u0027Industrial\u0027, 45.00);", + "sql": "SELECT state, building_type, SUM(hours_per_week) as total_hours FROM labor_hours_3 WHERE state \u003d \u0027CA\u0027 GROUP BY state, building_type;", + "sql_explanation": "This query calculates the total construction labor hours per week in California, for each building type by summing the hours_per_week column and grouping the results by state and building_type." 
+}, { + "id": "2899", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of construction workers in the USA, partitioned by their job titles?", + "sql_context": "CREATE TABLE construction_workers (id INT, job_title VARCHAR(50), salary FLOAT, country VARCHAR(50)); INSERT INTO construction_workers (id, job_title, salary, country) VALUES (1, \u0027Carpenter\u0027, 45000, \u0027USA\u0027), (2, \u0027Electrician\u0027, 55000, \u0027USA\u0027), (3, \u0027Plumber\u0027, 50000, \u0027USA\u0027);", + "sql": "SELECT job_title, AVG(salary) as avg_salary FROM construction_workers WHERE country \u003d \u0027USA\u0027 GROUP BY job_title;", + "sql_explanation": "This query calculates the average salary of construction workers in the USA, partitioned by their job titles. It does so by using the AVG function on the salary column, while grouping by the job_title column and filtering for rows where the country is \u0027USA\u0027." 
+}, { + "id": "3741", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average project timeline for sustainable projects in the state of California, and what is the average project timeline for non-sustainable projects in the state of California?", + "sql_context": "CREATE TABLE Project_Timelines (Project_ID INT, Sustainable BOOLEAN, Timeline INT); INSERT INTO Project_Timelines (Project_ID, Sustainable, Timeline) VALUES (1, true, 365), (2, true, 420), (3, true, 450), (4, false, 500), (5, true, 400), (6, false, 440), (7, true, 380), (8, true, 425), (9, false, 475), (10, false, 460);", + "sql": "SELECT pt.Sustainable, AVG(pt.Timeline) FROM Project_Timelines pt GROUP BY pt.Sustainable;", + "sql_explanation": "This query calculates the average project timelines for sustainable and non-sustainable projects in the state of California by grouping the results by sustainability status and calculating the average timeline for each group." 
+}, { + "id": "3755", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many building permits were issued in each state in 2020?", + "sql_context": "CREATE TABLE BuildingPermits (id INT, permit_date DATE, state VARCHAR(20)); INSERT INTO BuildingPermits (id, permit_date, state) VALUES (1, \u00272020-01-01\u0027, \u0027California\u0027), (2, \u00272019-12-31\u0027, \u0027Texas\u0027);", + "sql": "SELECT state, COUNT(*) FROM BuildingPermits WHERE YEAR(permit_date) \u003d 2020 GROUP BY state;", + "sql_explanation": "The SQL query counts the number of building permits issued in each state in 2020. It filters the BuildingPermits table for permits issued in 2020, then groups the results by state and counts the number of rows in each group." 
+}, { + "id": "1014", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each menu category for the current week?", + "sql_context": "CREATE TABLE menu_engineering (menu_category VARCHAR(255), date DATE, revenue DECIMAL(10,2)); INSERT INTO menu_engineering (menu_category, date, revenue) VALUES (\u0027Appetizers\u0027, \u00272022-01-01\u0027, 500.00), (\u0027Entrees\u0027, \u00272022-01-01\u0027, 1000.00), (\u0027Desserts\u0027, \u00272022-01-01\u0027, 600.00), (\u0027Appetizers\u0027, \u00272022-01-02\u0027, 550.00), (\u0027Entrees\u0027, \u00272022-01-02\u0027, 1100.00), (\u0027Desserts\u0027, \u00272022-01-02\u0027, 650.00);", + "sql": "SELECT menu_category, SUM(revenue) as total_revenue FROM menu_engineering WHERE date BETWEEN DATEADD(day, -DATEPART(dw, GETDATE()), GETDATE()) AND GETDATE() GROUP BY menu_category;", + "sql_explanation": "The SQL query calculates the total revenue for each menu category for the current week by summing the revenue for each menu category using the SUM() function and grouping the results by menu_category using the GROUP BY clause. The query filters the data to only include revenue from the current week using the WHERE clause, DATEADD() function, and GETDATE() function." 
+}, { + "id": "1279", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many food safety violations occurred in each restaurant in 2021?", + "sql_context": "CREATE TABLE inspections (restaurant_name TEXT, violation_count INTEGER, inspection_date DATE); INSERT INTO inspections (restaurant_name, violation_count, inspection_date) VALUES (\u0027ABC Bistro\u0027, 2, \u00272021-04-01\u0027), (\u0027ABC Bistro\u0027, 1, \u00272021-07-01\u0027), (\u0027XYZ CafÊ\u0027, 0, \u00272021-02-01\u0027), (\u0027XYZ CafÊ\u0027, 3, \u00272021-11-01\u0027);", + "sql": "SELECT restaurant_name, SUM(violation_count) as total_violations FROM inspections WHERE inspection_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY restaurant_name;", + "sql_explanation": "This query calculates the total number of food safety violations for each restaurant in 2021 by summing the violation count for each inspection and grouping by the restaurant name." 
+}, { + "id": "1536", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the revenue for each menu category in the \u0027Fusion Flavors\u0027 restaurant for the past week?", + "sql_context": "CREATE TABLE revenue (restaurant_id INT, date DATE, revenue INT, category VARCHAR(50)); INSERT INTO revenue (restaurant_id, date, revenue, category) VALUES (11, \u00272022-06-01\u0027, 5000, \u0027Asian Fusion\u0027), (11, \u00272022-06-02\u0027, 6000, \u0027Mediterranean Fusion\u0027), (11, \u00272022-06-01\u0027, 4000, \u0027Latin Fusion\u0027), (11, \u00272022-06-02\u0027, 7000, \u0027Asian Fusion\u0027);", + "sql": "SELECT category, SUM(revenue) as total_revenue FROM revenue WHERE restaurant_id \u003d 11 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY category;", + "sql_explanation": "This query filters revenue for the \u0027Fusion Flavors\u0027 restaurant (WHERE restaurant_id \u003d 11) and the past week (WHERE date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK)). It calculates the total revenue for each menu category using GROUP BY category." 
+}, { + "id": "1883", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales for each menu category in January 2020?", + "sql_context": "CREATE TABLE restaurant_sales (date DATE, menu_category VARCHAR(255), sales FLOAT); INSERT INTO restaurant_sales (date, menu_category, sales) VALUES (\u00272020-01-01\u0027, \u0027Appetizers\u0027, 1200), (\u00272020-01-01\u0027, \u0027Entrees\u0027, 3500), (\u00272020-01-01\u0027, \u0027Desserts\u0027, 1800), (\u00272020-01-02\u0027, \u0027Appetizers\u0027, 1400), (\u00272020-01-02\u0027, \u0027Entrees\u0027, 3000), (\u00272020-01-02\u0027, \u0027Desserts\u0027, 1600);", + "sql": "SELECT menu_category, SUM(sales) as total_sales FROM restaurant_sales WHERE date BETWEEN \u00272020-01-01\u0027 AND \u00272020-01-31\u0027 GROUP BY menu_category;", + "sql_explanation": "This query calculates the total sales for each menu category in January 2020. It filters the restaurant_sales table for entries in January, groups the results by menu category, and calculates the sum of sales for each category." 
+}, { + "id": "2280", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 menu items by revenue across all restaurants, displaying the menu item name and total revenue. Use data from the menu_sales table.", + "sql_context": "CREATE TABLE menu_sales (restaurant_id INT, menu_item_name TEXT, revenue INT); INSERT INTO menu_sales (restaurant_id, menu_item_name, revenue) VALUES (1, \u0027Cheeseburger\u0027, 500), (1, \u0027Fries\u0027, 300), (1, \u0027Salad\u0027, 250), (2, \u0027Pad Thai\u0027, 700), (2, \u0027Spring Rolls\u0027, 400), (3, \u0027BBQ Burger\u0027, 600), (3, \u0027Onion Rings\u0027, 200);", + "sql": "SELECT menu_item_name, SUM(revenue) as total_revenue FROM menu_sales GROUP BY menu_item_name ORDER BY total_revenue DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the total revenue for each menu item by grouping by the menu_item_name and summing the revenue. It then orders the result set by the total_revenue column in descending order and limits the result set to the top 3 rows. This returns the top 3 menu items by revenue." 
+}, { + "id": "2759", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average revenue of restaurants with more than 3 inspections", + "sql_context": "CREATE TABLE restaurant_data (id INT, restaurant_name VARCHAR(255), revenue INT, number_of_inspections INT); INSERT INTO restaurant_data (id, restaurant_name, revenue, number_of_inspections) VALUES (1, \u0027Bella Italia\u0027, 25000, 4); INSERT INTO restaurant_data (id, restaurant_name, revenue, number_of_inspections) VALUES (2, \u0027Sushi House\u0027, 30000, 2);", + "sql": "SELECT restaurant_name, AVG(revenue) FROM restaurant_data WHERE number_of_inspections \u003e 3 GROUP BY restaurant_name;", + "sql_explanation": "Find the average revenue of restaurants with more than 3 inspections by filtering the number_of_inspections column with the value \u0027\u003e 3\u0027 and then applying the AVG function while grouping the results by restaurant_name." 
+}, { + "id": "2849", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the top 3 cuisines with the highest average revenue across all cities.", + "sql_context": "CREATE TABLE Restaurants (restaurant_id INT, name TEXT, city TEXT, cuisine TEXT, revenue FLOAT); INSERT INTO Restaurants (restaurant_id, name, city, cuisine, revenue) VALUES (1, \u0027Asian Fusion\u0027, \u0027New York\u0027, \u0027Asian\u0027, 50000.00), (2, \u0027Bella Italia\u0027, \u0027Los Angeles\u0027, \u0027Italian\u0027, 60000.00), (3, \u0027Sushi House\u0027, \u0027New York\u0027, \u0027Asian\u0027, 70000.00), (4, \u0027Pizzeria La Rosa\u0027, \u0027Chicago\u0027, \u0027Italian\u0027, 80000.00), (5, \u0027Taqueria El Sol\u0027, \u0027Los Angeles\u0027, \u0027Mexican\u0027, 40000.00);", + "sql": "SELECT cuisine, AVG(revenue) as avg_revenue FROM Restaurants GROUP BY cuisine ORDER BY avg_revenue DESC LIMIT 3;", + "sql_explanation": "The SQL query displays the top 3 cuisines with the highest average revenue across all cities by using the AVG() function to calculate the average revenue for each cuisine and the GROUP BY clause to separate the results by cuisine. The ORDER BY clause is used to sort the results by average revenue in descending order, and the LIMIT clause is used to limit the results to the top 3 cuisines." 
+}, { + "id": "2987", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common food safety issue?", + "sql_context": "CREATE TABLE Inspections (inspection_id INT, restaurant_id INT, issue VARCHAR(255)); INSERT INTO Inspections (inspection_id, restaurant_id, issue) VALUES (1, 1, \u0027Missing handwashing sign\u0027), (2, 2, \u0027Improper food storage\u0027), (3, 3, \u0027Missing handwashing sign\u0027), (4, 1, \u0027Improper food storage\u0027), (5, 2, \u0027Improper food storage\u0027);", + "sql": "SELECT issue, COUNT(issue) as issue_count FROM Inspections GROUP BY issue ORDER BY issue_count DESC LIMIT 1;", + "sql_explanation": "This query groups the Inspections table by issue, counts the occurrences of each issue, and orders the results in descending order to find the most common food safety issue by selecting the issue column and using the COUNT function on the issue column." 
+}, { + "id": "3029", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for restaurants serving Mediterranean cuisine?", + "sql_context": "CREATE TABLE Restaurants (RestaurantID int, CuisineType varchar(255), Revenue int); INSERT INTO Restaurants (RestaurantID, CuisineType, Revenue) VALUES (1, \u0027Italian\u0027, 5000), (2, \u0027Mexican\u0027, 6000), (3, \u0027Indian\u0027, 7000), (4, \u0027Chinese\u0027, 8000), (5, \u0027French\u0027, 9000), (6, \u0027Thai\u0027, 10000), (7, \u0027Mediterranean\u0027, 11000), (8, \u0027Mediterranean\u0027, 12000);", + "sql": "SELECT CuisineType, SUM(Revenue) FROM Restaurants WHERE CuisineType \u003d \u0027Mediterranean\u0027 GROUP BY CuisineType;", + "sql_explanation": "This query groups the data by the cuisine type and calculates the total revenue for Mediterranean restaurants using the SUM function." 
+}, { + "id": "3598", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of restaurants that passed their food safety inspections in each region?", + "sql_context": "CREATE TABLE FoodInspections (restaurant_id INT, region VARCHAR(50), passed BOOLEAN); INSERT INTO FoodInspections (restaurant_id, region, passed) VALUES (1, \u0027North\u0027, TRUE), (1, \u0027North\u0027, FALSE), (2, \u0027North\u0027, TRUE), (2, \u0027South\u0027, TRUE), (2, \u0027South\u0027, FALSE);", + "sql": "SELECT region, AVG(IF(passed, 1, 0)) as pass_percentage FROM foodinspections GROUP BY region;", + "sql_explanation": "This query calculates the percentage of restaurants that passed their food safety inspections in each region." 
+}, { + "id": "3912", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average health score for each restaurant category in the \u0027fast_food\u0027 schema.", + "sql_context": "CREATE TABLE fast_food.health_inspections (restaurant_id INT, category TEXT, health_score INT); INSERT INTO fast_food.health_inspections (restaurant_id, category, health_score) VALUES (1, \u0027Burger\u0027, 90), (2, \u0027Pizza\u0027, 85), (3, \u0027Fried Chicken\u0027, 80);", + "sql": "SELECT category, AVG(health_score) FROM fast_food.health_inspections GROUP BY category;", + "sql_explanation": "The SQL query calculates the average health score for each restaurant category in the \u0027fast_food\u0027 schema by using the GROUP BY clause to group records by the \u0027category\u0027 column and the AVG() function to calculate the average health score for each group." 
+}, { + "id": "5008", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average food safety score for restaurants in each city?", + "sql_context": "CREATE TABLE food_safety_inspections(restaurant_id INT, city TEXT, score FLOAT); INSERT INTO food_safety_inspections(restaurant_id, city, score) VALUES (1, \u0027New York\u0027, 95.0), (2, \u0027New York\u0027, 90.0), (3, \u0027Los Angeles\u0027, 85.0), (4, \u0027Los Angeles\u0027, 92.0);", + "sql": "SELECT city, AVG(score) FROM food_safety_inspections GROUP BY city;", + "sql_explanation": "The SQL query calculates the average food safety score for restaurants in each city by using the AVG() function to find the average score for each city and the GROUP BY clause to group the results by city." 
+}, { + "id": "5170", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each cuisine category?", + "sql_context": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(50), cuisine VARCHAR(50), revenue INT); INSERT INTO restaurants VALUES (1, \u0027Asian Fusion\u0027, \u0027Asian\u0027, 5000), (2, \u0027Tuscan Bistro\u0027, \u0027Italian\u0027, 7000), (3, \u0027Baja Coast\u0027, \u0027Mexican\u0027, 4000);", + "sql": "SELECT cuisine, SUM(revenue) FROM restaurants GROUP BY cuisine;", + "sql_explanation": "This SQL query calculates the total revenue for each cuisine category by grouping the records based on the cuisine column and then applying the SUM function to the revenue column." 
+}, { + "id": "5419", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue generated per day from catering orders?", + "sql_context": "CREATE TABLE Orders (id INT, order_channel VARCHAR(50), price DECIMAL(10,2), date DATE); CREATE VIEW Catering_Orders AS SELECT price FROM Orders WHERE order_channel \u003d \u0027catering\u0027;", + "sql": "SELECT AVG(SUM(price)) FROM Catering_Orders GROUP BY date;", + "sql_explanation": "The SQL query calculates the average revenue generated per day from catering orders by selecting from the \u0027Catering_Orders\u0027 view, grouping by \u0027date\u0027, and calculating the average of the sum of the \u0027price\u0027 column." 
+}, { + "id": "1782", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average pollution level for the last 6 months in the Pacific Ocean and the Atlantic Ocean", + "sql_context": "CREATE TABLE ocean_pollution (id INT, location VARCHAR(255), pollution_level INT, measurement_date DATE); INSERT INTO ocean_pollution (id, location, pollution_level, measurement_date) VALUES (1, \u0027Pacific Ocean\u0027, 50, \u00272021-01-01\u0027), (2, \u0027Pacific Ocean\u0027, 45, \u00272021-02-01\u0027), (3, \u0027Pacific Ocean\u0027, 40, \u00272021-03-01\u0027), (4, \u0027Atlantic Ocean\u0027, 70, \u00272021-01-01\u0027), (5, \u0027Atlantic Ocean\u0027, 75, \u00272021-02-01\u0027), (6, \u0027Atlantic Ocean\u0027, 80, \u00272021-03-01\u0027);", + "sql": "SELECT location, AVG(pollution_level) average_pollution FROM ocean_pollution WHERE measurement_date \u003e\u003d NOW() - INTERVAL 6 MONTH GROUP BY location;", + "sql_explanation": "This query calculates the average pollution level for the last 6 months in the Pacific Ocean and the Atlantic Ocean by using the AVG() aggregate function to calculate the average pollution_level column value based on the location and measurement_date columns. The WHERE clause filters the rows to only those with a measurement_date in the last 6 months using the NOW() and INTERVAL functions. The outer query then groups the rows by location and calculates the average pollution level for each location." 
+}, { + "id": "2251", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of pollution violations in the Caribbean region in the \u0027Compliance\u0027 schema.", + "sql_context": "CREATE SCHEMA Compliance;CREATE TABLE PollutionViolations (id INT, country TEXT, region TEXT, year INT, violations INT); INSERT INTO PollutionViolations (id, country, region, year, violations) VALUES (1, \u0027Bahamas\u0027, \u0027Caribbean\u0027, 2019, 3), (2, \u0027Jamaica\u0027, \u0027Caribbean\u0027, 2020, 5), (3, \u0027Puerto Rico\u0027, \u0027Caribbean\u0027, 2019, 2), (4, \u0027Cuba\u0027, \u0027Caribbean\u0027, 2020, 4), (5, \u0027Haiti\u0027, \u0027Caribbean\u0027, 2019, 6), (6, \u0027Dominican Republic\u0027, \u0027Caribbean\u0027, 2020, 7);", + "sql": "SELECT region, SUM(violations) AS total_violations FROM Compliance.PollutionViolations WHERE region \u003d \u0027Caribbean\u0027 GROUP BY region;", + "sql_explanation": "1. Select the region and the sum of violations for the Caribbean region from the PollutionViolations table." 
+}, { + "id": "2254", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of whale sightings in the Arctic ocean.", + "sql_context": "CREATE TABLE whale_sightings (species TEXT, ocean TEXT, sighting_count INTEGER); INSERT INTO whale_sightings (species, ocean, sighting_count) VALUES (\u0027Blue Whale\u0027, \u0027Arctic Ocean\u0027, 25), (\u0027Bowhead Whale\u0027, \u0027Arctic Ocean\u0027, 30), (\u0027Beluga Whale\u0027, \u0027Arctic Ocean\u0027, 45);", + "sql": "SELECT species, SUM(sighting_count) FROM whale_sightings WHERE ocean \u003d \u0027Arctic Ocean\u0027 AND species \u003d \u0027Blue Whale\u0027 GROUP BY species;", + "sql_explanation": "This SQL query filters records to only those in the \u0027Arctic Ocean\u0027 and only the \u0027Blue Whale\u0027 species. The query then calculates the total sighting count for the matching records." 
+}, { + "id": "2716", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total area of ocean floor mapping projects in a specific region.", + "sql_context": "CREATE TABLE mapping_projects (project_id INT, name VARCHAR(255), area_km FLOAT, region VARCHAR(255));", + "sql": "SELECT region, SUM(area_km) AS total_area FROM mapping_projects WHERE region \u003d \u0027Caribbean\u0027 GROUP BY region;", + "sql_explanation": "The query filters the results for projects in the Caribbean region and then groups the results by region, calculating the sum of the \u0027area_km\u0027 column for each group." 
+}, { + "id": "2847", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of ocean floor mapping projects by country?", + "sql_context": "CREATE SCHEMA oceans;CREATE TABLE oceans.mapping_projects (id INT PRIMARY KEY, country VARCHAR(50), avg_depth DECIMAL(5,2)); INSERT INTO oceans.mapping_projects (id, country, avg_depth) VALUES (1, \u0027Canada\u0027, 4500.00), (2, \u0027Mexico\u0027, 3500.00);", + "sql": "SELECT context.country, AVG(context.avg_depth) FROM oceans.mapping_projects AS context GROUP BY context.country;", + "sql_explanation": "This SQL query calculates the average depth of ocean floor mapping projects for each country in the oceans.mapping_projects table by grouping the records by country and applying the AVG function to the avg_depth column." 
+}, { + "id": "3346", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have more than one marine law?", + "sql_context": "CREATE TABLE Laws (id INT, country VARCHAR(255), name VARCHAR(255), description TEXT); INSERT INTO Laws (id, country, name, description) VALUES (5, \u0027UK\u0027, \u0027Maritime Law\u0027, \u0027Regulates navigation and commerce in the UK waters\u0027); INSERT INTO Laws (id, country, name, description) VALUES (6, \u0027Germany\u0027, \u0027Marine Protection Act\u0027, \u0027Protects the marine environment in Germany\u0027);", + "sql": "SELECT country, COUNT(*) FROM Laws WHERE name LIKE \u0027%Marine%\u0027 GROUP BY country HAVING COUNT(*) \u003e 1;", + "sql_explanation": "The SQL query retrieves the countries with more than one marine law in the Laws table. It first filters the records to only those with name containing \u0027Marine\u0027 and then applies the aggregation function COUNT to calculate the number of laws. The HAVING clause filters the groups to only those with more than one record." 
+}, { + "id": "3688", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of marine species records per family in the Atlantic ocean.", + "sql_context": "CREATE TABLE marine_species (family TEXT, species_name TEXT, ocean TEXT); INSERT INTO marine_species (family, species_name, ocean) VALUES (\u0027Delphinidae\u0027, \u0027Bottlenose Dolphin\u0027, \u0027Atlantic Ocean\u0027), (\u0027Carcharodon\u0027, \u0027Great White Shark\u0027, \u0027Atlantic Ocean\u0027), (\u0027Moridae\u0027, \u0027Greenland Halibut\u0027, \u0027Atlantic Ocean\u0027);", + "sql": "SELECT family, COUNT(*) FROM marine_species WHERE ocean \u003d \u0027Atlantic Ocean\u0027 GROUP BY family;", + "sql_explanation": "This query filters records to only those in the \u0027Atlantic Ocean\u0027 from the \u0027marine_species\u0027 table, then groups the results by the \u0027family\u0027 column and counts the number of records within each group." 
+}, { + "id": "4234", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total biomass for marine life in each type of habitat, along with the habitat type?", + "sql_context": "CREATE TABLE life_habitat (habitat VARCHAR(50), biomass FLOAT); INSERT INTO life_habitat VALUES (\u0027Habitat 1\u0027, 123.4), (\u0027Habitat 1\u0027, 234.5), (\u0027Habitat 2\u0027, 345.6);", + "sql": "SELECT habitat, SUM(biomass) as total_biomass FROM life_habitat GROUP BY habitat;", + "sql_explanation": "The SQL query calculates the total biomass for marine life in each type of habitat and orders them from the smallest to the largest biomass. It groups the records by habitat." 
+}, { + "id": "4459", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of pollution control initiatives in each country?", + "sql_context": "CREATE TABLE pollution_control_initiatives (id INT, initiative_name TEXT, country TEXT);", + "sql": "SELECT country, COUNT(*) FROM pollution_control_initiatives GROUP BY country;", + "sql_explanation": "The SQL query calculates the total number of pollution control initiatives in each country by using the COUNT() function along with the GROUP BY clause to group the results by country." +}, { + "id": "4683", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of pollution incidents in each country\u0027s coastal waters.", + "sql_context": "CREATE SCHEMA MarinePollution(pollution_id INT, country_name TEXT, incident_date DATE);INSERT INTO MarinePollution(pollution_id, country_name, incident_date) VALUES (1, \u0027Canada\u0027, \u00272021-06-01\u0027), (2, \u0027Mexico\u0027, \u00272021-07-15\u0027), (3, \u0027USA\u0027, \u00272021-08-05\u0027), (4, \u0027Canada\u0027, \u00272021-09-20\u0027);", + "sql": "SELECT country_name, COUNT(*) FROM MarinePollution GROUP BY 
country_name;", + "sql_explanation": "The SQL query identifies the number of pollution incidents in each country\u0027s coastal waters by grouping the records in the \u0027MarinePollution\u0027 table by \u0027country_name\u0027 and counting the number of records in each group using the COUNT function." +}, { + "id": "4736", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of the ocean floor for each mapping project, along with the project name?", + "sql_context": "CREATE TABLE ocean_map (project VARCHAR(50), depth FLOAT); INSERT INTO ocean_map VALUES (\u0027Project 1\u0027, 123.4), (\u0027Project 1\u0027, 234.5), (\u0027Project 2\u0027, 345.6);", + "sql": "SELECT project, AVG(depth) as avg_depth FROM ocean_map GROUP BY project;", + "sql_explanation": "The SQL query calculates the average depth of the ocean floor for each mapping project and orders them from the smallest to the largest biomass. It groups the records by project." 
+}, { + "id": "4747", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique marine species are present in each ocean?", + "sql_context": "CREATE TABLE marine_species (species VARCHAR(255), ocean VARCHAR(255)); INSERT INTO marine_species (species, ocean) VALUES (\u0027Species1\u0027, \u0027Atlantic Ocean\u0027), (\u0027Species2\u0027, \u0027Indian Ocean\u0027);", + "sql": "SELECT ocean, COUNT(DISTINCT species) FROM marine_species GROUP BY ocean", + "sql_explanation": "The SQL query counts the number of unique marine species in each ocean by grouping the species column and aggregating the DISTINCT values within each ocean group." 
+}, { + "id": "474", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the same number of donors in the impact investing and effective altruism sectors?", + "sql_context": "CREATE TABLE sector_donors (country VARCHAR(50), sector VARCHAR(50), donor_count INT); INSERT INTO sector_donors (country, sector, donor_count) VALUES (\u0027United States\u0027, \u0027Impact Investing\u0027, 5000), (\u0027Canada\u0027, \u0027Impact Investing\u0027, 3000), (\u0027United Kingdom\u0027, \u0027Impact Investing\u0027, 4000), (\u0027United States\u0027, \u0027Effective Altruism\u0027, 5000), (\u0027Canada\u0027, \u0027Effective Altruism\u0027, 3000), (\u0027United Kingdom\u0027, \u0027Effective Altruism\u0027, 4000);", + "sql": "SELECT a.country FROM sector_donors a, sector_donors b WHERE a.sector \u003d \u0027Impact Investing\u0027 AND b.sector \u003d \u0027Effective Altruism\u0027 AND a.country \u003d b.country GROUP BY a.country HAVING COUNT(a.country) \u003d COUNT(DISTINCT a.sector);", + "sql_explanation": "The SQL query joins the sector_donors table with itself and filters the results for rows with the same country name in both the impact investing and effective altruism sectors. The query then groups the results by country and uses the HAVING clause to filter the results for countries with the same number of donors in both sectors." 
+}, { + "id": "1368", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 donors by total donation amount in Canada for the year 2019, in descending order.", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), Country VARCHAR(50), Amount DECIMAL(10,2), DonationYear INT); INSERT INTO Donors (DonorID, DonorName, Country, Amount, DonationYear) VALUES (1, \u0027John Doe\u0027, \u0027Canada\u0027, 800.00, 2019), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 500.00, 2019), (3, \u0027Alice Johnson\u0027, \u0027Canada\u0027, 1200.00, 2019), (4, \u0027Bob Brown\u0027, \u0027Canada\u0027, 900.00, 2019);", + "sql": "SELECT DonorName, SUM(Amount) AS TotalDonation FROM Donors WHERE Country \u003d \u0027Canada\u0027 AND DonationYear \u003d 2019 GROUP BY DonorName ORDER BY TotalDonation DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 donors in Canada for the year 2019, based on their total donation amount. It first calculates the total donation amount for each donor using the SUM function and the GROUP BY clause. Then, it orders the results in descending order using the ORDER BY clause and limits the output to the top 3 donors using the LIMIT clause." 
+}, { + "id": "1397", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of donations per month for the year 2022.", + "sql_context": "CREATE TABLE donations (id INT, donation_date DATE); INSERT INTO donations (id, donation_date) VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-01-15\u0027), (3, \u00272022-02-01\u0027), (4, \u00272022-02-15\u0027), (5, \u00272022-03-01\u0027), (6, \u00272022-12-31\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM donation_date) as month, COUNT(*) as donations FROM donations WHERE donation_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY month;", + "sql_explanation": "The SQL query calculates the number of donations per month for the year 2022 by using the EXTRACT function to get the month from the donation_date, GROUP BY clause to group the data by month, and the COUNT function to count the number of donations. The WHERE clause filters the data for donations made in 2022 using the BETWEEN operator." 
+}, { + "id": "1447", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors made donations in each quarter of 2021?", + "sql_context": "CREATE TABLE donations (donor_id INT, donation_date DATE); INSERT INTO donations (donor_id, donation_date) VALUES (1, \u00272021-01-01\u0027), (1, \u00272021-04-01\u0027), (2, \u00272021-02-01\u0027), (2, \u00272021-03-01\u0027), (3, \u00272021-04-01\u0027);", + "sql": "SELECT CONCAT(\u0027Q\u0027, QUARTER(donation_date)) as quarter, COUNT(DISTINCT donor_id) as num_donors FROM donations WHERE YEAR(donation_date) \u003d 2021 GROUP BY quarter;", + "sql_explanation": "This SQL query calculates the number of donors who made donations in each quarter of 2021. It starts by filtering the donations table for donations made in 2021 using the YEAR() function, and then groups the results by quarter. It then calculates the number of distinct donor IDs for each quarter, effectively pivoting the data to show the number of donors who made donations in each quarter." 
+}, { + "id": "1641", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount by donors located in Southeast Asia in 2022, broken down by donor type?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorType varchar(50), Country varchar(50), AmountDonated numeric(18,2), DonationDate date); INSERT INTO Donors (DonorID, DonorType, Country, AmountDonated, DonationDate) VALUES (1, \u0027Organization\u0027, \u0027Indonesia\u0027, 12000, \u00272022-01-01\u0027), (2, \u0027Individual\u0027, \u0027Malaysia\u0027, 5000, \u00272022-02-01\u0027), (3, \u0027Organization\u0027, \u0027Philippines\u0027, 15000, \u00272022-03-01\u0027), (4, \u0027Individual\u0027, \u0027Thailand\u0027, 8000, \u00272022-04-01\u0027);", + "sql": "SELECT DonorType, SUM(AmountDonated) as TotalDonated FROM Donors WHERE Country LIKE \u0027Southeast Asia%\u0027 AND YEAR(DonationDate) \u003d 2022 GROUP BY DonorType;", + "sql_explanation": "This query calculates the total donation amount by donors located in Southeast Asia in 2022, broken down by donor type. It uses the SUM function to add up the AmountDonated column for each donor type in Southeast Asia in 2022, and then groups the results by donor type." 
+}, { + "id": "2170", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount per cause in Q1 2022?", + "sql_context": "CREATE TABLE Donations (id INT, donor_id INT, cause VARCHAR(255), amount DECIMAL(10, 2), donation_date DATE); INSERT INTO Donations (id, donor_id, cause, amount, donation_date) VALUES (1, 1001, \u0027Education\u0027, 5000, \u00272022-01-05\u0027), (2, 1002, \u0027Health\u0027, 3000, \u00272022-03-15\u0027), (3, 1003, \u0027Environment\u0027, 7000, \u00272022-01-30\u0027);", + "sql": "SELECT cause, SUM(amount) as total_donation FROM Donations WHERE donation_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY cause;", + "sql_explanation": "The SQL query groups donations by cause and filters for the first quarter of 2022. It then calculates the total donation amount per cause by using the SUM function." 
+}, { + "id": "2612", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which regions have the highest impact investing?", + "sql_context": "CREATE TABLE Investments (InvestmentID int, Region varchar(20), InvestmentAmount decimal(10,2)); INSERT INTO Investments (InvestmentID, Region, InvestmentAmount) VALUES (1, \u0027Africa\u0027, 25000.00), (2, \u0027Asia\u0027, 30000.00), (3, \u0027Latin America\u0027, 27500.00);", + "sql": "SELECT Region, SUM(InvestmentAmount) AS TotalInvestment FROM Investments GROUP BY Region ORDER BY TotalInvestment DESC;", + "sql_explanation": "Summarize the total investment amount for each region and order them in descending order to find the regions with the highest impact investing." 
+}, { + "id": "2976", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total donation amount per category, sorted by the highest amount first?", + "sql_context": "CREATE TABLE donations (id INT, category VARCHAR(255), amount DECIMAL(10, 2)); INSERT INTO donations (id, category, amount) VALUES (1, \u0027climate change\u0027, 5000), (2, \u0027poverty reduction\u0027, 8000), (3, \u0027healthcare\u0027, 3000), (4, \u0027climate change\u0027, 7000);", + "sql": "SELECT category, SUM(amount) AS total_donation FROM donations GROUP BY category ORDER BY total_donation DESC;", + "sql_explanation": "This SQL query groups the donations table by the category column, then calculates the sum of the amount for each category. Finally, it orders the results in descending order by the total donation amount." 
+}, { + "id": "3403", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total donation amount for each organization.", + "sql_context": "CREATE TABLE donations (id INT, donation_amount DECIMAL, organization TEXT); INSERT INTO donations (id, donation_amount, organization) VALUES (1, 150.00, \u0027Charity A\u0027), (2, 250.00, \u0027Charity A\u0027), (3, 300.00, \u0027Charity B\u0027), (4, 50.00, \u0027Charity C\u0027), (5, 100.00, \u0027Charity C\u0027);", + "sql": "SELECT organization, SUM(donation_amount) as total_donations FROM donations GROUP BY organization;", + "sql_explanation": "The SQL query calculates the total donation amount for each organization by using the SUM function on the donation_amount column, and the GROUP BY clause to group the data by organization." 
+}, { + "id": "3490", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique donors are there for each cause?", + "sql_context": "CREATE TABLE unique_donors (donor_id INT, cause_id INT); INSERT INTO unique_donors (donor_id, cause_id) VALUES (1, 1), (1, 2), (2, 2), (3, 3), (3, 3), (4, 1);", + "sql": "SELECT cause_id, COUNT(DISTINCT donor_id) AS unique_donors FROM unique_donors GROUP BY cause_id;", + "sql_explanation": "This SQL query calculates the number of unique donors for each cause by grouping the cause_id and counting the number of distinct donor_id." +}, { + "id": "4354", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount in each country?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, RecipientID INT, Amount DECIMAL(10,2), Country TEXT); INSERT INTO Donations (DonationID, DonorID, RecipientID, Amount, Country) VALUES (1, 1, 101, 1000.00, \u0027USA\u0027), (2, 1, 102, 2000.00, \u0027Canada\u0027), (3, 2, 101, 500.00, \u0027USA\u0027), (4, 3, 103, 3000.00, \u0027Mexico\u0027);", + "sql": "SELECT Country, AVG(Amount) AS AverageDonation FROM Donations GROUP BY Country;", + "sql_explanation": 
"This query calculates the average donation amount in each country by summing the donation amounts and dividing by the number of donations in each country. It then groups the results by country." +}, { + "id": "4408", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors are from each country, in the \u0027Donors\u0027 table?", + "sql_context": "CREATE TABLE Donors (id INT, name VARCHAR(50), city VARCHAR(50), state VARCHAR(50), country VARCHAR(50), type VARCHAR(10), donation_amount DECIMAL(10, 2));", + "sql": "SELECT country, COUNT(DISTINCT id) as num_donors FROM Donors GROUP BY country;", + "sql_explanation": "This query groups donors by country and calculates the count of unique donors per country." 
+}, { + "id": "4693", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donations have been made in each country?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL(10,2), DonationCountry VARCHAR(50)); CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), DonationType VARCHAR(50));", + "sql": "SELECT DonationCountry, COUNT(*) FROM Donations GROUP BY DonationCountry;", + "sql_explanation": "The SQL query calculates the number of donations made in each country. It uses the COUNT function to count the number of records for each group specified by the DonationCountry column." 
+}, { + "id": "647", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of rural hospitals and clinics in Peru, Chile, and Bolivia that were established between 1990 and 2010.", + "sql_context": "CREATE TABLE hospitals_southamerica (name TEXT, location TEXT, country TEXT, establishment_date DATE); INSERT INTO hospitals_southamerica (name, location, country, establishment_date) VALUES (\u0027Hospital 1\u0027, \u0027Rural Peru\u0027, \u0027Peru\u0027, \u00271995-01-01\u0027), (\u0027Hospital 2\u0027, \u0027Rural Chile\u0027, \u0027Chile\u0027, \u00272002-01-01\u0027), (\u0027Clinic 3\u0027, \u0027Rural Bolivia\u0027, \u0027Bolivia\u0027, \u00272005-01-01\u0027), (\u0027Hospital 4\u0027, \u0027Rural Peru\u0027, \u0027Peru\u0027, \u00272011-01-01\u0027);", + "sql": "SELECT country, COUNT(*) FROM hospitals_southamerica WHERE (country IN (\u0027Peru\u0027, \u0027Chile\u0027, \u0027Bolivia\u0027) AND location LIKE \u0027Rural%\u0027 AND establishment_date BETWEEN \u00271990-01-01\u0027 AND \u00272010-12-31\u0027) GROUP BY country", + "sql_explanation": "Shows the number of rural hospitals and clinics in Peru, Chile, and Bolivia established between 1990 and 2010." 
+}, { + "id": "873", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of rural hospitals in each state, ordered from highest to lowest?", + "sql_context": "CREATE SCHEMA RuralHealth; USE RuralHealth; CREATE TABLE States (StateName VARCHAR(50), StateAbbreviation VARCHAR(10)); CREATE TABLE Hospitals (HospitalID INT, HospitalName VARCHAR(50), StateAbbreviation VARCHAR(10), Rural BOOLEAN); INSERT INTO States (StateName, StateAbbreviation) VALUES (\u0027Alabama\u0027, \u0027AL\u0027), (\u0027Alaska\u0027, \u0027AK\u0027); INSERT INTO Hospitals (HospitalID, HospitalName, StateAbbreviation, Rural) VALUES (1, \u0027HospitalA\u0027, \u0027AL\u0027, TRUE), (2, \u0027HospitalB\u0027, \u0027AK\u0027, FALSE);", + "sql": "SELECT StateAbbreviation, (SUM(CASE WHEN Rural THEN 1 ELSE 0 END) * 100.0 / COUNT(*)) as PercentRuralHospitals FROM Hospitals GROUP BY StateAbbreviation ORDER BY PercentRuralHospitals DESC;", + "sql_explanation": "The SQL query calculates the percentage of rural hospitals in each state by grouping hospitals based on StateAbbreviation and calculating the percentage of rural hospitals within each group. The result set is then ordered in descending order based on the percentage of rural hospitals for each state." 
+}, { + "id": "916", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Update the \"disease_prevalence\" view to show the top 3 most common diseases by region", + "sql_context": "CREATE VIEW disease_prevalence AS SELECT region, disease, COUNT(*) as cases FROM health_data GROUP BY region, disease;", + "sql": "CREATE OR REPLACE VIEW disease_prevalence AS SELECT region, disease, COUNT(*) as cases FROM health_data GROUP BY region, disease ORDER BY region, cases DESC FETCH FIRST 3 ROWS PER GROUP;", + "sql_explanation": "This query creates a new view, replacing the old one, that selects the region, disease, and number of cases from the health_data table, groups them by region and disease, and orders them by region and number of cases in descending order, fetching only the top 3 rows per group (i.e., per region)." 
+}, { + "id": "1202", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the prevalence of heart disease in each age group, and what is the total population in each age group?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, has_heart_disease BOOLEAN); INSERT INTO patients (id, name, age, has_heart_disease) VALUES (1, \u0027John Doe\u0027, 65, true), (2, \u0027Jane Smith\u0027, 45, false), (3, \u0027Bob Johnson\u0027, 35, false);", + "sql": "SELECT FLOOR(patients.age / 10) * 10 AS age_group, COUNT(patients.id), SUM(CASE WHEN patients.has_heart_disease \u003d true THEN 1 ELSE 0 END) FROM patients GROUP BY age_group;", + "sql_explanation": "This query groups the patients table by age group (decades), and calculates the total population in each age group and the number of patients with heart disease in each age group. The query uses the FLOOR function to group patients by age decade (i.e. ages 0-9, 10-19, etc.)." 
+}, { + "id": "1914", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average travel time to the nearest healthcare facility per rural community?", + "sql_context": "CREATE TABLE healthcare_facilities (id INT, name TEXT, location TEXT, travel_time FLOAT);", + "sql": "SELECT AVG(travel_time) avg_time, location FROM healthcare_facilities WHERE location LIKE \u0027%rural%\u0027 GROUP BY location ORDER BY avg_time DESC;", + "sql_explanation": "The SQL query calculates the average travel time (AVG(travel_time)) to the nearest healthcare facility per rural community (WHERE location LIKE \u0027%rural%\u0027). It orders the results in descending order (ORDER BY avg_time DESC) by the average travel time." 
+}, { + "id": "2296", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view summarizing hospital bed capacity by region", + "sql_context": "CREATE TABLE hospital_bed_capacity (id INT PRIMARY KEY, hospital_name VARCHAR(255), region VARCHAR(255), total_beds INT);", + "sql": "CREATE VIEW region_bed_summary AS SELECT region, SUM(total_beds) AS total_region_beds FROM hospital_bed_capacity GROUP BY region;", + "sql_explanation": "Create a view called region_bed_summary that summarizes hospital bed capacity by region. The view calculates the total number of beds per region using the hospital_bed_capacity table and the SUM aggregate function." 
+}, { + "id": "2398", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients diagnosed with cancer in rural areas of each state?", + "sql_context": "CREATE TABLE patient (patient_id INT, age INT, gender TEXT, diagnosis TEXT, state TEXT, location TEXT);", + "sql": "SELECT state, SUM(CASE WHEN diagnosis \u003d \u0027Cancer\u0027 THEN 1 ELSE 0 END) FROM patient WHERE location LIKE \u0027%rural%\u0027 GROUP BY state;", + "sql_explanation": "This query filters the patient table based on the location and state columns to include only patients in rural areas of each state, then calculates the total number of patients diagnosed with cancer in rural areas of each state by grouping the results by state and using a conditional sum to count the number of cancer patients." 
+}, { + "id": "4441", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of hospital beds in rural hospitals for each state?", + "sql_context": "CREATE TABLE hospitals (id INT, state CHAR(2), num_beds INT, rural BOOLEAN); INSERT INTO hospitals (id, state, num_beds, rural) VALUES (1, \u0027IL\u0027, 50, true), (2, \u0027CA\u0027, 100, false);", + "sql": "SELECT state, AVG(num_beds) FROM hospitals WHERE rural \u003d true GROUP BY state;", + "sql_explanation": "Calculates the average number of hospital beds in rural hospitals for each state by summing up the num_beds column values for rows where rural is true and dividing it by the count of rows where rural is true for each state." 
+}, { + "id": "4823", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average number of beds in rural hospitals in each state.", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, state TEXT, num_beds INT); INSERT INTO hospitals (id, name, state, num_beds) VALUES (1, \u0027Rural General Hospital\u0027, \u0027State A\u0027, 200), (2, \u0027Rural District Hospital\u0027, \u0027State B\u0027, 150), (3, \u0027Rural Specialty Hospital\u0027, \u0027State A\u0027, 100);", + "sql": "SELECT state, AVG(num_beds) as avg_beds FROM hospitals GROUP BY state;", + "sql_explanation": "The SQL query calculates the average number of beds in rural hospitals in each state by grouping the records based on the \u0027state\u0027 column and then calculating the average using the AVG() function." 
+}, { + "id": "4913", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of clinics per rural county in the United States?", + "sql_context": "CREATE TABLE rural_us_counties (name TEXT, state TEXT, num_clinics INTEGER); INSERT INTO rural_us_counties (name, state, num_clinics) VALUES (\u0027County A\u0027, \u0027Texas\u0027, 5), (\u0027County B\u0027, \u0027California\u0027, 7), (\u0027County C\u0027, \u0027Montana\u0027, 3), (\u0027County D\u0027, \u0027New York\u0027, 6);", + "sql": "SELECT state, AVG(num_clinics) FROM rural_us_counties GROUP BY state;", + "sql_explanation": "This query selects the state and num_clinics columns from the rural_us_counties table, then uses the GROUP BY operator to group the rows by state. It then uses the AVG function to calculate the average value of num_clinics for each group. This provides the average number of clinics per rural county in each state of the United States." 
+}, { + "id": "4985", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of medical staff members in rural health centers in Indonesia and Thailand?", + "sql_context": "CREATE TABLE medical_staff (country VARCHAR(20), center_name VARCHAR(50), num_staff INT); INSERT INTO medical_staff (country, center_name, num_staff) VALUES (\u0027Indonesia\u0027, \u0027Center LL\u0027, 10), (\u0027Indonesia\u0027, \u0027Center MM\u0027, 15), (\u0027Thailand\u0027, \u0027Center NN\u0027, 20), (\u0027Thailand\u0027, \u0027Center OO\u0027, 25);", + "sql": "SELECT country, SUM(num_staff) FROM medical_staff GROUP BY country;", + "sql_explanation": "This query calculates the total number of medical staff members in rural health centers in Indonesia and Thailand. The GROUP BY clause groups the results by country." 
+}, { + "id": "4987", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of healthcare workers in each location in the \"rural_clinics\" table?", + "sql_context": "CREATE TABLE rural_clinics (id INT, name VARCHAR(50), location VARCHAR(50), num_workers INT, avg_age INT);", + "sql": "SELECT location, AVG(avg_age) FROM rural_clinics GROUP BY location;", + "sql_explanation": "The SQL query calculates the average age of healthcare workers in each location in the \"rural_clinics\" table using the AVG() function and the GROUP BY clause." +}, { + "id": "435", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique users in Australia and New Zealand who have interacted with at least one ad, and what was the total engagement time for these users, broken down by day?", + "sql_context": "CREATE TABLE ad_interactions (user_id INT, ad_id INT, country VARCHAR(2), interaction_date DATE, interaction_time FLOAT); INSERT INTO ad_interactions (user_id, ad_id, country, interaction_date, interaction_time) VALUES (1, 1001, \u0027AU\u0027, \u00272022-02-01\u0027, 25.3), (2, 1002, \u0027NZ\u0027, \u00272022-02-02\u0027, 30.5), (1, 1003, 
\u0027AU\u0027, \u00272022-02-01\u0027, 15.6);", + "sql": "SELECT interaction_date, COUNT(DISTINCT user_id) as total_users, SUM(interaction_time) as total_engagement_time FROM ad_interactions WHERE country IN (\u0027AU\u0027, \u0027NZ\u0027) GROUP BY interaction_date ORDER BY interaction_date DESC LIMIT 7;", + "sql_explanation": "This SQL query calculates the total number of unique users in Australia and New Zealand who have interacted with at least one ad, and the total engagement time for these users, broken down by day. It achieves this by using the COUNT and DISTINCT keywords to count the number of unique users, and the SUM function to sum the interaction time for the specified countries. The GROUP BY clause is used to group the results by interaction_date, and the ORDER BY clause sorts the results in descending order. The LIMIT clause is used to only retrieve the most recent 7 days." +}, { + "id": "1532", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List users who have used the hashtag \"#fitness\" more than 5 times in the past week.", + "sql_context": "CREATE TABLE posts (id INT, user VARCHAR(255), hashtags TEXT, timestamp TIMESTAMP);", + "sql": "SELECT user FROM posts WHERE hashtags LIKE \u0027%#fitness%\u0027 GROUP BY user HAVING COUNT(*) \u003e 5 AND timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 WEEK) AND NOW();", + "sql_explanation": "Filter rows with the desired hashtag and within the specified time range, count the number of occurrences for each user, and then return the users who used the hashtag more than 5 times." 
+}, { + "id": "1707", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total ad spend per advertiser for the month of June 2022?", + "sql_context": "CREATE TABLE advertisers (advertiser_id INT, advertiser_name VARCHAR(50), spend DECIMAL(10, 2), spend_date DATE); INSERT INTO advertisers VALUES (301, \u0027Advertiser D\u0027, 5000, \u00272022-06-01\u0027), (302, \u0027Advertiser E\u0027, 3000, \u00272022-06-05\u0027), (303, \u0027Advertiser F\u0027, 7000, \u00272022-06-10\u0027);", + "sql": "SELECT advertiser_name, SUM(spend) as total_spend FROM advertisers WHERE MONTH(spend_date) \u003d 6 AND YEAR(spend_date) \u003d 2022 GROUP BY advertiser_name;", + "sql_explanation": "The SQL query calculates the total ad spend per advertiser for the month of June 2022. It filters the records based on the spend_date column using the MONTH() and YEAR() functions and groups the result by the advertiser_name column. Finally, it sums up the spend values for each group." 
+}, { + "id": "1783", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of ads served to users in the United States and Canada, broken down by day, for the past week?", + "sql_context": "CREATE TABLE ads_served (user_id INT, ad_id INT, country VARCHAR(2), serving_date DATE); INSERT INTO ads_served (user_id, ad_id, country, serving_date) VALUES (1, 1001, \u0027US\u0027, \u00272022-03-01\u0027), (2, 1002, \u0027CA\u0027, \u00272022-03-02\u0027);", + "sql": "SELECT serving_date, COUNT(*) as total_ads FROM ads_served WHERE country IN (\u0027US\u0027, \u0027CA\u0027) GROUP BY serving_date ORDER BY serving_date DESC LIMIT 7;", + "sql_explanation": "This SQL query calculates the total number of ads served to users in the United States and Canada for the past week, broken down by day. It achieves this by selecting the serving_date and counting the number of rows for each date using the GROUP BY clause. The WHERE clause filters the results to only include records for the specified countries, and the ORDER BY clause sorts the results in descending order. The LIMIT clause is used to only retrieve the most recent 7 days." 
+}, { + "id": "2472", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 most mentioned sports teams in user posts from users aged 25-30 in Canada?", + "sql_context": "CREATE TABLE user_posts (user_id INT, age INT, country VARCHAR(255), team VARCHAR(255));", + "sql": "SELECT team, COUNT(*) AS mentions FROM user_posts WHERE age BETWEEN 25 AND 30 AND country \u003d \u0027Canada\u0027 GROUP BY team LIMIT 3;", + "sql_explanation": "The SQL query selects the team column and counts the number of mentions for each team where the user\u0027s age is between 25 and 30 and the country is Canada. It then groups the results by team and limits the output to the top 3 teams with the most mentions." 
+}, { + "id": "3049", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of all ad spends for ads with the keyword \u0027organicfood\u0027 in the \u0027advertising_data\u0027 table, grouped by ad category?", + "sql_context": "CREATE TABLE advertising_data(ad_id INT, ad_category TEXT, keyword TEXT, ad_spend DECIMAL(10,2));", + "sql": "SELECT ad_category, SUM(ad_spend) FROM advertising_data WHERE keyword \u003d \u0027organicfood\u0027 GROUP BY ad_category;", + "sql_explanation": "This query calculates the sum of all ad spends for ads with the keyword \u0027organicfood\u0027 in the \u0027advertising_data\u0027 table, grouped by ad category. It uses the SUM and GROUP BY functions to find the ad_spend and ad_category values." 
+}, { + "id": "3855", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of comments per post in the \u0027social_media\u0027 database?", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, content TEXT, timestamp TIMESTAMP, comments INT);", + "sql": "SELECT AVG(COUNT(posts.comments)) AS avg_comments_per_post FROM posts GROUP BY posts.id;", + "sql_explanation": "This query calculates the average number of comments per post by grouping the \u0027posts\u0027 table by \u0027id\u0027 and calculating the count of comments for each group. It then takes the average of those counts." 
+}, { + "id": "4623", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users are there in each country?", + "sql_context": "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(50), age INT, gender VARCHAR(10), location VARCHAR(50)); INSERT INTO users (id, name, age, gender, location) VALUES (1, \u0027Maria\u0027, 28, \u0027Female\u0027, \u0027Brazil\u0027); INSERT INTO users (id, name, age, gender, location) VALUES (2, \u0027Joao\u0027, 35, \u0027Male\u0027, \u0027Brazil\u0027); INSERT INTO users (id, name, age, gender, location) VALUES (3, \u0027Ahmed\u0027, 40, \u0027Male\u0027, \u0027Egypt\u0027); INSERT INTO users (id, name, age, gender, location) VALUES (4, \u0027Sophia\u0027, 30, \u0027Female\u0027, \u0027USA\u0027);", + "sql": "SELECT users.location, COUNT(users.id) FROM users GROUP BY users.location;", + "sql_explanation": "This SQL query groups the users table by the location column and returns the count of users for each country." 
+}, { + "id": "4967", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of posts made by a user on a single day in the \u0027social_media\u0027 table, assuming the \u0027post_date\u0027 column is of type DATE?", + "sql_context": "CREATE TABLE social_media (user_id INT, post_id INT, post_date DATE);", + "sql": "SELECT MAX(COUNT(*)) FROM social_media GROUP BY user_id, post_date;", + "sql_explanation": "The query calculates the maximum number of posts made by a user on a single day by grouping the \u0027social_media\u0027 table by \u0027user_id\u0027 and \u0027post_date\u0027 and taking the maximum of the counts for each group." 
+}, { + "id": "1041", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workplace safety inspections by month in California in 2022?", + "sql_context": "CREATE TABLE inspections (id INT, workplace_id INT, state VARCHAR, inspection_date DATE); INSERT INTO inspections (id, workplace_id, state, inspection_date) VALUES (1, 2, \u0027California\u0027, \u00272022-01-15\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM inspection_date) as month, COUNT(*) as total_inspections FROM inspections WHERE state \u003d \u0027California\u0027 AND inspection_date \u003e\u003d \u00272022-01-01\u0027 GROUP BY month;", + "sql_explanation": "This SQL query calculates the total number of workplace safety inspections by month in California in 2022. It extracts the month from the inspection_date column and groups the results by month. Then, it calculates the count of rows for each month, which represents the total number of inspections in each month." 
+}, { + "id": "1728", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of workers and their union membership status in the \u0027finance\u0027 sector", + "sql_context": "CREATE TABLE finance_workers (id INT, sector VARCHAR(20), union_member BOOLEAN); INSERT INTO finance_workers (id, sector, union_member) VALUES (1, \u0027finance\u0027, TRUE), (2, \u0027finance\u0027, FALSE), (3, \u0027finance\u0027, FALSE);", + "sql": "SELECT sector, SUM(union_member::INT) AS union_members, COUNT(*) - SUM(union_member::INT) AS non_union_members FROM finance_workers GROUP BY sector;", + "sql_explanation": "This query gets the number of workers and their union membership status in the \u0027finance\u0027 sector." 
+}, { + "id": "2815", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display union names and the number of members who are female", + "sql_context": "CREATE TABLE union_members_demographics (id INT, union_name VARCHAR(255), gender VARCHAR(10), member_count INT); INSERT INTO union_members_demographics (id, union_name, gender, member_count) VALUES (1, \u0027American Federation of State, County and Municipal Employees\u0027, \u0027Female\u0027, 900000); INSERT INTO union_members_demographics (id, union_name, gender, member_count) VALUES (2, \u0027International Brotherhood of Teamsters\u0027, \u0027Female\u0027, 600000);", + "sql": "SELECT union_name, SUM(member_count) FROM union_members_demographics WHERE gender \u003d \u0027Female\u0027 GROUP BY union_name;", + "sql_explanation": "This query uses the GROUP BY clause to group the records by union_name, and then calculates the sum of member_count for each group where gender is Female." 
+}, { + "id": "3821", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of labor rights violations in the \"labor_database\" by industry for the year 2020?", + "sql_context": "CREATE TABLE violations (id INT, year INT, industry VARCHAR(50), num_violations INT); INSERT INTO violations (id, year, industry, num_violations) VALUES (1, 2020, \u0027Manufacturing\u0027, 200), (2, 2020, \u0027Retail\u0027, 150), (3, 2020, \u0027Construction\u0027, 250), (4, 2020, \u0027Healthcare\u0027, 100);", + "sql": "SELECT industry, SUM(num_violations) FROM violations WHERE year \u003d 2020 GROUP BY industry;", + "sql_explanation": "The SQL query calculates the total number of labor rights violations for each industry in the year 2020 by using the SUM() function. The WHERE clause filters the year to only 2020, and the GROUP BY clause groups the results by industry." 
+}, { + "id": "4115", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of labor rights violations for each type in the \u0027LaborRightsViolations\u0027 table", + "sql_context": "CREATE TABLE LaborRightsViolations (id INT, type VARCHAR(50), violation VARCHAR(50)); INSERT INTO LaborRightsViolations (id, type, violation) VALUES (1, \u0027TypeA\u0027, \u0027Violation1\u0027), (2, \u0027TypeB\u0027, \u0027Violation2\u0027), (3, \u0027TypeA\u0027, \u0027Violation3\u0027);", + "sql": "SELECT type, COUNT(*) as total_violations FROM LaborRightsViolations GROUP BY type;", + "sql_explanation": "This query calculates the total number of labor rights violations for each type in the LaborRightsViolations table by grouping the records based on type and then counting the number of records in each group." 
+}, { + "id": "4412", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many union members work in each industry?", + "sql_context": "CREATE TABLE union_rosters (roster_id INT, member_id INT, industry VARCHAR(15)); INSERT INTO union_rosters (roster_id, member_id, industry) VALUES (1, 1, \u0027Manufacturing\u0027), (2, 2, \u0027Construction\u0027), (3, 3, \u0027Retail\u0027);", + "sql": "SELECT industry, COUNT(*) as num_members FROM union_rosters GROUP BY industry;", + "sql_explanation": "This SQL query counts the number of union members who work in each industry. It groups the records by industry and counts the number of members in each industry using the COUNT() function." 
+}, { + "id": "1792", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which biosensor technology has the highest average temperature?", + "sql_context": "CREATE SCHEMA if not exists biosensors;CREATE TABLE if not exists biosensors.temperature (id INT, biosensor_name VARCHAR(255), temperature DECIMAL(10,2)); INSERT INTO biosensors.temperature (id, biosensor_name, temperature) VALUES (1, \u0027BioTherm\u0027, 37.5), (2, \u0027BioSense\u0027, 38.2), (3, \u0027BioTemp\u0027, 37.8);", + "sql": "SELECT biosensor_name, AVG(temperature) avg_temperature FROM biosensors.temperature GROUP BY biosensor_name ORDER BY avg_temperature DESC LIMIT 1;", + "sql_explanation": "Identify the biosensor technology with the highest average temperature by grouping the temperature table by biosensor_name, calculating the average temperature, and ordering the results in descending order, then selecting the top record." 
+}, { + "id": "2347", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the most common types of biosensors used in Africa and South America?", + "sql_context": "CREATE SCHEMA if not exists biosensor_types;CREATE TABLE if not exists biosensor_types.sensors (id INT, name VARCHAR(100), continent VARCHAR(50));INSERT INTO biosensor_types.sensors (id, name, continent) VALUES (1, \u0027TypeA\u0027, \u0027Africa\u0027), (2, \u0027TypeB\u0027, \u0027South America\u0027), (3, \u0027TypeA\u0027, \u0027Africa\u0027), (4, \u0027TypeC\u0027, \u0027South America\u0027);", + "sql": "SELECT continent, name, COUNT(*) as count FROM biosensor_types.sensors GROUP BY continent, name ORDER BY continent, count DESC;", + "sql_explanation": "This query groups the biosensor types by continent and name, then counts the occurrences of each type in each continent. The results are ordered by continent and the count in descending order." 
+}, { + "id": "2393", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 biosensor technology patents by total citations in the UK?", + "sql_context": "CREATE SCHEMA if not exists biosensor;CREATE TABLE if not exists biosensor.patents (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(50), total_citations INT);INSERT INTO biosensor.patents (id, name, location, total_citations) VALUES (1, \u0027PatentX\u0027, \u0027UK\u0027, 150), (2, \u0027PatentY\u0027, \u0027USA\u0027, 200), (3, \u0027PatentZ\u0027, \u0027Canada\u0027, 120), (4, \u0027PatentW\u0027, \u0027UK\u0027, 170), (5, \u0027PatentV\u0027, \u0027France\u0027, 180), (6, \u0027PatentU\u0027, \u0027UK\u0027, 200);", + "sql": "SELECT name, total_citations FROM biosensor.patents WHERE location \u003d \u0027UK\u0027 GROUP BY name ORDER BY total_citations DESC LIMIT 3;", + "sql_explanation": "This SQL query retrieves the top 3 biosensor technology patents by total citations in the UK by using the GROUP BY clause on the name column, and the ORDER BY clause with the total_citations column to sort the results in descending order. The query then limits the results to the top 3 using the LIMIT clause. The query also uses the WHERE clause to filter the rows based on the location column." 
+}, { + "id": "2457", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by African biotech startups, grouped by year?", + "sql_context": "CREATE TABLE startups (id INT, name VARCHAR(255), country VARCHAR(255), funding FLOAT, date DATE); INSERT INTO startups (id, name, country, funding, date) VALUES (1, \u0027StartupA\u0027, \u0027Kenya\u0027, 5000000, \u00272020-01-01\u0027); INSERT INTO startups (id, name, country, funding, date) VALUES (2, \u0027StartupB\u0027, \u0027Nigeria\u0027, 7000000, \u00272019-01-01\u0027);", + "sql": "SELECT country, YEAR(date) AS year, SUM(funding) FROM startups WHERE country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027) GROUP BY year, country;", + "sql_explanation": "This SQL query calculates the total funding received by African biotech startups, grouped by year. It first filters the data to only include startups from Kenya and Nigeria and extracts the year from the date. Then, it groups the data by year and country, and calculates the sum of the funding for each group." 
+}, { + "id": "2518", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genetic research projects have the highest average funding?", + "sql_context": "CREATE SCHEMA if not exists genetics; CREATE TABLE if not exists genetics.research_projects (id INT, name VARCHAR(50), funding DECIMAL(10, 2)); INSERT INTO genetics.research_projects (id, name, funding) VALUES (1, \u0027CRISPR\u0027, 3000000.00), (2, \u0027Genome Sequencing\u0027, 2500000.00), (3, \u0027Stem Cell Research\u0027, 4000000.00);", + "sql": "SELECT name, AVG(funding) as avg_funding FROM genetics.research_projects GROUP BY name ORDER BY avg_funding DESC LIMIT 1;", + "sql_explanation": "This query identifies the genetic research project with the highest average funding by grouping the genetics.research_projects table by the name column, calculating the average of the funding column for each group, and then returning the group with the highest average value, sorted in descending order." 
+}, { + "id": "3167", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum funding received by biotech startups in each country?", + "sql_context": "CREATE SCHEMA if not exists biotech;USE biotech;CREATE TABLE if not exists startups (name VARCHAR(255), country VARCHAR(255), funding FLOAT);INSERT INTO startups (name, country, funding) VALUES (\u0027Startup1\u0027, \u0027USA\u0027, 5000000), (\u0027Startup2\u0027, \u0027Canada\u0027, 7000000), (\u0027Startup3\u0027, \u0027USA\u0027, 3000000), (\u0027Startup4\u0027, \u0027UK\u0027, 8000000), (\u0027Startup5\u0027, \u0027USA\u0027, 1000000), (\u0027Startup6\u0027, \u0027Canada\u0027, 4000000);", + "sql": "SELECT country, MAX(funding) as max_funding, MIN(funding) as min_funding FROM startups GROUP BY country;", + "sql_explanation": "This query calculates the maximum and minimum funding received by biotech startups in each country by using the MAX and MIN functions and the GROUP BY clause to group the rows by country." 
+}, { + "id": "3265", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of bioprocess engineering jobs in France and Spain.", + "sql_context": "CREATE SCHEMA if not exists engineering;CREATE TABLE if not exists engineering.jobs (id INT, title VARCHAR(50), country VARCHAR(50)); INSERT INTO engineering.jobs (id, title, country) VALUES (1, \u0027JobA\u0027, \u0027France\u0027), (2, \u0027JobB\u0027, \u0027Spain\u0027), (3, \u0027JobC\u0027, \u0027France\u0027), (4, \u0027JobD\u0027, \u0027USA\u0027);", + "sql": "SELECT COUNT(*), country FROM engineering.jobs WHERE country IN (\u0027France\u0027, \u0027Spain\u0027) GROUP BY country;", + "sql_explanation": "This query identifies the number of bioprocess engineering jobs in France and Spain by using the COUNT function and filtering the country column with the IN operator. The results are grouped by country." 
+}, { + "id": "3483", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of gene sequencing for unique clients who have spent more than $5000?", + "sql_context": "CREATE TABLE GeneSequencing (client_id INT, sequencing_cost FLOAT); INSERT INTO GeneSequencing (client_id, sequencing_cost) VALUES (1, 4500.50), (2, 6200.75), (3, 3000.20), (4, 5800.00), (5, 7000.00);", + "sql": "SELECT AVG(sequencing_cost) FROM GeneSequencing WHERE sequencing_cost \u003e 5000 GROUP BY client_id;", + "sql_explanation": "The SQL query calculates the average sequencing cost for clients who have spent more than $5000. It groups the data by client_id and calculates the average sequencing_cost for each group." 
+}, { + "id": "3742", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which bioprocesses have the highest energy consumption?", + "sql_context": "CREATE TABLE bioprocesses (bioprocess_id INT, bioprocess_name VARCHAR(20), energy_consumption FLOAT); INSERT INTO bioprocesses (bioprocess_id, bioprocess_name, energy_consumption) VALUES (1, \u0027Fermentation\u0027, 1200), (2, \u0027Cell culturing\u0027, 1800), (3, \u0027Downstream processing\u0027, 1500);", + "sql": "SELECT bioprocess_name, MAX(energy_consumption) FROM bioprocesses GROUP BY bioprocess_name", + "sql_explanation": "This SQL query identifies the bioprocesses with the highest energy consumption by selecting the bioprocess_name and maximum energy_consumption for each bioprocess." 
+}, { + "id": "3913", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average biosensor output for each biosensor type in India?", + "sql_context": "CREATE SCHEMA if not exists biosensors;CREATE TABLE if not exists biosensors.devices(id INT, name VARCHAR(255), type VARCHAR(255), output DECIMAL(10,2), country VARCHAR(255)); INSERT INTO biosensors.devices VALUES (1, \u0027BioSensorA\u0027, \u0027Type1\u0027, 5.2, \u0027India\u0027); INSERT INTO biosensors.devices VALUES (2, \u0027BioSensorB\u0027, \u0027Type2\u0027, 7.3, \u0027India\u0027); INSERT INTO biosensors.devices VALUES (3, \u0027BioSensorC\u0027, \u0027Type1\u0027, 4.9, \u0027USA\u0027);", + "sql": "SELECT type, AVG(output) FROM biosensors.devices WHERE country \u003d \u0027India\u0027 GROUP BY type;", + "sql_explanation": "The SQL query filters biosensors based in India and calculates the average output per biosensor type using the AVG function and grouping by type." 
+}, { + "id": "4822", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum funding amount for biotech startups located in each state?", + "sql_context": "CREATE SCHEMA if not exists biotech;USE biotech;CREATE TABLE if not exists startups(id INT, name VARCHAR(255), location VARCHAR(255), funding FLOAT);INSERT INTO startups(id, name, location, funding) VALUES (1, \u0027StartupA\u0027, \u0027California\u0027, 15000000.00), (2, \u0027StartupB\u0027, \u0027New York\u0027, 20000000.00), (3, \u0027StartupC\u0027, \u0027California\u0027, 12000000.00), (4, \u0027StartupD\u0027, \u0027Texas\u0027, 25000000.00), (5, \u0027StartupE\u0027, \u0027Florida\u0027, 18000000.00);", + "sql": "SELECT location, MAX(funding) FROM biotech.startups GROUP BY location;", + "sql_explanation": "This SQL query calculates the maximum funding amount for biotech startups located in each state by using the MAX() function on the funding column and grouping the results based on the location column with the GROUP BY clause." 
+}, { + "id": "4829", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average funding received by biotech startups in each country.", + "sql_context": "CREATE SCHEMA if not exists biotech;CREATE TABLE if not exists biotech.startups(id INT, name TEXT, location TEXT, funding FLOAT);INSERT INTO biotech.startups (id, name, location, funding) VALUES (1, \u0027StartupA\u0027, \u0027US\u0027, 5000000), (2, \u0027StartupB\u0027, \u0027UK\u0027, 3000000), (3, \u0027StartupC\u0027, \u0027UK\u0027, 4000000), (4, \u0027StartupD\u0027, \u0027Germany\u0027, 6000000);", + "sql": "SELECT location, AVG(funding) FROM biotech.startups GROUP BY location;", + "sql_explanation": "This query groups the data in the biotech.startups table by the location column and then uses the AVG function to calculate the average funding for biotech startups in each country." 
+}, { + "id": "209", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity of vulnerabilities found in the last quarter for each product?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, timestamp TIMESTAMP, product VARCHAR(255), vulnerability_severity VARCHAR(255)); INSERT INTO vulnerabilities (id, timestamp, product, vulnerability_severity) VALUES (1, \u00272020-10-01 12:00:00\u0027, \u0027Product A\u0027, \u0027High\u0027), (2, \u00272020-11-02 10:30:00\u0027, \u0027Product B\u0027, \u0027Medium\u0027);", + "sql": "SELECT product, AVG(case when vulnerability_severity \u003d \u0027High\u0027 then 3 when vulnerability_severity \u003d \u0027Medium\u0027 then 2 when vulnerability_severity \u003d \u0027Low\u0027 then 1 else 0 end) as avg_severity FROM vulnerabilities WHERE timestamp \u003e\u003d NOW() - INTERVAL 3 MONTH GROUP BY product;", + "sql_explanation": "This query selects the product and average severity of vulnerabilities from the vulnerabilities table, filtering for vulnerabilities within the last 3 months. It then calculates the average severity of vulnerabilities for each product by using a case statement to assign a numeric value to each vulnerability severity level (High\u003d3, Medium\u003d2, Low\u003d1), and then calculating the average of these values for each product." 
+}, { + "id": "366", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 2 vulnerabilities with the most occurrences in the past 6 months, along with the number of affected systems for each, broken down by country.", + "sql_context": "CREATE TABLE vulnerabilities (id INT PRIMARY KEY, vulnerability_name VARCHAR(50), occurrence_time TIMESTAMP, affected_system VARCHAR(50), country VARCHAR(50)); INSERT INTO vulnerabilities (id, vulnerability_name, occurrence_time, affected_system, country) VALUES (1, \u0027CVE-2022-21555\u0027, \u00272022-01-01 10:00:00\u0027, \u0027Web Server\u0027, \u0027USA\u0027), (2, \u0027CVE-2022-22954\u0027, \u00272022-02-01 12:30:00\u0027, \u0027Database Server\u0027, \u0027Canada\u0027);", + "sql": "SELECT vulnerability_name, COUNT(DISTINCT affected_system) as affected_systems, country FROM vulnerabilities WHERE occurrence_time \u003e\u003d NOW() - INTERVAL \u00276 months\u0027 GROUP BY vulnerability_name, country ORDER BY affected_systems DESC LIMIT 2;", + "sql_explanation": "This query filters vulnerabilities within the last 6 months, groups them by vulnerability name and country, calculates the number of unique affected systems per vulnerability and country, and returns the top 2 vulnerabilities with the most affected systems." 
+}, { + "id": "377", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the distinct threat categories and their total occurrence count for the past year, excluding any threats that occurred more than once in a single day?", + "sql_context": "CREATE TABLE threat_occurrences (threat_category TEXT, occurrence_date DATE, occurrence_count INT);INSERT INTO threat_occurrences (threat_category, occurrence_date, occurrence_count) VALUES (\u0027Ransomware\u0027, \u00272022-03-01\u0027, 1), (\u0027Phishing\u0027, \u00272022-03-02\u0027, 2), (\u0027Malware\u0027, \u00272022-03-03\u0027, 1), (\u0027Ransomware\u0027, \u00272022-03-04\u0027, 1), (\u0027Phishing\u0027, \u00272022-03-05\u0027, 1);", + "sql": "SELECT threat_category, SUM(occurrence_count) as total_occurrences FROM threat_occurrences WHERE occurrence_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY threat_category HAVING COUNT(DISTINCT occurrence_date) \u003e\u003d COUNT(occurrence_count);", + "sql_explanation": "This SQL query selects threat_category and sum of occurrence_count from the threat_occurrences table, filters the records to those with occurrence_date in the last year, groups the results by threat_category, orders the results by total_occurrences in descending order, and returns the distinct threat categories and their total occurrence count for the past year, excluding any threats that occurred more than once in a single day." 
+}, { + "id": "492", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 5 most common vulnerabilities found in the last 6 months, along with the number of occurrences and affected systems for each.", + "sql_context": "CREATE TABLE vulnerabilities (id INT PRIMARY KEY, vulnerability_name VARCHAR(50), occurrence_time TIMESTAMP, affected_system VARCHAR(50)); INSERT INTO vulnerabilities (id, vulnerability_name, occurrence_time, affected_system) VALUES (1, \u0027CVE-2021-4034\u0027, \u00272022-01-01 10:00:00\u0027, \u0027Web Server\u0027), (2, \u0027CVE-2021-44228\u0027, \u00272022-02-01 12:30:00\u0027, \u0027Application Server\u0027);", + "sql": "SELECT vulnerability_name, COUNT(*) as occurrences, affected_system FROM vulnerabilities WHERE occurrence_time \u003e\u003d NOW() - INTERVAL \u00276 months\u0027 GROUP BY vulnerability_name, affected_system ORDER BY occurrences DESC LIMIT 5;", + "sql_explanation": "This query filters vulnerabilities within the last 6 months, groups them by vulnerability name and affected system, and calculates the number of occurrences per vulnerability and affected system. It then orders the results in descending order based on the number of occurrences and returns the top 5 most common vulnerabilities." 
+}, { + "id": "522", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 most common vulnerabilities in the last month, by severity?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, timestamp TIMESTAMP, severity VARCHAR(255), software VARCHAR(255)); INSERT INTO vulnerabilities (id, timestamp, severity, software) VALUES (1, \u00272021-01-01 10:00:00\u0027, \u0027high\u0027, \u0027Apache\u0027), (2, \u00272021-01-02 12:30:00\u0027, \u0027medium\u0027, \u0027Nginx\u0027), (3, \u00272021-01-03 08:15:00\u0027, \u0027low\u0027, \u0027MySQL\u0027);", + "sql": "SELECT severity, software, COUNT(*) as vulnerability_count FROM vulnerabilities WHERE timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MONTH) GROUP BY severity, software ORDER BY vulnerability_count DESC LIMIT 3;", + "sql_explanation": "This query groups vulnerabilities by severity and software and filters for the last month. It then counts the number of vulnerabilities per severity and software, orders the results in descending order by count, and returns the top 3 most common vulnerabilities." 
+}, { + "id": "538", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique threat actors that have been active in the last month, along with the number of unique security incidents associated with each.", + "sql_context": "CREATE TABLE threat_actors (threat_actor VARCHAR(50), incident_count INT, actor_date DATE); INSERT INTO threat_actors (threat_actor, incident_count, actor_date) VALUES (\u0027Threat Actor 1\u0027, 20, \u00272023-01-01\u0027), (\u0027Threat Actor 2\u0027, 15, \u00272023-01-02\u0027), (\u0027Threat Actor 3\u0027, 12, \u00272023-01-03\u0027), (\u0027Threat Actor 1\u0027, 18, \u00272023-01-04\u0027), (\u0027Threat Actor 2\u0027, 10, \u00272023-01-05\u0027);", + "sql": "SELECT threat_actor, COUNT(DISTINCT incident_id) as unique_incidents FROM threat_actors_incidents WHERE actor_date \u003e\u003d DATEADD(day, -30, GETDATE()) AND threat_actor \u003d threat_actors.threat_actor GROUP BY threat_actor;", + "sql_explanation": "This SQL query lists all the unique threat actors that have been active in the last month, along with the number of unique security incidents associated with each. It does this by joining the threat_actors_incidents and threat_actors tables on the threat_actor column, filtering the result for rows with dates within the past month, grouping the results by threat_actor, and calculating the count of distinct incident_id for each threat actor." 
+}, { + "id": "652", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 most common types of security incidents reported by \u0027Finance\u0027 department in the last quarter?", + "sql_context": "CREATE TABLE incident_types (id integer, incident text, department text, timestamp timestamp); INSERT INTO incident_types (id, incident, department, timestamp) VALUES (1, \u0027Phishing\u0027, \u0027Finance\u0027, \u00272022-04-01 10:00:00\u0027), (2, \u0027Malware\u0027, \u0027IT\u0027, \u00272022-04-02 11:00:00\u0027), (3, \u0027Phishing\u0027, \u0027Finance\u0027, \u00272022-04-03 12:00:00\u0027), (4, \u0027Insider Threat\u0027, \u0027HR\u0027, \u00272022-04-04 13:00:00\u0027), (5, \u0027Phishing\u0027, \u0027Finance\u0027, \u00272022-04-05 14:00:00\u0027);", + "sql": "SELECT incident, COUNT(*) as incident_count FROM incident_types WHERE department \u003d \u0027Finance\u0027 AND timestamp \u003e\u003d DATEADD(quarter, -1, CURRENT_TIMESTAMP) GROUP BY incident ORDER BY incident_count DESC LIMIT 3;", + "sql_explanation": "This query returns the top 3 most common types of security incidents reported by \u0027Finance\u0027 department in the last quarter. It does this by grouping the incidents by type, filtering for incidents that occurred in the past quarter and were reported by the \u0027Finance\u0027 department, and ordering them by count in descending order." 
+}, { + "id": "662", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 sources of threat intelligence that provided the most actionable intelligence in the past week, along with the number of actionable intelligence reports.", + "sql_context": "CREATE TABLE threat_intelligence (id INT PRIMARY KEY, source VARCHAR(50), actionable_report BOOLEAN, report_time TIMESTAMP); INSERT INTO threat_intelligence (id, source, actionable_report, report_time) VALUES (1, \u0027FireEye\u0027, TRUE, \u00272022-07-01 10:00:00\u0027), (2, \u0027CrowdStrike\u0027, FALSE, \u00272022-07-02 12:30:00\u0027), (3, \u0027Mandiant\u0027, TRUE, \u00272022-07-03 08:15:00\u0027);", + "sql": "SELECT source, COUNT(*) as actionable_reports FROM threat_intelligence WHERE actionable_report \u003d TRUE AND report_time \u003e\u003d NOW() - INTERVAL \u00271 week\u0027 GROUP BY source ORDER BY actionable_reports DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 sources of actionable intelligence in the past week by counting the number of actionable reports for each source within the past week." 
+}, { + "id": "861", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 threat actors with the highest average severity of vulnerabilities in Q2 of 2022?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, threat_actor VARCHAR(50), severity INT, timestamp TIMESTAMP); INSERT INTO vulnerabilities (id, threat_actor, severity, timestamp) VALUES (1, \u0027Lazarus Group\u0027, 8, \u00272022-04-01 10:00:00\u0027), (2, \u0027APT29\u0027, 7, \u00272022-05-02 12:00:00\u0027);", + "sql": "SELECT threat_actor, AVG(severity) as avg_severity FROM vulnerabilities WHERE timestamp \u003e\u003d \u00272022-04-01\u0027 AND timestamp \u003c \u00272022-07-01\u0027 GROUP BY threat_actor ORDER BY avg_severity DESC LIMIT 3;", + "sql_explanation": "The SQL query selects the threat actor and average severity from the vulnerabilities table where the timestamp is greater than or equal to \u00272022-04-01\u0027 and less than \u00272022-07-01\u0027. It then groups the results by threat actor, orders them in descending order based on the average severity, and limits the results to the top 3 threat actors." 
+}, { + "id": "1046", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily count and average severity of high vulnerabilities in North America?", + "sql_context": "CREATE TABLE vulnerabilities(id INT, date DATE, severity VARCHAR(10), country VARCHAR(50), cve_id VARCHAR(50)); INSERT INTO vulnerabilities(id, date, severity, country, cve_id) VALUES (1, \u00272021-01-01\u0027, \u0027medium\u0027, \u0027Canada\u0027, \u0027CVE-2021-0123\u0027), (2, \u00272021-01-02\u0027, \u0027high\u0027, \u0027USA\u0027, \u0027CVE-2021-0124\u0027);", + "sql": "SELECT date, COUNT(*) as total_vulnerabilities, AVG(severity \u003d \u0027high\u0027::int) as avg_high_severity FROM vulnerabilities WHERE country \u003d \u0027North America\u0027 GROUP BY date ORDER BY date;", + "sql_explanation": "This query calculates the number of vulnerabilities per day and the average severity of high vulnerabilities in North America. It uses the date and country columns to filter and group the vulnerabilities, and then counts them. The average severity of high vulnerabilities is calculated by converting the severity column to an integer value (high\u003d1, medium\u003d2, low\u003d3) and averaging the resulting values." 
+}, { + "id": "1070", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total duration of successful authentications for the user \u0027jane.doe\u0027.", + "sql_context": "CREATE TABLE AuthenticationLogs (id INT PRIMARY KEY, username VARCHAR(255), login_time TIMESTAMP, logout_time TIMESTAMP, authentication_status VARCHAR(50)); INSERT INTO AuthenticationLogs (id, username, login_time, logout_time, authentication_status) VALUES (1, \u0027jane.doe\u0027, \u00272021-03-15 11:00:00\u0027, \u00272021-03-15 12:00:00\u0027, \u0027Success\u0027), (2, \u0027john.doe\u0027, \u00272021-03-15 13:00:00\u0027, \u00272021-03-15 13:30:00\u0027, \u0027Failed\u0027);", + "sql": "SELECT username, SUM(TIMESTAMPDIFF(MINUTE, login_time, logout_time)) FROM AuthenticationLogs WHERE username \u003d \u0027jane.doe\u0027 AND authentication_status \u003d \u0027Success\u0027 GROUP BY username;", + "sql_explanation": "Calculate the total duration of successful authentications for the user \u0027jane.doe\u0027." 
+}, { + "id": "1124", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which threat intelligence sources reported the most threats in the last year?", + "sql_context": "CREATE TABLE threat_intelligence (id INT, source TEXT, threats INT, reported_date DATE); INSERT INTO threat_intelligence (id, source, threats, reported_date) VALUES (1, \u0027MI6\u0027, 50, \u00272021-02-01\u0027); INSERT INTO threat_intelligence (id, source, threats, reported_date) VALUES (2, \u0027CIA\u0027, 75, \u00272021-03-10\u0027); INSERT INTO threat_intelligence (id, source, threats, reported_date) VALUES (3, \u0027Mossad\u0027, 100, \u00272021-04-15\u0027);", + "sql": "SELECT source, SUM(threats) as total_threats FROM threat_intelligence WHERE reported_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY source ORDER BY total_threats DESC;", + "sql_explanation": "This query lists the threat intelligence sources that reported the most threats in the last year by filtering the threat_intelligence table by reported_date and grouping the results by source, summing the threats, and ordering the results in descending order based on the total number of threats." 
+}, { + "id": "1146", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Compare the number of high and medium severity vulnerabilities found in the last quarter for web and desktop applications.", + "sql_context": "CREATE TABLE vulnerabilities (id INT, app_type VARCHAR(10), severity VARCHAR(10), timestamp TIMESTAMP);", + "sql": "SELECT app_type, severity, COUNT(*) as total FROM vulnerabilities WHERE severity IN (\u0027high\u0027, \u0027medium\u0027) AND timestamp \u003e\u003d NOW() - INTERVAL 3 MONTH GROUP BY app_type, severity;", + "sql_explanation": "This query calculates the number of high and medium severity vulnerabilities for web and desktop applications in the vulnerabilities table, filters for the last quarter, and groups them by application type and severity. The result provides a comparison of vulnerabilities for both app types and severity levels." 
+}, { + "id": "1149", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of security incidents and vulnerabilities in the \u0027database\u0027 asset type in the last quarter?", + "sql_context": "CREATE TABLE database_issues (id INT, asset_type VARCHAR(50), issue_count INT, issue_date DATE);", + "sql": "SELECT asset_type, SUM(issue_count) as total_issues FROM database_issues WHERE asset_type \u003d \u0027database\u0027 AND issue_date \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY asset_type;", + "sql_explanation": "This query filters the database_issues table for the \u0027database\u0027 asset type and the last quarter. It then groups the results by asset type and sums up the issue count." 
+}, { + "id": "1225", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time to resolve security incidents for each device type in the last month?", + "sql_context": "CREATE TABLE incident_device_resolution (id INT, incident_id INT, device_type VARCHAR(255), resolution_time INT, incident_date DATE); INSERT INTO incident_device_resolution (id, incident_id, device_type, resolution_time, incident_date) VALUES (1, 1, \u0027Laptop\u0027, 120, \u00272021-01-01\u0027), (2, 1, \u0027Mobile\u0027, 150, \u00272021-01-01\u0027), (3, 2, \u0027Desktop\u0027, 100, \u00272021-01-01\u0027), (4, 3, \u0027Laptop\u0027, 180, \u00272021-01-01\u0027), (5, 3, \u0027Server\u0027, 110, \u00272021-01-01\u0027), (6, 4, \u0027Mobile\u0027, 140, \u00272021-01-01\u0027), (7, 4, \u0027Tablet\u0027, 120, \u00272021-01-01\u0027), (8, 5, \u0027Server\u0027, 150, \u00272021-01-01\u0027);", + "sql": "SELECT device_type, AVG(resolution_time) as avg_resolution_time FROM incident_device_resolution WHERE incident_date \u003e\u003d DATEADD(day, -30, GETDATE()) GROUP BY device_type;", + "sql_explanation": "The SQL query calculates the average time to resolve security incidents for each device type in the last 30 days by grouping and averaging the resolution_time for each device_type. It then returns the device_type and average resolution time for each group." 
+}, { + "id": "1248", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity score of vulnerabilities for each product family in the last 6 months?", + "sql_context": "CREATE TABLE vulnerabilities (product_family VARCHAR(50), severity_score INT, vulnerability_date DATE); INSERT INTO vulnerabilities (product_family, severity_score, vulnerability_date) VALUES (\u0027Product Family A\u0027, 7, \u00272023-01-01\u0027), (\u0027Product Family B\u0027, 5, \u00272023-01-02\u0027), (\u0027Product Family C\u0027, 8, \u00272023-01-03\u0027);", + "sql": "SELECT product_family, AVG(severity_score) as avg_severity_score FROM vulnerabilities WHERE vulnerability_date \u003e\u003d DATEADD(month, -6, GETDATE()) GROUP BY product_family;", + "sql_explanation": "This SQL query calculates the average severity score of vulnerabilities for each product family in the last 6 months. It does this by filtering the vulnerabilities table for rows with dates within the past 6 months, grouping the results by product_family, and calculating the average of severity_score for each product family." 
+}, { + "id": "1280", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of security incidents and vulnerabilities in the \u0027cloud\u0027 asset type in the last quarter?", + "sql_context": "CREATE TABLE cloud_issues (id INT, asset_type VARCHAR(50), issue_count INT, issue_date DATE);", + "sql": "SELECT asset_type, SUM(issue_count) as total_issues FROM cloud_issues WHERE asset_type \u003d \u0027cloud\u0027 AND issue_date \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY asset_type;", + "sql_explanation": "This query filters the cloud_issues table for the \u0027cloud\u0027 asset type and the last quarter. It then groups the results by asset type and sums up the issue count." 
+}, { + "id": "1455", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of security incidents for each severity level in the last month.", + "sql_context": "CREATE TABLE incident_severity (id INT, incident_count INT, severity VARCHAR(50), incident_date DATE); INSERT INTO incident_severity (id, incident_count, severity, incident_date) VALUES (1, 12, \u0027Low\u0027, \u00272022-03-01\u0027), (2, 20, \u0027Medium\u0027, \u00272022-03-02\u0027), (3, 30, \u0027High\u0027, \u00272022-03-03\u0027);", + "sql": "SELECT severity, SUM(incident_count) as total_incidents FROM incident_severity WHERE incident_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY severity;", + "sql_explanation": "The SQL query calculates the total number of incidents for each severity level in the last month by grouping and summing the incident_count. It then orders the results by severity." 
+}, { + "id": "1483", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the highest number of security incidents in the past year?", + "sql_context": "CREATE TABLE incidents (incident_id INT PRIMARY KEY, incident_date TIMESTAMP, region VARCHAR(50));", + "sql": "SELECT region, COUNT(*) as incident_count FROM incidents WHERE incident_date \u003e\u003d NOW() - INTERVAL 1 YEAR GROUP BY region ORDER BY incident_count DESC LIMIT 3;", + "sql_explanation": "This query filters security incidents based on their \u0027incident_date\u0027 within the past year and groups them by \u0027region\u0027. Then, it counts the number of incidents per region and orders them in descending order. Finally, it returns the top 3 countries with the highest number of incidents." 
+}, { + "id": "1505", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all countries with more than 10 security incidents in the telecommunications sector in the last year", + "sql_context": "CREATE TABLE incidents (id INT, country VARCHAR(255), sector VARCHAR(255), date DATE); INSERT INTO incidents (id, country, sector, date) VALUES (1, \u0027USA\u0027, \u0027telecommunications\u0027, \u00272022-01-01\u0027); INSERT INTO incidents (id, country, sector, date) VALUES (2, \u0027Canada\u0027, \u0027telecommunications\u0027, \u00272022-01-02\u0027);", + "sql": "SELECT country FROM incidents WHERE sector \u003d \u0027telecommunications\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country HAVING COUNT(*) \u003e 10;", + "sql_explanation": "This query lists all countries with more than 10 security incidents in the telecommunications sector in the last year by filtering the incidents table based on the sector and date range, then grouping the results by country and returning only the countries with more than 10 incidents." 
+}, { + "id": "1594", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the most common threat types in each country?", + "sql_context": "CREATE TABLE security_incidents (id INT, incident_date DATE, country VARCHAR(50), threat_type VARCHAR(50)); INSERT INTO security_incidents (id, incident_date, country, threat_type) VALUES (1, \u00272022-01-01\u0027, \u0027USA\u0027, \u0027Malware\u0027), (2, \u00272022-01-05\u0027, \u0027Canada\u0027, \u0027Phishing\u0027), (3, \u00272022-01-10\u0027, \u0027Mexico\u0027, \u0027Ransomware\u0027);", + "sql": "SELECT country, threat_type, COUNT(*) as incident_count FROM security_incidents GROUP BY country, threat_type HAVING COUNT(*) \u003e 1 ORDER BY COUNT(*) DESC;", + "sql_explanation": "This query groups incidents by country and threat type and filters for rows with a count greater than 1. It then orders the results by the count of incidents in descending order." 
+}, { + "id": "1677", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of security incidents by type for the last year?", + "sql_context": "CREATE TABLE incident_types (incident_type_id INT, incident_type VARCHAR(255)); INSERT INTO incident_types (incident_type_id, incident_type) VALUES (1, \u0027Phishing\u0027), (2, \u0027Malware\u0027), (3, \u0027Ransomware\u0027), (4, \u0027DDoS\u0027), (5, \u0027Insider Threat\u0027), (6, \u0027Data Breach\u0027);", + "sql": "SELECT incident_type, COUNT(*) as incident_count FROM incidents WHERE incident_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY incident_type;", + "sql_explanation": "The SQL query selects the incident type and counts the number of incidents for each type from the \u0027incidents\u0027 table for the last year. It uses the GROUP BY clause to group the results by incident type." 
+}, { + "id": "1720", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cybersecurity policies have been updated in the past week and the number of updates made to each policy?", + "sql_context": "CREATE TABLE policy_updates (id INT, policy_name VARCHAR(255), update_date DATE, updated_by VARCHAR(255), description TEXT); INSERT INTO policy_updates (id, policy_name, update_date, updated_by, description) VALUES (1, \u0027Policy_A\u0027, \u00272022-03-10\u0027, \u0027Admin_1\u0027, \u0027Description_1\u0027);", + "sql": "SELECT policy_name, COUNT(*) as updates_count FROM policy_updates WHERE update_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY policy_name;", + "sql_explanation": "This query aggregates the number of updates made to each cybersecurity policy in the past week from the policy_updates table. It filters the rows where the update date is within the past week, groups the results by policy_name, and orders the results by the number of updates in ascending order." 
+}, { + "id": "1762", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of security incidents by type in the APAC region in the last month?", + "sql_context": "CREATE TABLE incident_types (id INT, incident_type VARCHAR(255), region VARCHAR(255), date DATE); INSERT INTO incident_types (id, incident_type, region, date) VALUES (1, \u0027Phishing\u0027, \u0027APAC\u0027, \u00272021-08-01\u0027); INSERT INTO incident_types (id, incident_type, region, date) VALUES (2, \u0027Malware\u0027, \u0027APAC\u0027, \u00272021-08-05\u0027); INSERT INTO incident_types (id, incident_type, region, date) VALUES (3, \u0027Data Breach\u0027, \u0027APAC\u0027, \u00272021-08-10\u0027);", + "sql": "SELECT incident_type, COUNT(*) as total FROM incident_types WHERE region \u003d \u0027APAC\u0027 AND date \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY incident_type;", + "sql_explanation": "This SQL query selects the incident type and counts the occurrences of each type of incident in the APAC region in the last month. It then groups the results by incident type, providing a distribution of incidents by type." 
+}, { + "id": "1765", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique IP addresses have been associated with each threat category in the last week?", + "sql_context": "CREATE TABLE threats (id INT, category VARCHAR(50), ip_address VARCHAR(50), threat_date DATE); INSERT INTO threats (id, category, ip_address, threat_date) VALUES (1, \u0027Malware\u0027, \u0027192.168.0.1\u0027, \u00272022-01-01\u0027), (2, \u0027Phishing\u0027, \u0027192.168.0.2\u0027, \u00272022-01-03\u0027), (3, \u0027Ransomware\u0027, \u0027192.168.0.3\u0027, \u00272022-01-02\u0027);", + "sql": "SELECT category, COUNT(DISTINCT ip_address) as unique_ips FROM threats WHERE threat_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 WEEK) GROUP BY category;", + "sql_explanation": "This query finds the number of unique IP addresses associated with each threat category in the last week by grouping by category and counting the distinct ip_addresses." 
+}, { + "id": "2025", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of security incidents reported in the last quarter for each sector?", + "sql_context": "CREATE TABLE incidents (incident_id INT, incident_date DATE, incident_sector VARCHAR(255));", + "sql": "SELECT incident_sector, COUNT(*) as count FROM incidents WHERE incident_date \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY incident_sector;", + "sql_explanation": "This query creates a table called \u0027incidents\u0027 that contains information about each security incident, including the date it was reported and the sector it belongs to. The SQL query then filters the results to only include incidents that were reported in the last quarter and groups the results by \u0027incident_sector\u0027. The query then counts the number of occurrences for each sector using the \u0027COUNT\u0027 function. This allows us to see the total number of security incidents reported in the last quarter for each sector." 
+}, { + "id": "2475", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of threat indicators in the \u0027threat_intelligence\u0027 table for each threat category and subcategory?", + "sql_context": "CREATE TABLE threat_intelligence (id INT PRIMARY KEY, threat_category TEXT, threat_subcategory TEXT, indicator TEXT);", + "sql": "SELECT threat_category, threat_subcategory, COUNT(*) FROM threat_intelligence GROUP BY threat_category, threat_subcategory;", + "sql_explanation": "This query selects the \u0027threat_category\u0027 and \u0027threat_subcategory\u0027 columns from the \u0027threat_intelligence\u0027 table and applies the aggregate function \u0027COUNT()\u0027 to count the number of indicators for each category and subcategory. The \u0027GROUP BY\u0027 clause groups the results by both columns." 
+}, { + "id": "2499", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum severity score of vulnerabilities for each organization in the last 6 months that have at least one vulnerability with a severity score of 9?", + "sql_context": "create table vulnerabilities (id int, organization varchar(255), severity int, date date); insert into vulnerabilities values (1, \u0027Google\u0027, 7, \u00272022-01-01\u0027); insert into vulnerabilities values (2, \u0027Google\u0027, 5, \u00272022-01-05\u0027); insert into vulnerabilities values (3, \u0027Apple\u0027, 8, \u00272022-01-10\u0027); insert into vulnerabilities values (4, \u0027Microsoft\u0027, 2, \u00272022-04-15\u0027); insert into vulnerabilities values (5, \u0027Microsoft\u0027, 9, \u00272022-07-01\u0027);", + "sql": "SELECT organization, MIN(severity) FROM vulnerabilities WHERE severity \u003d 9 AND date \u003e\u003d \u00272022-01-01\u0027 GROUP BY organization;", + "sql_explanation": "This SQL query calculates the minimum severity score of vulnerabilities for each organization in the last 6 months that have at least one vulnerability with a severity score of 9 by using the MIN function to find the minimum severity score for each organization, and the WHERE clause to filter the rows to the organizations that have at least one vulnerability with a severity score of 9. The GROUP BY clause is used to group the results by organization." 
+}, { + "id": "2555", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity level of vulnerabilities in each region?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, name VARCHAR, severity VARCHAR, region VARCHAR); INSERT INTO vulnerabilities (id, name, severity, region) VALUES (1, \u0027SQL Injection\u0027, \u0027High\u0027, \u0027North America\u0027), (2, \u0027Cross-site Scripting\u0027, \u0027Medium\u0027, \u0027Europe\u0027), (3, \u0027Broken Authentication\u0027, \u0027High\u0027, \u0027Asia\u0027), (4, \u0027Remote Code Execution\u0027, \u0027Critical\u0027, \u0027South America\u0027), (5, \u0027Privilege Escalation\u0027, \u0027Medium\u0027, \u0027Africa\u0027);", + "sql": "SELECT region, AVG(severity \u003d \u0027High\u0027 OR severity \u003d \u0027Critical\u0027) AS average_severity FROM vulnerabilities GROUP BY region;", + "sql_explanation": "The SQL query selects the region column and calculates the average severity level for each region by using a case statement to count the number of high and critical severity vulnerabilities. The query then groups the results by region, which will return the average severity level of vulnerabilities in each region." 
+}, { + "id": "2724", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many high-severity vulnerabilities exist for each software product?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, product VARCHAR(255), severity INT); INSERT INTO vulnerabilities (id, product, severity) VALUES (1, \u0027ProductA\u0027, 5), (2, \u0027ProductB\u0027, 9), (3, \u0027ProductA\u0027, 3);", + "sql": "SELECT product, COUNT(*) as high_severity_vulnerabilities FROM vulnerabilities WHERE severity \u003e\u003d 8 GROUP BY product;", + "sql_explanation": "Count the number of high-severity vulnerabilities for each software product by grouping by the product column and applying a condition on the severity column to filter vulnerabilities with a severity of 8 or greater, then counting the number of records." 
+}, { + "id": "2781", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique threat actors in each region and their associated threat categories.", + "sql_context": "CREATE TABLE threat_actors (id INT, actor_name TEXT, region TEXT, category TEXT); INSERT INTO threat_actors (id, actor_name, region, category) VALUES (1, \u0027Actor1\u0027, \u0027Europe\u0027, \u0027Malware\u0027), (2, \u0027Actor2\u0027, \u0027Asia\u0027, \u0027Phishing\u0027), (3, \u0027Actor3\u0027, \u0027Europe\u0027, \u0027Ransomware\u0027);", + "sql": "SELECT region, category, COUNT(DISTINCT actor_name) as unique_actors FROM threat_actors GROUP BY region, category;", + "sql_explanation": "This query groups the \u0027threat_actors\u0027 table by region and category, and counts the number of unique threat actors for each combination of region and category." 
+}, { + "id": "2940", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity score of vulnerabilities for each software product, ordered by the average severity score in ascending order?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, product VARCHAR(50), severity FLOAT);", + "sql": "SELECT product, AVG(severity) as avg_severity FROM vulnerabilities GROUP BY product ORDER BY avg_severity ASC;", + "sql_explanation": "This query selects the product column and the average severity (aliased as avg_severity) from the vulnerabilities table. The GROUP BY clause groups the records by product. The ORDER BY clause orders the results by the average severity score in ascending order." 
+}, { + "id": "3155", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of security incidents by severity level for the healthcare sector?", + "sql_context": "CREATE TABLE incidents (incident_id INT, incident_severity VARCHAR(255), incident_sector VARCHAR(255)); CREATE VIEW healthcare_sector_incidents AS SELECT * FROM incidents WHERE incident_sector \u003d \u0027Healthcare\u0027;", + "sql": "SELECT incident_severity, COUNT(*) as count FROM healthcare_sector_incidents GROUP BY incident_severity;", + "sql_explanation": "This query creates two tables, \u0027incidents\u0027 and a view called \u0027healthcare_sector_incidents\u0027. The \u0027incidents\u0027 table contains information about each security incident, including the severity level and the sector it belongs to. The \u0027healthcare_sector_incidents\u0027 view filters the \u0027incidents\u0027 table to only include incidents in the healthcare sector. The SQL query then groups the results by \u0027incident_severity\u0027 and counts the number of occurrences for each severity level. This allows us to see the distribution of security incidents by severity level for the healthcare sector." 
+}, { + "id": "4409", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many security incidents were recorded in the \u0027security_incidents\u0027 table for each incident type?", + "sql_context": "CREATE TABLE security_incidents (id INT PRIMARY KEY, incident_name TEXT, incident_type TEXT, date_reported DATE);", + "sql": "SELECT incident_type, COUNT(*) FROM security_incidents GROUP BY incident_type;", + "sql_explanation": "This query selects the \u0027incident_type\u0027 column from the \u0027security_incidents\u0027 table and applies the aggregate function \u0027COUNT()\u0027 to count the number of incidents for each type. The \u0027GROUP BY\u0027 clause groups the results by incident type." 
+}, { + "id": "2464", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the top 3 countries by fair-trade coffee exports in 2020.", + "sql_context": "CREATE TABLE FairTradeCoffeeExports (id INT, country VARCHAR(50), year INT, value INT); INSERT INTO FairTradeCoffeeExports (id, country, year, value) VALUES (1, \u0027Colombia\u0027, 2019, 1000), (2, \u0027Colombia\u0027, 2020, 1500), (3, \u0027Peru\u0027, 2019, 800), (4, \u0027Peru\u0027, 2020, 1200), (5, \u0027Ecuador\u0027, 2019, 700), (6, \u0027Ecuador\u0027, 2020, 900), (7, \u0027Mexico\u0027, 2019, 600), (8, \u0027Mexico\u0027, 2020, 1100);", + "sql": "SELECT country, SUM(value) FROM FairTradeCoffeeExports WHERE year \u003d 2020 GROUP BY country ORDER BY SUM(value) DESC LIMIT 3;", + "sql_explanation": "This SQL query calculates the sum of the \u0027value\u0027 column for each unique value in the \u0027country\u0027 column for records where the \u0027year\u0027 column is 2020. It then orders the result set by the sum of \u0027value\u0027 in descending order and limits the result set to the top 3 records." 
+}, { + "id": "2792", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average calorie intake per meal for each country in the \u0027nutrition\u0027 table, ordered by average calorie intake in descending order, and limited to the top 2 countries?", + "sql_context": "CREATE TABLE nutrition (country VARCHAR(255), calories INT, meal_time TIME); INSERT INTO nutrition (country, calories, meal_time) VALUES (\u0027USA\u0027, 800, \u0027Breakfast\u0027), (\u0027USA\u0027, 1200, \u0027Lunch\u0027), (\u0027USA\u0027, 500, \u0027Dinner\u0027), (\u0027India\u0027, 500, \u0027Breakfast\u0027), (\u0027India\u0027, 700, \u0027Lunch\u0027), (\u0027India\u0027, 600, \u0027Dinner\u0027), (\u0027Canada\u0027, 600, \u0027Breakfast\u0027), (\u0027Canada\u0027, 800, \u0027Lunch\u0027), (\u0027Canada\u0027, 400, \u0027Dinner\u0027);", + "sql": "SELECT country, AVG(calories) as avg_calories FROM nutrition GROUP BY country ORDER BY avg_calories DESC LIMIT 2;", + "sql_explanation": "The SQL query calculates the average calories per meal for each country using the AVG function and groups them by country. The ORDER BY clause sorts the results in descending order, and the LIMIT clause returns only the top 2 countries with the highest average calorie intake." 
+}, { + "id": "2807", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify suppliers with a sustainability score below 70, and their average score.", + "sql_context": "CREATE TABLE suppliers (id INT, name VARCHAR(255), sustainability_score INT); INSERT INTO suppliers (id, name, sustainability_score) VALUES (1, \u0027Supplier A\u0027, 85), (2, \u0027Supplier B\u0027, 65), (3, \u0027Supplier C\u0027, 90), (4, \u0027Supplier D\u0027, 70);", + "sql": "SELECT AVG(sustainability_score) as avg_score, name FROM suppliers WHERE sustainability_score \u003c 70 GROUP BY name;", + "sql_explanation": "The SQL query selects the average sustainability_score and name for all suppliers with a score below 70. The GROUP BY statement groups the selected data by the name of each supplier." 
+}, { + "id": "3027", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cuisine has the highest average calorie count?", + "sql_context": "CREATE TABLE Meals (MealID INT, MealName VARCHAR(50), Country VARCHAR(50), Calories INT); INSERT INTO Meals (MealID, MealName, Country, Calories) VALUES (1, \u0027Spaghetti Bolognese\u0027, \u0027Italy\u0027, 650), (2, \u0027Chicken Tikka Masala\u0027, \u0027UK\u0027, 850);", + "sql": "SELECT Country, AVG(Calories) as AvgCalories FROM Meals GROUP BY Country ORDER BY AvgCalories DESC LIMIT 1;", + "sql_explanation": "This query determines which cuisine has the highest average calorie count. It does this by selecting the Country and average Calories from the Meals table, groups the results by Country, orders them by the average Calories in descending order, and returns only the top result." 
+}, { + "id": "3270", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average calorie intake per person for each continent in 2021?", + "sql_context": "CREATE TABLE CountryFoodIntake (CountryName VARCHAR(50), Continent VARCHAR(50), Year INT, CaloriesPerPerson INT); INSERT INTO CountryFoodIntake (CountryName, Continent, Year, CaloriesPerPerson) VALUES (\u0027United States\u0027, \u0027North America\u0027, 2021, 3800), (\u0027Mexico\u0027, \u0027North America\u0027, 2021, 3400), (\u0027Italy\u0027, \u0027Europe\u0027, 2021, 3200), (\u0027Japan\u0027, \u0027Asia\u0027, 2021, 2800), (\u0027India\u0027, \u0027Asia\u0027, 2021, 2500);", + "sql": "SELECT Continent, AVG(CaloriesPerPerson) FROM CountryFoodIntake WHERE Year \u003d 2021 GROUP BY Continent;", + "sql_explanation": "The query calculates the average calorie intake per person for each continent in 2021 by grouping the data by continent and calculating the average calorie intake per person." 
+}, { + "id": "3527", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of times each dish has been ordered, sorted by the most ordered dish", + "sql_context": "CREATE TABLE orders (order_id INT, dish_id INT, order_date DATE); INSERT INTO orders (order_id, dish_id, order_date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 2, \u00272022-01-01\u0027), (3, 3, \u00272022-01-01\u0027), (4, 1, \u00272022-01-02\u0027), (5, 3, \u00272022-01-02\u0027), (6, 1, \u00272022-01-03\u0027);", + "sql": "SELECT dish_id, COUNT(*) as order_count FROM orders GROUP BY dish_id ORDER BY order_count DESC;", + "sql_explanation": "This query displays the number of times each dish has been ordered, sorted by the most ordered dish. It does this by grouping the rows by the dish id and computing the count of these rows, and then ordering the rows by the count of these rows in descending order." 
+}, { + "id": "3921", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of gluten-free product sales by region?", + "sql_context": "CREATE TABLE sales (id INT, region TEXT, product_id INT, is_gluten_free BOOLEAN, revenue INT); INSERT INTO sales (id, region, product_id, is_gluten_free, revenue) VALUES (1, \u0027Northeast\u0027, 1, true, 200), (2, \u0027Northeast\u0027, 2, false, 150), (3, \u0027Southeast\u0027, 3, true, 250), (4, \u0027Southeast\u0027, 4, false, 180), (5, \u0027Midwest\u0027, 5, true, 180), (6, \u0027Midwest\u0027, 6, false, 120), (7, \u0027Southwest\u0027, 7, true, 300), (8, \u0027Southwest\u0027, 8, false, 200);", + "sql": "SELECT region, is_gluten_free, AVG(revenue) FROM sales GROUP BY region, is_gluten_free;", + "sql_explanation": "Group the sales table by region and is_gluten_free, then calculate the average revenue." 
+}, { + "id": "4631", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average calorie intake for meals served in \u0027vegan_restaurants\u0027 table, grouped by cuisine type?", + "sql_context": "CREATE TABLE vegan_restaurants (restaurant_id INT, cuisine VARCHAR(255), avg_calories DECIMAL(5,2));", + "sql": "SELECT cuisine, AVG(avg_calories) FROM vegan_restaurants GROUP BY cuisine;", + "sql_explanation": "The SQL query calculates the average calorie intake (AVG(avg_calories)) for each unique cuisine type (GROUP BY cuisine) in the \u0027vegan_restaurants\u0027 table." +}, { + "id": "1189", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many \u0027Vegan\u0027 and \u0027Gluten-free\u0027 menu items are offered by each vendor?", + "sql_context": "CREATE TABLE Menu (MenuID INT, Name VARCHAR(50), Type VARCHAR(50), VendorID INT); INSERT INTO Menu (MenuID, Name, Type, VendorID) VALUES (1, \u0027Veggie Burger\u0027, \u0027Vegan\u0027, 1), (2, \u0027Falafel Wrap\u0027, \u0027Vegan\u0027, 1), (3, \u0027Breadless Sandwich\u0027, \u0027Gluten-free\u0027, 2);", + "sql": "SELECT VendorID, COUNT(CASE WHEN Type \u003d \u0027Vegan\u0027 THEN 1 END) AS VeganCount, COUNT(CASE WHEN Type 
\u003d \u0027Gluten-free\u0027 THEN 1 END) AS GlutenFreeCount FROM Menu GROUP BY VendorID;", + "sql_explanation": "This query calculates the number of Vegan and Gluten-free menu items for each vendor by grouping the records by vendor and then counting the number of records for each menu type." +}, { + "id": "1511", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the daily revenue for the past week", + "sql_context": "CREATE TABLE sales (sale_id INT, sale_date DATE, dish_id INT, quantity INT, price DECIMAL(5,2)); INSERT INTO sales (sale_id, sale_date, dish_id, quantity, price) VALUES (1, \u00272022-01-01\u0027, 1, 2, 12.99), (2, \u00272022-01-01\u0027, 2, 1, 15.99), (3, \u00272022-01-02\u0027, 3, 3, 9.99);", + "sql": "SELECT SUM(quantity * price) as daily_revenue FROM sales WHERE sale_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY) AND CURRENT_DATE GROUP BY sale_date;", + "sql_explanation": "This query calculates the daily revenue for the past week by summing the product of quantity and price for each sale. It then groups the result by sale_date, providing a daily revenue breakdown." 
+}, { + "id": "1970", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of organic chicken sold per store, ranked by the highest sales?", + "sql_context": "CREATE TABLE Stores (StoreID int, StoreName varchar(50)); INSERT INTO Stores VALUES (1, \u0027Store A\u0027), (2, \u0027Store B\u0027), (3, \u0027Store C\u0027); CREATE TABLE MenuItems (MenuItemID int, MenuItemName varchar(50), IsOrganic bit, StoreID int, QuantitySold int); INSERT INTO MenuItems VALUES (1, \u0027Organic Chicken\u0027, 1, 1, 200), (2, \u0027Organic Chicken\u0027, 1, 2, 350), (3, \u0027Organic Chicken\u0027, 1, 3, 150);", + "sql": "SELECT StoreID, SUM(QuantitySold) AS TotalQuantitySold FROM MenuItems WHERE IsOrganic \u003d 1 GROUP BY StoreID ORDER BY TotalQuantitySold DESC;", + "sql_explanation": "This query calculates the total quantity of organic chicken sold per store by summing the QuantitySold column and grouping by StoreID. The WHERE clause filters for organic items only. The results are ranked by the highest sales using ORDER BY." 
+}, { + "id": "2033", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of vegan ingredients used in each dish category?", + "sql_context": "CREATE TABLE dish(category VARCHAR(255), ingredient VARCHAR(255), quantity INT); INSERT INTO dish(category, ingredient, quantity) VALUES (\u0027Starter\u0027, \u0027Lettuce\u0027, 100), (\u0027Starter\u0027, \u0027Tofu\u0027, 150), (\u0027Main\u0027, \u0027Chickpeas\u0027, 200), (\u0027Main\u0027, \u0027Tofu\u0027, 250), (\u0027Side\u0027, \u0027Quinoa\u0027, 120), (\u0027Side\u0027, \u0027Lettuce\u0027, 180);", + "sql": "SELECT category, SUM(quantity) as total_vegan_quantity FROM dish WHERE ingredient IN (\u0027Lettuce\u0027, \u0027Tofu\u0027, \u0027Chickpeas\u0027) GROUP BY category;", + "sql_explanation": "This query calculates the total quantity of vegan ingredients for each dish category. It does so by filtering the ingredients to only include vegan options and then grouping by category while summing the quantity." 
+}, { + "id": "2266", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of food waste generated in the last month, categorized by ingredient type?", + "sql_context": "CREATE TABLE food_waste (id INT, waste_date DATE, ingredient_name TEXT, quantity INT);", + "sql": "SELECT ingredient_name, SUM(quantity) FROM food_waste WHERE waste_date \u003e\u003d DATE(NOW()) - INTERVAL 1 MONTH GROUP BY ingredient_name;", + "sql_explanation": "The SQL query filters the food_waste table to only include records from the last month and then groups the results by ingredient_name. Finally, it calculates the total quantity of food waste generated for each ingredient type in the result." 
+}, { + "id": "2645", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most popular vegetarian dish in the dinner category?", + "sql_context": "CREATE TABLE menu (item_id INT, name TEXT, category TEXT, is_vegetarian BOOLEAN, price FLOAT); INSERT INTO menu (item_id, name, category, is_vegetarian, price) VALUES (1, \u0027Chickpea Curry\u0027, \u0027Lunch\u0027, true, 10.5), (2, \u0027Chicken Tikka Masala\u0027, \u0027Lunch\u0027, false, 13.0), (3, \u0027Quinoa Salad\u0027, \u0027Starters\u0027, true, 7.5), (4, \u0027Eggplant Parmigiana\u0027, \u0027Dinner\u0027, true, 12.0);", + "sql": "SELECT name FROM menu WHERE is_vegetarian \u003d true AND category \u003d \u0027Dinner\u0027 GROUP BY name ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "The SQL query selects the name from the menu table, filters for vegetarian dishes in the dinner category, groups the results by name, orders them by count, and returns the name of the most popular vegetarian dish in the dinner category." 
+}, { + "id": "3183", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the revenue generated by each menu category in the APAC region.", + "sql_context": "CREATE TABLE menu_items (item_id INT, item_name VARCHAR(50), category VARCHAR(20), region VARCHAR(20), price DECIMAL(5,2), sales INT); INSERT INTO menu_items (item_id, item_name, category, region, price, sales) VALUES (1, \u0027Vegetable Spring Rolls\u0027, \u0027Appetizers\u0027, \u0027APAC\u0027, 4.99, 300), (2, \u0027Spicy Tofu\u0027, \u0027Entrees\u0027, \u0027APAC\u0027, 12.99, 200), (3, \u0027Mango Sticky Rice\u0027, \u0027Desserts\u0027, \u0027APAC\u0027, 6.99, 250);", + "sql": "SELECT category, SUM(price * sales) AS revenue FROM menu_items WHERE region \u003d \u0027APAC\u0027 GROUP BY category;", + "sql_explanation": "The SQL query calculates the revenue for each menu category in the APAC region by summing the product of the price and sales columns for each group." 
+}, { + "id": "3436", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most popular dish in the dinner category?", + "sql_context": "CREATE TABLE menu (item_id INT, name TEXT, category TEXT, is_vegetarian BOOLEAN, price FLOAT, popularity INT); INSERT INTO menu (item_id, name, category, is_vegetarian, price, popularity) VALUES (1, \u0027Chickpea Curry\u0027, \u0027Lunch\u0027, true, 10.5, 50), (2, \u0027Chicken Tikka Masala\u0027, \u0027Lunch\u0027, false, 13.0, 75), (3, \u0027Quinoa Salad\u0027, \u0027Starters\u0027, true, 7.5, 100), (4, \u0027Eggplant Parmigiana\u0027, \u0027Dinner\u0027, true, 12.0, 120), (5, \u0027Bruschetta\u0027, \u0027Starters\u0027, true, 6.0, 80);", + "sql": "SELECT name, MAX(popularity) as max_popularity FROM menu WHERE category \u003d \u0027Dinner\u0027 GROUP BY name;", + "sql_explanation": "The SQL query filters for dishes in the dinner category, calculates the maximum popularity for each dish, and returns the name of the most popular dish in the dinner category." 
+}, { + "id": "4068", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many units of each menu item are currently in inventory?", + "sql_context": "CREATE TABLE inventory (menu_item VARCHAR(255), quantity INT); INSERT INTO inventory (menu_item, quantity) VALUES (\u0027Burger Buns\u0027, 1200); INSERT INTO inventory (menu_item, quantity) VALUES (\u0027Lettuce\u0027, 2500);", + "sql": "SELECT menu_item, SUM(quantity) as total_quantity FROM inventory GROUP BY menu_item;", + "sql_explanation": "The SQL query selects the menu_item and the sum of quantity (total_quantity) from the inventory table. Then, it groups the result by menu_item." 
+}, { + "id": "4557", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of unique customers by location.", + "sql_context": "CREATE TABLE orders (id INT, customer_id INT, location TEXT); INSERT INTO orders (id, customer_id, location) VALUES (1, 1001, \u0027San Francisco\u0027), (2, 1002, \u0027New York\u0027), (3, 1003, \u0027Chicago\u0027), (4, 1001, \u0027Los Angeles\u0027), (5, 1004, \u0027Austin\u0027), (6, 1005, \u0027Seattle\u0027), (7, 1002, \u0027Miami\u0027), (8, 1001, \u0027Boston\u0027);", + "sql": "SELECT location, COUNT(DISTINCT customer_id) FROM orders GROUP BY location;", + "sql_explanation": "This query groups the orders table by location and calculates the number of unique customers for each location." 
+}, { + "id": "205", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of recyclable and non-recyclable materials for each category, and the percentage of recyclable materials for each category?", + "sql_context": "CREATE TABLE materials (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), recyclable BOOLEAN); INSERT INTO materials (id, name, category, recyclable) VALUES (1, \u0027Plastic\u0027, \u0027Packaging\u0027, FALSE), (2, \u0027Aluminum\u0027, \u0027Metals\u0027, TRUE), (3, \u0027Glass\u0027, \u0027Packaging\u0027, TRUE), (4, \u0027Paper\u0027, \u0027Paper\u0027, TRUE);", + "sql": "SELECT category, SUM(CASE WHEN recyclable THEN 1 ELSE 0 END) as recyclable_count, SUM(CASE WHEN NOT recyclable THEN 1 ELSE 0 END) as non_recyclable_count, 100.0 * AVG(CASE WHEN recyclable THEN 1.0 ELSE 0.0 END) as recyclable_percentage FROM materials GROUP BY category;", + "sql_explanation": "This query groups the materials by category, counts the number of recyclable and non-recyclable materials for each category, and calculates the percentage of recyclable materials for each category." 
+}, { + "id": "2218", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which suppliers provide materials exclusively for the renewable energy sector?", + "sql_context": "CREATE TABLE supplier_data (supplier_name VARCHAR(50), material VARCHAR(50), industry VARCHAR(50)); INSERT INTO supplier_data (supplier_name, material, industry) VALUES (\u0027SupplierH\u0027, \u0027Silicon\u0027, \u0027Renewable\u0027), (\u0027SupplierI\u0027, \u0027Nickel\u0027, \u0027Renewable\u0027), (\u0027SupplierJ\u0027, \u0027Lithium\u0027, \u0027Renewable\u0027), (\u0027SupplierK\u0027, \u0027Steel\u0027, \u0027Automotive\u0027), (\u0027SupplierL\u0027, \u0027Plastic\u0027, \u0027Consumer Goods\u0027);", + "sql": "SELECT supplier_name FROM supplier_data GROUP BY supplier_name HAVING COUNT(DISTINCT industry) \u003d 1 AND MIN(industry) \u003d \u0027Renewable\u0027;", + "sql_explanation": "This query uses the GROUP BY clause to group the results by supplier_name and the HAVING clause to filter rows based on aggregate values. The COUNT(DISTINCT industry) function checks if the supplier provides materials for only one industry, and the MIN function checks if the industry value is \u0027Renewable\u0027." 
+}, { + "id": "2373", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total number of safety incidents reported in each factory for 2022.", + "sql_context": "CREATE TABLE safety_incidents (id INT, factory_id INT, incident_type VARCHAR(50), date DATE); INSERT INTO safety_incidents (id, factory_id, incident_type, date) VALUES (1, 1, \u0027Chemical Spill\u0027, \u00272022-01-01\u0027), (2, 2, \u0027Equipment Malfunction\u0027, \u00272022-03-01\u0027), (3, 1, \u0027Power Outage\u0027, \u00272022-04-01\u0027);", + "sql": "SELECT factory_id, COUNT(*) as num_incidents FROM safety_incidents WHERE DATE_FORMAT(date, \u0027%Y\u0027) \u003d \u00272022\u0027 GROUP BY factory_id;", + "sql_explanation": "This SQL query extracts the year from the date column and groups the results by this value. It then counts the number of safety incidents reported in each factory for 2022." 
+}, { + "id": "2410", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patents filed in the renewable energy sector, by technology, for the past 5 years?", + "sql_context": "CREATE TABLE renewable_patents (year INT, technology VARCHAR(50), patents NUMERIC); INSERT INTO renewable_patents (year, technology, patents) VALUES (2017, \u0027Solar\u0027, 1000), (2017, \u0027Wind\u0027, 800), (2017, \u0027Hydro\u0027, 600), (2018, \u0027Solar\u0027, 1200), (2018, \u0027Wind\u0027, 900), (2018, \u0027Hydro\u0027, 700), (2019, \u0027Solar\u0027, 1400), (2019, \u0027Wind\u0027, 1100), (2019, \u0027Hydro\u0027, 900), (2020, \u0027Solar\u0027, 1600), (2020, \u0027Wind\u0027, 1300), (2020, \u0027Hydro\u0027, 1100), (2021, \u0027Solar\u0027, 1800), (2021, \u0027Wind\u0027, 1500), (2021, \u0027Hydro\u0027, 1300);", + "sql": "SELECT technology, SUM(patents) as total_patents FROM renewable_patents WHERE year BETWEEN 2017 AND 2021 GROUP BY technology;", + "sql_explanation": "This query calculates the total number of patents filed in the renewable energy sector, by technology, for the past 5 years by summing the patents for each technology and year within the specified range." 
+}, { + "id": "2438", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average lead time for each product category in the \u0027ethical_manufacturing\u0027 schema?", + "sql_context": "CREATE TABLE ethical_manufacturing.lead_time (product_category VARCHAR(50), lead_time INT); INSERT INTO ethical_manufacturing.lead_time (product_category, lead_time) VALUES (\u0027Category A\u0027, 30), (\u0027Category A\u0027, 45), (\u0027Category B\u0027, 60), (\u0027Category B\u0027, 75);", + "sql": "SELECT product_category, AVG(lead_time) as average_lead_time FROM ethical_manufacturing.lead_time GROUP BY product_category;", + "sql_explanation": "This query calculates the average lead time for each product category by grouping the \u0027lead_time\u0027 table by the \u0027product_category\u0027 column and computing the average of the \u0027lead_time\u0027 column for each group." 
+}, { + "id": "2853", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time to complete a production cycle for each machine?", + "sql_context": "CREATE TABLE machines(id INT, name TEXT, location TEXT);CREATE TABLE cycles(id INT, machine_id INT, start_time TIMESTAMP, end_time TIMESTAMP);INSERT INTO machines(id, name, location) VALUES (1, \u0027Machine A\u0027, \u0027Location A\u0027), (2, \u0027Machine B\u0027, \u0027Location B\u0027); INSERT INTO cycles(id, machine_id, start_time, end_time) VALUES (1, 1, \u00272021-02-01 09:00:00\u0027, \u00272021-02-01 10:00:00\u0027), (2, 1, \u00272021-02-01 11:00:00\u0027, \u00272021-02-01 12:00:00\u0027), (3, 2, \u00272021-02-01 08:00:00\u0027, \u00272021-02-01 09:30:00\u0027);", + "sql": "SELECT machine_id, AVG(TIMESTAMPDIFF(MINUTE, start_time, end_time)) as avg_time FROM cycles GROUP BY machine_id;", + "sql_explanation": "The query calculates the difference between start_time and end_time for each cycle, and then groups the result by machine_id to calculate the average time to complete a production cycle for each machine." 
+}, { + "id": "3109", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of engineers in the automotive industry?", + "sql_context": "CREATE TABLE Employees (id INT, industry VARCHAR(255), job_title VARCHAR(255), salary DECIMAL(10, 2)); INSERT INTO Employees (id, industry, job_title, salary) VALUES (1, \u0027Automotive\u0027, \u0027Engineer\u0027, 75000.00), (2, \u0027Automotive\u0027, \u0027Engineer\u0027, 80000.00), (3, \u0027Aerospace\u0027, \u0027Engineer\u0027, 90000.00);", + "sql": "SELECT industry, AVG(salary) as avg_salary FROM Employees WHERE job_title \u003d \u0027Engineer\u0027 GROUP BY industry;", + "sql_explanation": "This SQL query calculates the average salary of engineers in the automotive industry. It uses the GROUP BY clause to group the records by industry and the AVG function to calculate the average salary for each group." 
+}, { + "id": "3376", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of companies that have adopted industry 4.0 technologies in each region?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, region TEXT, industry_4_0 BOOLEAN); INSERT INTO companies (id, name, region, industry_4_0) VALUES (1, \u0027Smart Manufacturing\u0027, \u0027North America\u0027, TRUE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (2, \u0027Automated Assembly\u0027, \u0027Europe\u0027, FALSE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (3, \u0027Digital Production\u0027, \u0027Asia\u0027, TRUE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (4, \u0027Robotic R\u0026D\u0027, \u0027Africa\u0027, FALSE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (5, \u0027Connected Construction\u0027, \u0027Australia\u0027, TRUE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (6, \u0027Intelligent Industry\u0027, \u0027South America\u0027, FALSE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (7, \u0027Autonomous Automotive\u0027, \u0027North America\u0027, TRUE); INSERT INTO companies (id, name, region, industry_4_0) VALUES (8, \u0027Cybernetic Creations\u0027, \u0027Europe\u0027, TRUE);", + "sql": "SELECT region, COUNT(*) AS company_count FROM companies WHERE industry_4_0 \u003d TRUE GROUP BY region;", + "sql_explanation": "The SQL query calculates the total number of companies that have adopted industry 4.0 technologies in each region by filtering the 
\u0027companies\u0027 table for companies with industry 4.0 technologies, grouping the results by region, and then applying the COUNT function to the results." +}, { + "id": "3478", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average reduction percentage for each waste reduction initiative?", + "sql_context": "CREATE TABLE waste_reduction (id INT PRIMARY KEY, initiative_name VARCHAR(255), reduction_percentage DECIMAL(5,2), start_date DATE, end_date DATE); INSERT INTO waste_reduction (id, initiative_name, reduction_percentage, start_date, end_date) VALUES (1, \u0027Waste Sorting\u0027, 12.5, \u00272020-01-01\u0027, \u00272020-12-31\u0027);", + "sql": "SELECT initiative_name, AVG(reduction_percentage) FROM waste_reduction GROUP BY initiative_name;", + "sql_explanation": "The SQL query calculates the average \u0027reduction_percentage\u0027 for each \u0027initiative_name\u0027, effectively determining the average reduction percentage for each waste reduction initiative." 
+}, { + "id": "3574", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each product line in the \u0027finance\u0027 schema?", + "sql_context": "CREATE TABLE finance.revenue (product_line VARCHAR(50), month INT, year INT, revenue DECIMAL(10,2)); INSERT INTO finance.revenue (product_line, month, year, revenue) VALUES (\u0027Product Line A\u0027, 1, 2022, 12000.00), (\u0027Product Line A\u0027, 2, 2022, 24000.00), (\u0027Product Line B\u0027, 1, 2022, 18000.00), (\u0027Product Line B\u0027, 2, 2022, 30000.00);", + "sql": "SELECT product_line, SUM(revenue) as total_revenue FROM finance.revenue GROUP BY product_line;", + "sql_explanation": "This query calculates the total revenue for each product line by grouping the \u0027revenue\u0027 table by the \u0027product_line\u0027 column and computing the sum of the \u0027revenue\u0027 column for each group." 
+}, { + "id": "3693", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of workers in the aerospace industry by country?", + "sql_context": "CREATE TABLE worker_details (id INT, name VARCHAR(50), country VARCHAR(50), industry VARCHAR(50), age INT);", + "sql": "SELECT country, AVG(age) FROM worker_details WHERE industry \u003d \u0027aerospace\u0027 GROUP BY country;", + "sql_explanation": "The SQL query groups the records by country and calculates the average age of workers in the aerospace industry for each group." +}, { + "id": "3713", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total profit for each product line in the \u0027finance\u0027 schema?", + "sql_context": "CREATE TABLE finance.profit (product_line VARCHAR(50), month INT, year INT, profit DECIMAL(10,2)); INSERT INTO finance.profit (product_line, month, year, profit) VALUES (\u0027Product Line A\u0027, 1, 2022, 2000.00), (\u0027Product Line A\u0027, 2, 2022, 4000.00), (\u0027Product Line B\u0027, 1, 2022, 3000.00), (\u0027Product Line B\u0027, 2, 2022, 5000.00);", + "sql": "SELECT product_line, SUM(profit) as total_profit FROM finance.profit GROUP BY product_line;", + 
"sql_explanation": "This query calculates the total profit for each product line by grouping the \u0027profit\u0027 table by the \u0027product_line\u0027 column and computing the sum of the \u0027profit\u0027 column for each group." +}, { + "id": "4379", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generated by each department?", + "sql_context": "CREATE TABLE factories (factory_id INT, department VARCHAR(20), waste_generated_kg INT); INSERT INTO factories VALUES (1, \u0027textiles\u0027, 500), (2, \u0027metalwork\u0027, 300), (3, \u0027textiles\u0027, 700), (4, \u0027electronics\u0027, 400), (5, \u0027textiles\u0027, 600);", + "sql": "SELECT department, SUM(waste_generated_kg) FROM factories GROUP BY department;", + "sql_explanation": "The SQL query groups the results by the \u0027department\u0027 column and calculates the sum of the \u0027waste_generated_kg\u0027 column for each group." 
+}, { + "id": "557", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients who have received treatment in each facility and year in the patient_treatment_timeline table, and what is the percentage of patients who have recovered?", + "sql_context": "CREATE TABLE patient_treatment_timeline (patient_id INT, facility_id INT, treatment_year INT, received_treatment BOOLEAN, recovered BOOLEAN);", + "sql": "SELECT facility_id, treatment_year, COUNT(*) AS total_patients, 100.0 * SUM(recovered AND received_treatment) / COUNT(*) AS recovery_percentage FROM patient_treatment_timeline GROUP BY facility_id, treatment_year;", + "sql_explanation": "The SQL query calculates the total number of patients who have received treatment in each facility and year in the patient_treatment_timeline table, and what is the percentage of patients who have recovered. It does this by using the COUNT function to calculate the total number of patients, and the SUM function to calculate the number of patients who have recovered, grouped by the facility_id and treatment_year columns. The percentage is calculated by dividing the number of recovered patients by the total number of patients, and multiplying by 100.0." 
+}, { + "id": "996", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common alternative therapy among female patients in India?", + "sql_context": "CREATE TABLE patients (id INT, age INT, gender TEXT, country TEXT, alternative_therapy TEXT); INSERT INTO patients (id, age, gender, country, alternative_therapy) VALUES (1, 35, \u0027Female\u0027, \u0027India\u0027, \u0027Meditation\u0027); INSERT INTO patients (id, age, gender, country, alternative_therapy) VALUES (2, 42, \u0027Male\u0027, \u0027Australia\u0027, \u0027Yes\u0027);", + "sql": "SELECT alternative_therapy, COUNT(*) AS count FROM patients WHERE patients.gender \u003d \u0027Female\u0027 AND patients.country \u003d \u0027India\u0027 GROUP BY alternative_therapy ORDER BY count DESC LIMIT 1;", + "sql_explanation": "This query retrieves the most common alternative therapy among female patients in India by filtering the patients table for rows with the gender of female and the country of India, grouping by alternative_therapy, and sorting in descending order by count. The query then returns the top row, which represents the most common alternative therapy." 
+}, { + "id": "1010", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which gender participated the most in depression-related campaigns since 2010?", + "sql_context": "CREATE TABLE campaigns (campaign_id INT, campaign_name TEXT, launch_date DATE, country TEXT, target_demographic TEXT); INSERT INTO campaigns (campaign_id, campaign_name, launch_date, country, target_demographic) VALUES (1, \u0027Campaign A\u0027, \u00272010-01-01\u0027, \u0027India\u0027, \u0027Depression - Female\u0027); INSERT INTO campaigns (campaign_id, campaign_name, launch_date, country, target_demographic) VALUES (2, \u0027Campaign B\u0027, \u00272011-05-15\u0027, \u0027India\u0027, \u0027Depression - Male\u0027);", + "sql": "SELECT target_demographic, COUNT(*) AS campaigns_per_gender FROM campaigns WHERE launch_date \u003e\u003d \u00272010-01-01\u0027 AND target_demographic LIKE \u0027%Depression%\u0027 GROUP BY target_demographic;", + "sql_explanation": "This query identifies which gender participated the most in depression-related campaigns since 2010. It groups the campaigns by target_demographic and filters for campaigns launched since 2010 with a target demographic related to depression. Then, it counts the number of campaigns for each gender-related target_demographic." 
+}, { + "id": "1391", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average improvement percentage for patients who have shown improvement for each mental health condition by gender.", + "sql_context": "CREATE TABLE patient_outcomes (id INT PRIMARY KEY, patient_id INT, condition_id INT, improvement_percentage FLOAT, gender VARCHAR(255));", + "sql": "SELECT condition_id, AVG(improvement_percentage) AS avg_improvement, gender FROM patient_outcomes WHERE improvement_percentage \u003e 0 GROUP BY condition_id, gender;", + "sql_explanation": "This query calculates the average improvement percentage for patients who have shown improvement (greater than 0%) for each mental health condition, grouped by gender in the \u0027patient_outcomes\u0027 table." 
+}, { + "id": "2194", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients were diagnosed with a mental health condition per month, in 2022?", + "sql_context": "CREATE TABLE patients (id INT, name VARCHAR(50), diagnosis_date DATE); INSERT INTO patients (id, name, diagnosis_date) VALUES (1, \u0027Ella Johnson\u0027, \u00272022-02-15\u0027); INSERT INTO patients (id, name, diagnosis_date) VALUES (2, \u0027Fiona Chen\u0027, \u00272022-03-20\u0027); CREATE TABLE diagnoses (id INT, patient_id INT, condition VARCHAR(50), date DATE); INSERT INTO diagnoses (id, patient_id, condition, date) VALUES (1, 1, \u0027Anxiety\u0027, \u00272022-01-15\u0027); INSERT INTO diagnoses (id, patient_id, condition, date) VALUES (2, 2, \u0027Depression\u0027, \u00272022-02-25\u0027);", + "sql": "SELECT DATE_FORMAT(date, \u0027%Y-%m\u0027) as Month, COUNT(DISTINCT patient_id) as num_patients FROM diagnoses GROUP BY Month ORDER BY Month;", + "sql_explanation": "Counts the number of distinct patients diagnosed with a mental health condition each month in 2022." 
+}, { + "id": "4387", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients received treatment in each location?", + "sql_context": "CREATE TABLE patients (id INT, name VARCHAR(50), location VARCHAR(50), treatment VARCHAR(50)); CREATE TABLE treatments (treatment VARCHAR(50), cost INT);", + "sql": "SELECT p.location, COUNT(DISTINCT p.name) FROM patients p GROUP BY p.location;", + "sql_explanation": "The query groups patients by their location, then counts the number of unique patients for each location, returning a table with each unique location and its respective patient count." 
+}, { + "id": "4818", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of all mental health conditions treated in 2021?", + "sql_context": "CREATE TABLE treatments (treatment_id INT, year INT, cost DECIMAL(10,2), condition VARCHAR(30)); INSERT INTO treatments (treatment_id, year, cost, condition) VALUES (1, 2021, 500.00, \u0027Anxiety\u0027), (2, 2022, 600.00, \u0027Depression\u0027), (3, 2021, 700.00, \u0027Anxiety\u0027);", + "sql": "SELECT SUM(cost) FROM treatments WHERE year \u003d 2021 GROUP BY condition;", + "sql_explanation": "This query calculates the total cost of all mental health conditions treated in 2021 by filtering the treatments table where the year column is equal to 2021, grouping the results by the condition column, and then summing the cost column for each group." 
+}, { + "id": "618", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of patients who have received the flu vaccine, grouped by age and gender, in the past year?", + "sql_context": "CREATE TABLE patients (id INT, age INT, gender TEXT, last_vaccination_date DATE); INSERT INTO patients (id, age, gender, last_vaccination_date) VALUES (1, 25, \u0027Female\u0027, \u00272022-02-01\u0027), (2, 45, \u0027Male\u0027, \u00272022-03-15\u0027), (3, 32, \u0027Female\u0027, \u00272022-04-20\u0027), (4, 50, \u0027Male\u0027, \u00272022-05-05\u0027), (5, 60, \u0027Female\u0027, \u00272022-06-10\u0027);", + "sql": "SELECT EXTRACT(YEAR FROM age) as age, gender, COUNT(*) as num_patients FROM patients WHERE last_vaccination_date \u003e\u003d DATEADD(year, -1, CURRENT_DATE) AND vaccine \u003d \u0027Flu\u0027 GROUP BY EXTRACT(YEAR FROM age), gender;", + "sql_explanation": "The SQL query first creates the patients table with relevant columns and data. It then extracts the year from the age column and groups the result by age and gender, counting the number of patients who have received the flu vaccine in the past year." 
+}, { + "id": "820", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of primary care physicians per capita in rural areas, ordered by the highest average?", + "sql_context": "CREATE TABLE physicians (id INT, name TEXT, specialty TEXT, location TEXT, population INT); INSERT INTO physicians (id, name, specialty, location, population) VALUES (1, \u0027Dr. Smith\u0027, \u0027Primary Care\u0027, \u0027rural\u0027, 5000), (2, \u0027Dr. Johnson\u0027, \u0027Primary Care\u0027, \u0027urban\u0027, 20000);", + "sql": "SELECT AVG(population / NULLIF(specialty \u003d \u0027Primary Care\u0027, 0)) FROM physicians WHERE location \u003d \u0027rural\u0027 GROUP BY location ORDER BY AVG(population / NULLIF(specialty \u003d \u0027Primary Care\u0027, 0)) DESC;", + "sql_explanation": "Calculates the average number of primary care physicians per capita in rural areas and orders the results in descending order." 
+}, { + "id": "1029", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases of Malaria and Tuberculosis were reported in African countries in 2020?", + "sql_context": "CREATE TABLE public.infectious_diseases (id SERIAL PRIMARY KEY, country TEXT, disease TEXT, year INT, cases INT); INSERT INTO public.infectious_diseases (country, disease, year, cases) VALUES (\u0027Nigeria\u0027, \u0027Malaria\u0027, 2020, 12345), (\u0027South Africa\u0027, \u0027Tuberculosis\u0027, 2020, 2345), (\u0027Kenya\u0027, \u0027Malaria\u0027, 2020, 3456);", + "sql": "SELECT country, disease, SUM(cases) FILTER (WHERE disease IN (\u0027Malaria\u0027, \u0027Tuberculosis\u0027) AND year \u003d 2020) AS total_cases FROM public.infectious_diseases GROUP BY country, disease;", + "sql_explanation": "This query aggregates the number of Malaria and Tuberculosis cases in African countries in 2020, then groups the result by \u0027country\u0027 and \u0027disease\u0027." 
+}, { + "id": "1063", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases of Influenza and COVID-19 were reported in the US and Canada in 2021?", + "sql_context": "CREATE TABLE public.infectious_diseases (id SERIAL PRIMARY KEY, country TEXT, disease TEXT, year INT, cases INT); INSERT INTO public.infectious_diseases (country, disease, year, cases) VALUES (\u0027United States\u0027, \u0027Influenza\u0027, 2021, 12345), (\u0027Canada\u0027, \u0027COVID-19\u0027, 2021, 2345), (\u0027United States\u0027, \u0027COVID-19\u0027, 2021, 543);", + "sql": "SELECT country, disease, SUM(cases) FILTER (WHERE disease IN (\u0027Influenza\u0027, \u0027COVID-19\u0027) AND year \u003d 2021) AS total_cases FROM public.infectious_diseases GROUP BY country, disease;", + "sql_explanation": "This query aggregates the number of Influenza and COVID-19 cases in the US and Canada in 2021, then groups the result by \u0027country\u0027 and \u0027disease\u0027." 
+}, { + "id": "1283", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in the number of patients seen by each primary care physician in the state of New York?", + "sql_context": "CREATE TABLE public.physicians (id SERIAL PRIMARY KEY, name TEXT); INSERT INTO public.physicians (name) VALUES (\u0027Dr. Smith\u0027), (\u0027Dr. Johnson\u0027); CREATE TABLE public.patient_visits (id SERIAL PRIMARY KEY, physician TEXT, visit_date DATE); INSERT INTO public.patient_visits (physician, visit_date) VALUES (\u0027Dr. Smith\u0027, \u00272022-01-01\u0027), (\u0027Dr. Johnson\u0027, \u00272022-01-02\u0027), (\u0027Dr. Smith\u0027, \u00272022-01-03\u0027);", + "sql": "SELECT physician, COUNT(*) FILTER (WHERE visit_date \u003c \u00272022-02-01\u0027) - COUNT(*) FILTER (WHERE visit_date \u003e\u003d \u00272022-02-01\u0027) FROM public.patient_visits GROUP BY physician;", + "sql_explanation": "The SQL query groups the rows in the patient_visits table by the physician column and then calculates the difference between the number of visits before and after February 1, 2022 using the COUNT function with the FILTER clause." 
+}, { + "id": "1329", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients by age range and gender?", + "sql_context": "CREATE TABLE patients (id INT, name VARCHAR(50), age INT, gender VARCHAR(10)); INSERT INTO patients (id, name, age, gender) VALUES (1, \u0027John Doe\u0027, 45, \u0027Male\u0027), (2, \u0027Jane Smith\u0027, 35, \u0027Female\u0027), (3, \u0027Alice Johnson\u0027, 50, \u0027Female\u0027);", + "sql": "SELECT CASE WHEN age \u003c 30 THEN \u0027Under 30\u0027 WHEN age \u003c 50 THEN \u002730-49\u0027 ELSE \u002750 and over\u0027 END AS age_range, gender, COUNT(*) FROM patients GROUP BY age_range, gender;", + "sql_explanation": "This SQL query calculates the total number of patients by age range and gender. It uses the CASE statement to create age ranges based on the patient\u0027s age. The GROUP BY clause is used to group patients by their age range and gender, and the COUNT(*) function is used to count the number of patients in each group." 
+}, { + "id": "1753", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of healthcare facilities by type and patients served, for facilities serving over 7000 patients?", + "sql_context": "CREATE TABLE public.healthcare_access (id SERIAL PRIMARY KEY, state TEXT, city TEXT, facility_type TEXT, patients_served INT, rating INT); INSERT INTO public.healthcare_access (state, city, facility_type, patients_served, rating) VALUES (\u0027California\u0027, \u0027San Francisco\u0027, \u0027Urgent Care\u0027, 6000, 6), (\u0027New York\u0027, \u0027New York City\u0027, \u0027Hospital\u0027, 15000, 9), (\u0027California\u0027, \u0027Los Angeles\u0027, \u0027Clinic\u0027, 7500, 7);", + "sql": "SELECT facility_type, patients_served, COUNT(*) FROM public.healthcare_access WHERE patients_served \u003e 7000 GROUP BY facility_type, patients_served;", + "sql_explanation": "This query calculates the count of healthcare facilities by type and patients served, for facilities serving over 7000 patients, and groups the result by \u0027facility_type\u0027 and \u0027patients_served\u0027." 
+}, { + "id": "1796", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients have been diagnosed with Malaria in the last 3 months in each state?", + "sql_context": "CREATE TABLE Patients (ID INT, Disease VARCHAR(20), DiagnosisDate DATE, State VARCHAR(20)); INSERT INTO Patients (ID, Disease, DiagnosisDate, State) VALUES (1, \u0027Malaria\u0027, \u00272022-01-01\u0027, \u0027California\u0027), (2, \u0027Malaria\u0027, \u00272022-03-15\u0027, \u0027California\u0027);", + "sql": "SELECT State, COUNT(*) AS CountPerState FROM Patients WHERE Disease \u003d \u0027Malaria\u0027 AND DiagnosisDate \u003e\u003d DATEADD(MONTH, -3, GETDATE()) GROUP BY State;", + "sql_explanation": "This query calculates the number of patients diagnosed with Malaria in the last 3 months in each state by filtering the Patients table based on the Disease and DiagnosisDate columns using the WHERE clause, and then grouping the table by State columns using the GROUP BY clause, and counting the number of rows in each group using the COUNT() function." 
+}, { + "id": "1852", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of flu cases for each age group in Florida in 2019?", + "sql_context": "CREATE TABLE flu_cases (id INT, age INT, location TEXT, year INT); INSERT INTO flu_cases (id, age, location, year) VALUES (1, 5, \u0027Florida\u0027, 2019); INSERT INTO flu_cases (id, age, location, year) VALUES (2, 25, \u0027Florida\u0027, 2018); INSERT INTO flu_cases (id, age, location, year) VALUES (3, 65, \u0027Florida\u0027, 2019);", + "sql": "SELECT flu_cases.age, COUNT(flu_cases.id) FROM flu_cases WHERE flu_cases.location \u003d \u0027Florida\u0027 AND flu_cases.year \u003d 2019 GROUP BY flu_cases.age;", + "sql_explanation": "This query finds the number of flu cases for each age group in Florida in 2019. It does so by selecting the age and counting the number of cases for each age group in the flu_cases table, and then filtering for Florida and 2019." 
+}, { + "id": "2044", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the obesity rate among children in each state?", + "sql_context": "CREATE TABLE ObesityData (State VARCHAR(50), AgeGroup VARCHAR(20), Population INT, ObesePopulation INT); INSERT INTO ObesityData (State, AgeGroup, Population, ObesePopulation) VALUES (\u0027California\u0027, \u0027Children\u0027, 6000000, 850000), (\u0027Texas\u0027, \u0027Children\u0027, 5500000, 1000000);", + "sql": "SELECT State, (SUM(ObesePopulation) / SUM(Population)) * 100 AS ObesityRate FROM ObesityData WHERE AgeGroup \u003d \u0027Children\u0027 GROUP BY State;", + "sql_explanation": "This query calculates the obesity rate among children in each state by summing the obese population and population for the children age group, then dividing the obese population by the total population and multiplying by 100. It groups the data by state." 
+}, { + "id": "2177", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of the population that has access to clean water in each continent?", + "sql_context": "CREATE TABLE water_data (id INT, country VARCHAR(50), continent CHAR(2), access_to_clean_water BOOLEAN); INSERT INTO water_data VALUES (1, \u0027Canada\u0027, \u0027NA\u0027, true), (2, \u0027Mexico\u0027, \u0027NA\u0027, false), (3, \u0027Brazil\u0027, \u0027SA\u0027, true), (4, \u0027Russia\u0027, \u0027EU\u0027, true);", + "sql": "SELECT continent, COUNT(*) FILTER (WHERE access_to_clean_water) * 100.0 / COUNT(*) AS percentage FROM water_data GROUP BY continent;", + "sql_explanation": "This query groups the water_data table by continent and calculates the percentage of the population with access to clean water in each continent." 
+}, { + "id": "2228", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the obesity rate by ethnicity in Illinois in 2020?", + "sql_context": "CREATE TABLE health_survey_2 (id INT, ethnicity TEXT, state TEXT, year INT, obese BOOLEAN); INSERT INTO health_survey_2 (id, ethnicity, state, year, obese) VALUES (1, \u0027Hispanic\u0027, \u0027Illinois\u0027, 2020, true);", + "sql": "SELECT ethnicity, AVG(obese::INT) as obesity_rate FROM health_survey_2 WHERE state \u003d \u0027Illinois\u0027 AND year \u003d 2020 GROUP BY ethnicity;", + "sql_explanation": "This query calculates the obesity rate by ethnicity in Illinois in 2020." 
+}, { + "id": "2241", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of TB cases reported in each province of Canada, by year?", + "sql_context": "CREATE TABLE tb_cases (id INT, patient_id INT, report_date DATE, province VARCHAR(255), is_active BOOLEAN);", + "sql": "SELECT YEAR(report_date) AS year, province, COUNT(*) AS num_tb_cases FROM tb_cases WHERE is_active \u003d TRUE GROUP BY year, province;", + "sql_explanation": "The query creates a table \u0027tb_cases\u0027 and calculates the number of TB cases reported in each province of Canada, by year. The query groups the result by year and province to get the number of TB cases for each year and province." 
+}, { + "id": "2304", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average number of hospital beds per state in California and Texas.", + "sql_context": "CREATE TABLE state_hospitals (state VARCHAR(50), hospital_beds INT); INSERT INTO state_hospitals (state, hospital_beds) VALUES (\u0027California\u0027, 75000), (\u0027Texas\u0027, 85000);", + "sql": "SELECT state, AVG(hospital_beds) AS avg_hospital_beds FROM state_hospitals WHERE state IN (\u0027California\u0027, \u0027Texas\u0027) GROUP BY state;", + "sql_explanation": "This SQL query calculates the average number of hospital beds in California and Texas by using the AVG function and filtering the states using the WHERE clause." 
+}, { + "id": "2594", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of clinics and average clinic capacity for each province?", + "sql_context": "CREATE TABLE ProvinceClinics (ProvinceName VARCHAR(50), NumClinics INT, AvgCapacity INT); INSERT INTO ProvinceClinics (ProvinceName, NumClinics, AvgCapacity) VALUES (\u0027Ontario\u0027, 4, 225), (\u0027Quebec\u0027, 3, 140), (\u0027British Columbia\u0027, 3, 187.5);", + "sql": "SELECT ProvinceName, COUNT(*) AS NumClinics, AVG(AvgCapacity) AS AvgCapacity FROM ProvinceClinics GROUP BY ProvinceName", + "sql_explanation": "Calculate the number of clinics and average clinic capacity for each province by using the COUNT() and AVG() aggregate functions. Group the result set by the ProvinceName column." 
+}, { + "id": "2814", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of clinics providing mental health services, by state?", + "sql_context": "CREATE TABLE clinics (state VARCHAR(2), num_clinics INT); INSERT INTO clinics (state, num_clinics) VALUES (\u0027NY\u0027, 250), (\u0027NJ\u0027, 180), (\u0027CA\u0027, 400), (\u0027FL\u0027, 320), (\u0027TX\u0027, 360);", + "sql": "SELECT state, SUM(num_clinics) as total_clinics FROM clinics WHERE service_type \u003d \u0027mental health\u0027 GROUP BY state;", + "sql_explanation": "The SQL query calculates the total number of mental health clinics in each state by summing the num_clinics column for each state where the service_type is \u0027mental health\u0027. The result is grouped by state." 
+}, { + "id": "3135", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases for each infectious disease?", + "sql_context": "CREATE TABLE infectious_diseases (disease_id INT, report_date DATE, disease_name VARCHAR(255), number_of_cases INT); INSERT INTO infectious_diseases (disease_id, report_date, disease_name, number_of_cases) VALUES (1, \u00272020-01-01\u0027, \u0027Flu\u0027, 50), (2, \u00272020-02-15\u0027, \u0027Measles\u0027, 30), (3, \u00272021-06-20\u0027, \u0027COVID-19\u0027, 100);", + "sql": "SELECT disease_name, SUM(number_of_cases) as total_cases FROM infectious_diseases GROUP BY disease_name;", + "sql_explanation": "This query groups the data by disease name, and then sums the number of cases for each disease." 
+}, { + "id": "3179", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average life expectancy in each country for people with a college degree?", + "sql_context": "CREATE TABLE life_expectancy (country VARCHAR(255), education VARCHAR(255), life_expectancy INT); INSERT INTO life_expectancy (country, education, life_expectancy) VALUES (\u0027US\u0027, \u0027College\u0027, 80), (\u0027US\u0027, \u0027High School\u0027, 75), (\u0027Canada\u0027, \u0027College\u0027, 82), (\u0027Canada\u0027, \u0027High School\u0027, 78);", + "sql": "SELECT country, AVG(life_expectancy) FROM life_expectancy WHERE education \u003d \u0027College\u0027 GROUP BY country;", + "sql_explanation": "The SQL query groups the rows in the life_expectancy table by country and calculates the average life expectancy for people with a college degree in each group." 
+}, { + "id": "3452", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average life expectancy in each region of Nigeria?", + "sql_context": "CREATE TABLE NigerianRegions (Region VARCHAR(50), Population INT, LifeExpectancy INT); INSERT INTO NigerianRegions (Region, Population, LifeExpectancy) VALUES (\u0027North\u0027, 20000000, 55), (\u0027South\u0027, 25000000, 60), (\u0027East\u0027, 22000000, 58), (\u0027West\u0027, 18000000, 57);", + "sql": "SELECT Region, AVG(LifeExpectancy) AS AverageLifeExpectancy FROM NigerianRegions GROUP BY Region;", + "sql_explanation": "This SQL query calculates the average life expectancy in each region of Nigeria by grouping the NigerianRegions table by the Region column and applying the AVG function to calculate the average life expectancy." 
+}, { + "id": "3530", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of the population without health insurance, by age group.", + "sql_context": "CREATE TABLE insurance (id INT, age_group INT, insured BOOLEAN);", + "sql": "SELECT i.age_group, AVG(i.insured) AS avg_insurance_rate FROM insurance i GROUP BY i.age_group;", + "sql_explanation": "This query groups the \u0027insurance\u0027 table by age group, and calculates the average insurance rate for each group. It returns the age group and average insurance rate for each group." +}, { + "id": "3590", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the hospital with the minimum number of beds in each state?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, city TEXT, state TEXT, beds INT); INSERT INTO hospitals (id, name, city, state, beds) VALUES (1, \u0027General Hospital\u0027, \u0027Miami\u0027, \u0027Florida\u0027, 500); INSERT INTO hospitals (id, name, city, state, beds) VALUES (2, \u0027Memorial Hospital\u0027, \u0027Boston\u0027, \u0027Massachusetts\u0027, 600); INSERT INTO hospitals (id, name, city, state, beds) 
VALUES (3, \u0027Community Hospital\u0027, \u0027Los Angeles\u0027, \u0027California\u0027, 400);", + "sql": "SELECT state, MIN(beds) as min_beds, name as min_beds_hospital FROM hospitals GROUP BY state;", + "sql_explanation": "This query groups hospitals by state and identifies the hospital with the minimum number of beds in each state." +}, { + "id": "3630", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of male and female patients diagnosed with any infectious disease in Seattle.", + "sql_context": "CREATE TABLE Genders (GenderID INT, Age INT, Gender VARCHAR(10), City VARCHAR(20), Disease VARCHAR(20)); INSERT INTO Genders (GenderID, Age, Gender, City, Disease) VALUES (1, 50, \u0027Female\u0027, \u0027Seattle\u0027, \u0027Measles\u0027);", + "sql": "SELECT Gender, COUNT(*) as PatientCount FROM Genders WHERE City \u003d \u0027Seattle\u0027 GROUP BY Gender;", + "sql_explanation": "This query calculates the number of male and female patients diagnosed with any infectious disease in Seattle by grouping data by Gender and City columns, counting the number of patients for each gender." 
+}, { + "id": "4106", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of primary care physicians per 100,000 residents in each county?", + "sql_context": "CREATE TABLE Counties (CountyID INT, CountyName TEXT, State TEXT, Physicians INT, Population INT); INSERT INTO Counties (CountyID, CountyName, State, Physicians, Population) VALUES (1, \u0027Los Angeles County\u0027, \u0027California\u0027, 4500, 10000000);", + "sql": "SELECT State, AVG(Physicians * 100000.0 / Population) FROM Counties GROUP BY State;", + "sql_explanation": "This query calculates the average number of primary care physicians per 100,000 residents in each county. It does so by selecting the \u0027State\u0027 column and the product of the \u0027Physicians\u0027 column and 100000.0, divided by the \u0027Population\u0027 column, grouped by the \u0027State\u0027 column." 
+}, { + "id": "4162", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many flu vaccinations were administered in each country last year?", + "sql_context": "CREATE TABLE vaccinations (id INT, country VARCHAR(50), year INT, administered INT); INSERT INTO vaccinations (id, country, year, administered) VALUES (1, \u0027United States\u0027, 2021, 120000000), (2, \u0027Canada\u0027, 2021, 10000000), (3, \u0027Mexico\u0027, 2021, 15000000);", + "sql": "SELECT country, administered FROM vaccinations WHERE year \u003d 2021 GROUP BY country;", + "sql_explanation": "This SQL query filters the vaccinations table to only include rows with a year value of 2021. It then groups the results by the country column and calculates the administered value within each group." 
+}, { + "id": "4189", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients with Malaria in each country in Africa?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, disease TEXT, country TEXT); INSERT INTO patients (id, name, age, disease, country) VALUES (1, \u0027John Doe\u0027, 35, \u0027Malaria\u0027, \u0027Kenya\u0027), (2, \u0027Jane Smith\u0027, 42, \u0027Malaria\u0027, \u0027Tanzania\u0027), (3, \u0027Bob Johnson\u0027, 50, \u0027Malaria\u0027, \u0027Tanzania\u0027), (4, \u0027Alice Williams\u0027, 60, \u0027Malaria\u0027, \u0027Kenya\u0027), (5, \u0027Eli Jones\u0027, 25, \u0027Typhoid\u0027, \u0027Kenya\u0027), (6, \u0027Mia Lee\u0027, 30, \u0027Malaria\u0027, \u0027Tanzania\u0027);", + "sql": "SELECT country, AVG(age) FROM patients WHERE disease \u003d \u0027Malaria\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the average age of patients with Malaria in each country in Africa by selecting the \u0027country\u0027 and \u0027age\u0027 columns from the \u0027patients\u0027 table where the \u0027disease\u0027 column is equal to \u0027Malaria\u0027 and then grouping the results by the \u0027country\u0027 column. The query then calculates the average of the \u0027age\u0027 column for each group." 
+}, { + "id": "4221", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the obesity rate in Asia by country?", + "sql_context": "CREATE TABLE asia (country VARCHAR(50), obesity_rate DECIMAL(3,1)); INSERT INTO asia (country, obesity_rate) VALUES (\u0027China\u0027, 4.8), (\u0027India\u0027, 3.3), (\u0027Indonesia\u0027, 6.9);", + "sql": "SELECT country, AVG(obesity_rate) as avg_obesity_rate FROM asia GROUP BY country;", + "sql_explanation": "The SQL query calculates the average obesity rate for each country in Asia by grouping the \u0027asia\u0027 table by the \u0027country\u0027 column and computing the average \u0027obesity_rate\u0027 for each group." 
+}, { + "id": "4741", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique infectious diseases reported in each region?", + "sql_context": "CREATE TABLE InfectiousDiseases (Id INT, Disease TEXT, Region TEXT, Date DATE); INSERT INTO InfectiousDiseases (Id, Disease, Region, Date) VALUES (1, \u0027Measles\u0027, \u0027Region A\u0027, \u00272022-01-01\u0027); INSERT INTO InfectiousDiseases (Id, Disease, Region, Date) VALUES (2, \u0027Mumps\u0027, \u0027Region A\u0027, \u00272022-01-02\u0027); INSERT INTO InfectiousDiseases (Id, Disease, Region, Date) VALUES (3, \u0027Influenza\u0027, \u0027Region B\u0027, \u00272022-01-03\u0027); INSERT INTO InfectiousDiseases (Id, Disease, Region, Date) VALUES (4, \u0027Tuberculosis\u0027, \u0027Region C\u0027, \u00272022-01-04\u0027);", + "sql": "SELECT Region, Disease FROM InfectiousDiseases GROUP BY Region, Disease;", + "sql_explanation": "The query groups the records by region and disease, displaying each region and the unique diseases reported there." 
+}, { + "id": "4938", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of mental health facilities in each state?", + "sql_context": "CREATE TABLE mental_health_facilities (name VARCHAR(255), state VARCHAR(255), num_beds INT); INSERT INTO mental_health_facilities (name, state, num_beds) VALUES (\u0027Facility A\u0027, \u0027NY\u0027, 100), (\u0027Facility B\u0027, \u0027CA\u0027, 150), (\u0027Facility C\u0027, \u0027TX\u0027, 200);", + "sql": "SELECT state, COUNT(*) FROM mental_health_facilities GROUP BY state;", + "sql_explanation": "The SQL query groups the rows in the mental_health_facilities table by state and counts the number of rows in each group." 
+}, { + "id": "5023", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of beds in hospitals by state?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, city TEXT, state TEXT, beds INT); INSERT INTO hospitals (id, name, city, state, beds) VALUES (1, \u0027General Hospital\u0027, \u0027Miami\u0027, \u0027Florida\u0027, 500); INSERT INTO hospitals (id, name, city, state, beds) VALUES (2, \u0027Memorial Hospital\u0027, \u0027Boston\u0027, \u0027Massachusetts\u0027, 600);", + "sql": "SELECT state, AVG(beds) as avg_beds FROM hospitals GROUP BY state;", + "sql_explanation": "This query groups hospitals by state and calculates the average number of beds in hospitals in each state." 
+}, { + "id": "5035", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospital beds in each country in the world?", + "sql_context": "CREATE TABLE hospital_beds (id INT, country TEXT, num_beds INT); INSERT INTO hospital_beds (id, country, num_beds) VALUES (1, \u0027United States\u0027, 900000), (2, \u0027Mexico\u0027, 250000), (3, \u0027Canada\u0027, 150000), (4, \u0027Brazil\u0027, 500000), (5, \u0027Australia\u0027, 100000), (6, \u0027Russia\u0027, 1200000), (7, \u0027China\u0027, 4000000), (8, \u0027India\u0027, 2000000), (9, \u0027Germany\u0027, 800000), (10, \u0027France\u0027, 700000);", + "sql": "SELECT country, SUM(num_beds) FROM hospital_beds GROUP BY country;", + "sql_explanation": "This query calculates the total number of hospital beds in each country in the world by grouping the data by country and summing the number of beds for each country." 
+}, { + "id": "5242", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of COVID-19 cases in each country?", + "sql_context": "CREATE TABLE COVIDCountry (Country VARCHAR(50), Cases INT); INSERT INTO COVIDCountry (Country, Cases) VALUES (\u0027Canada\u0027, 20000), (\u0027USA\u0027, 30000), (\u0027Mexico\u0027, 15000);", + "sql": "SELECT Country, SUM(Cases) FROM COVIDCountry GROUP BY Country;", + "sql_explanation": "This query calculates the total number of COVID-19 cases in each country in the COVIDCountry table. It does this by using the SUM function on the Cases column and grouping the results by the Country column." 
+}, { + "id": "5632", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospital beds in each state?", + "sql_context": "CREATE TABLE Beds (State VARCHAR(50), Beds INT); INSERT INTO Beds (State, Beds) VALUES (\u0027California\u0027, 100000), (\u0027Texas\u0027, 85000), (\u0027New York\u0027, 70000);", + "sql": "SELECT State, SUM(Beds) FROM Beds GROUP BY State;", + "sql_explanation": "This query calculates the total number of hospital beds in each state in the Beds table. It does this by using the SUM function on the Beds column and grouping the results by the State column." 
+}, { + "id": "286", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average community policing score by region and month for the past year, ranked in descending order.", + "sql_context": "CREATE TABLE Regions (id INT, region_name VARCHAR(255)); CREATE TABLE CommunityPolicing (id INT, region_id INT, policing_score INT, policing_date DATE); INSERT INTO Regions (id, region_name) VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027); INSERT INTO CommunityPolicing (id, region_id, policing_score, policing_date) VALUES (1, 1, 85, \u00272021-02-01\u0027), (2, 2, 88, \u00272021-03-01\u0027), (3, 3, 92, \u00272021-01-01\u0027), (4, 4, 78, \u00272021-04-01\u0027);", + "sql": "SELECT region_id, AVG(policing_score) as avg_score, EXTRACT(MONTH FROM policing_date) as month FROM CommunityPolicing WHERE policing_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND CURRENT_DATE GROUP BY region_id, month ORDER BY avg_score DESC;", + "sql_explanation": "This query calculates the average community policing score by region and month for the past year by grouping the CommunityPolicing table by region_id and month and filtering the data with a WHERE clause to only include records within the past year. It then orders the result set in descending order by the average score." 
+}, { + "id": "752", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of emergency calls in the \"east\" region that were responded to within 5 minutes in the last month?", + "sql_context": "CREATE TABLE EmergencyCalls (id INT, region VARCHAR(20), response_time INT, date DATE);", + "sql": "SELECT region, 100.0 * AVG(CASE WHEN response_time \u003c\u003d 5 THEN 1 ELSE 0 END) as percentage FROM EmergencyCalls WHERE region \u003d \u0027east\u0027 AND date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY region;", + "sql_explanation": "The SQL query calculates the percentage of emergency calls in the \"east\" region that were responded to within 5 minutes in the last month by selecting the AVG function on the CASE statement that checks if the response_time column is less than or equal to 5. The filtered EmergencyCalls table is grouped by the region column and the result is multiplied by 100.0 to get the percentage." 
+}, { + "id": "1287", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency calls in the city of Chicago for each month of the year 2021?", + "sql_context": "CREATE TABLE emergency_calls (id INT, city VARCHAR(20), call_date DATE); INSERT INTO emergency_calls (id, city, call_date) VALUES (1, \u0027Chicago\u0027, \u00272021-01-01\u0027), (2, \u0027Chicago\u0027, \u00272021-02-01\u0027), (3, \u0027Chicago\u0027, \u00272021-03-01\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM call_date) as month, COUNT(*) FROM emergency_calls WHERE city \u003d \u0027Chicago\u0027 AND call_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY month;", + "sql_explanation": "The SQL query calculates the total number of emergency calls in the city of Chicago for each month of the year 2021 by using the COUNT function and grouping the data by the month of the call_date, filtering the data for the city of Chicago and the date range of the year 2021, and extracting the month from the call_date." 
+}, { + "id": "1362", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community policing events were held in each district in the last year, ordered by the highest number of events?", + "sql_context": "CREATE TABLE CommunityPolicing (id INT, district VARCHAR(20), event_date DATE);", + "sql": "SELECT district, COUNT(*) as num_events FROM CommunityPolicing WHERE event_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY district ORDER BY num_events DESC;", + "sql_explanation": "The SQL query calculates the number of community policing events in each district in the last year by selecting the COUNT(*) function on the filtered CommunityPolicing table. The filtering is done by event_date column greater than or equal to the current date minus one year. The result is ordered in descending order by the num_events column." 
+}, { + "id": "1816", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many \u0027Assault\u0027 and \u0027Robbery\u0027 crimes were reported in each district for 2021, from the \u0027CrimeStats\u0027 table?", + "sql_context": "CREATE TABLE CrimeStats (district VARCHAR(20), crimeType VARCHAR(20), year INT, number INT);", + "sql": "SELECT district, crimeType, SUM(number) FROM CrimeStats WHERE (crimeType IN (\u0027Assault\u0027, \u0027Robbery\u0027) AND year \u003d 2021) GROUP BY district, crimeType;", + "sql_explanation": "The query filters the \u0027CrimeStats\u0027 table based on the \u0027crimeType\u0027 and \u0027year\u0027 conditions. It then calculates the sum of the \u0027number\u0027 column values for each district and crimeType using the GROUP BY clause." 
+}, { + "id": "2201", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency incidents reported by each community policing center, broken down by incident type?", + "sql_context": "CREATE TABLE community_policing_centers (id INT, center_name TEXT); INSERT INTO community_policing_centers (id, center_name) VALUES (1, \u0027Center A\u0027), (2, \u0027Center B\u0027), (3, \u0027Center C\u0027); CREATE TABLE emergency_incidents (id INT, center_id INT, incident_type TEXT, incident_count INT); INSERT INTO emergency_incidents (id, center_id, incident_type, incident_count) VALUES (1, 1, \u0027Fire\u0027, 30), (2, 1, \u0027Medical\u0027, 40), (3, 2, \u0027Fire\u0027, 50), (4, 2, \u0027Medical\u0027, 60), (5, 3, \u0027Fire\u0027, 20), (6, 3, \u0027Medical\u0027, 15);", + "sql": "SELECT center_id, incident_type, SUM(incident_count) AS total_incidents FROM emergency_incidents GROUP BY center_id, incident_type;", + "sql_explanation": "The SQL query groups the data by center_id and incident_type and calculates the sum of incident_count for each group." 
+}, { + "id": "2318", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the incidents with the longest and shortest response times for each incident?", + "sql_context": "CREATE TABLE emergency_responses (id INT, incident_id INT, responder_id INT, response_time TIMESTAMP); INSERT INTO emergency_responses (id, incident_id, responder_id, response_time) VALUES (1, 1, 10, \u00272021-01-01 10:15:00\u0027); INSERT INTO emergency_responses (id, incident_id, responder_id, response_time) VALUES (2, 2, 20, \u00272021-01-02 10:16:00\u0027);", + "sql": "SELECT incident_id, MIN(response_time) as shortest, MAX(response_time) as longest FROM emergency_responses GROUP BY incident_id;", + "sql_explanation": "This query calculates the shortest and longest response time for each incident by using the MIN and MAX functions on the response_time column, grouped by incident_id." 
+}, { + "id": "2519", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which community policing sectors have not had any traffic violations in the last month?", + "sql_context": "CREATE TABLE traffic_violations (id INT, sector VARCHAR(50), date DATE); INSERT INTO traffic_violations (id, sector, date) VALUES (1, \u0027Sector A\u0027, \u00272022-01-01\u0027), (2, \u0027Sector B\u0027, \u00272022-02-01\u0027);", + "sql": "SELECT sector FROM traffic_violations WHERE date \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY sector HAVING COUNT(*) \u003d 0;", + "sql_explanation": "The SQL query filters the traffic violations table to only include rows with a date within the last month. It then groups the violations by sector and uses the HAVING clause to only return sectors with zero violations." 
+}, { + "id": "2541", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sectors have had zero emergency incidents in the last week?", + "sql_context": "CREATE TABLE emergency_incidents (id INT, sector VARCHAR(50), date DATE); INSERT INTO emergency_incidents (id, sector, date) VALUES (1, \u0027Sector A\u0027, \u00272022-02-01\u0027), (2, \u0027Sector B\u0027, \u00272022-02-02\u0027), (3, \u0027Sector A\u0027, \u00272022-02-03\u0027);", + "sql": "SELECT sector FROM emergency_incidents WHERE date \u003e\u003d DATEADD(week, -1, CURRENT_DATE) GROUP BY sector HAVING COUNT(*) \u003d 0;", + "sql_explanation": "The SQL query filters the emergency incidents table to only include rows with a date within the last week. It then groups the incidents by sector and uses the HAVING clause to only return sectors with zero incidents." 
+}, { + "id": "2750", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for each community policing program?", + "sql_context": "CREATE TABLE community_policing (id INT PRIMARY KEY, program_name VARCHAR(50), start_date DATE, end_date DATE, average_response_time INT);", + "sql": "SELECT program_name, AVG(average_response_time) as avg_response_time FROM community_policing GROUP BY program_name;", + "sql_explanation": "The query calculates the average response time for each community policing program by grouping the records by \u0027program_name\u0027 and finding the average \u0027average_response_time\u0027." 
+}, { + "id": "2760", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many crimes were reported by category in each borough of New York City?", + "sql_context": "CREATE TABLE nyc_boroughs (id INT, borough TEXT); INSERT INTO nyc_boroughs (id, borough) VALUES (1, \u0027Manhattan\u0027), (2, \u0027Brooklyn\u0027), (3, \u0027Bronx\u0027), (4, \u0027Queens\u0027), (5, \u0027Staten Island\u0027); CREATE TABLE crimes_reported_nyc (id INT, borough TEXT, crime_type TEXT, crime_count INT); INSERT INTO crimes_reported_nyc (id, borough, crime_type, crime_count) VALUES (1, \u0027Manhattan\u0027, \u0027Murder\u0027, 10), (2, \u0027Manhattan\u0027, \u0027Robbery\u0027, 20), (3, \u0027Brooklyn\u0027, \u0027Murder\u0027, 15), (4, \u0027Brooklyn\u0027, \u0027Robbery\u0027, 30), (5, \u0027Bronx\u0027, \u0027Murder\u0027, 20), (6, \u0027Bronx\u0027, \u0027Robbery\u0027, 40), (7, \u0027Queens\u0027, \u0027Murder\u0027, 10), (8, \u0027Queens\u0027, \u0027Robbery\u0027, 20), (9, \u0027Staten Island\u0027, \u0027Murder\u0027, 5), (10, \u0027Staten Island\u0027, \u0027Robbery\u0027, 15);", + "sql": "SELECT borough, crime_type, SUM(crime_count) AS total_crimes FROM crimes_reported_nyc GROUP BY borough, crime_type;", + "sql_explanation": "The SQL query groups the data by borough and crime_type and calculates the sum of crime_count for each group." 
+}, { + "id": "2982", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many emergency medical incidents were there in each borough?", + "sql_context": "CREATE TABLE emergency_incidents (id INT, incident_type VARCHAR(50), borough VARCHAR(50)); INSERT INTO emergency_incidents (id, incident_type, borough) VALUES (1, \u0027Medical Emergency\u0027, \u0027Brooklyn\u0027), (2, \u0027Medical Emergency\u0027, \u0027Manhattan\u0027);", + "sql": "SELECT borough, COUNT(*) FROM emergency_incidents WHERE incident_type \u003d \u0027Medical Emergency\u0027 GROUP BY borough;", + "sql_explanation": "The SQL query groups the emergency incidents by borough and counts the number of medical emergencies in each borough." 
+}, { + "id": "3481", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency incidents in each region?", + "sql_context": "CREATE TABLE emergency_responses (id INT, region TEXT, incident_type TEXT, response_time INT); INSERT INTO emergency_responses (id, region, incident_type, response_time) VALUES (1, \u0027Region 1\u0027, \u0027Fire\u0027, 8), (2, \u0027Region 1\u0027, \u0027Medical\u0027, 10), (3, \u0027Region 2\u0027, \u0027Fire\u0027, 7), (4, \u0027Region 2\u0027, \u0027Medical\u0027, 9), (5, \u0027Region 3\u0027, \u0027Fire\u0027, 9), (6, \u0027Region 3\u0027, \u0027Medical\u0027, 11);", + "sql": "SELECT region, AVG(response_time) AS avg_response_time FROM emergency_responses GROUP BY region;", + "sql_explanation": "The SQL query calculates the average response time for emergency incidents in each region by grouping the data by region and calculating the average of response_time." 
+}, { + "id": "3616", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency calls in the city of Chicago, broken down by the type of emergency?", + "sql_context": "CREATE TABLE emergency_responses (id INT, city VARCHAR(20), type VARCHAR(20), response_time INT); INSERT INTO emergency_responses (id, city, type, response_time) VALUES (1, \u0027Chicago\u0027, \u0027emergency\u0027, 10); INSERT INTO emergency_responses (id, city, type, response_time) VALUES (2, \u0027Chicago\u0027, \u0027fire\u0027, 15);", + "sql": "SELECT type, AVG(response_time) FROM emergency_responses WHERE city \u003d \u0027Chicago\u0027 GROUP BY type", + "sql_explanation": "The SQL query calculates the average response time for emergency calls in the city of Chicago, broken down by the type of emergency. It does this by using the GROUP BY clause to group the results by the type of emergency, and then applying the AVG function to the response time column for each group." 
+}, { + "id": "3832", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of police stations in each district?", + "sql_context": "CREATE TABLE PoliceStations (id INT, name VARCHAR(255), district VARCHAR(255), capacity INT);", + "sql": "SELECT district, SUM(capacity) as \u0027total_capacity\u0027 FROM PoliceStations GROUP BY district;", + "sql_explanation": "This query calculates the total capacity of police stations in each district by using the aggregate function SUM with the capacity column and GROUP BY to group by district." +}, { + "id": "3872", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of calls for each call type in the \u0027coastal_guard\u0027 table, ordered by the number of calls.", + "sql_context": "CREATE TABLE coastal_guard (id INT, call_type VARCHAR(20), call_date TIMESTAMP); INSERT INTO coastal_guard VALUES (1, \u0027water_rescue\u0027, \u00272022-01-06 00:00:00\u0027), (2, \u0027boating\u0027, \u00272022-01-07 01:00:00\u0027);", + "sql": "SELECT call_type, COUNT(*) FROM coastal_guard GROUP BY call_type ORDER BY COUNT(*) DESC;", + "sql_explanation": "We create the \u0027coastal_guard\u0027 table 
and insert records. The \u0027sql\u0027 query counts the total number of calls for each call type and orders by the number of calls." +}, { + "id": "4034", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of community policing events held in the state of New York, grouped by city?", + "sql_context": "CREATE TABLE community_policing (id INT, city VARCHAR(20), state VARCHAR(20), year INT, events INT);", + "sql": "SELECT city, COUNT(*) FROM community_policing WHERE state \u003d \u0027New York\u0027 GROUP BY city;", + "sql_explanation": "This SQL query counts the number of community policing events held in the state of New York, grouped by city by using the COUNT function with a wildcard (*) and filtering for rows where the state is New York, and grouping by the city column." 
+}, { + "id": "4511", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum crime count for each crime type across all districts?", + "sql_context": "CREATE TABLE crimes (crime_id INT, district_id INT, crime_type TEXT, crime_count INT);", + "sql": "SELECT c.crime_type, MAX(c.crime_count) FROM crimes c GROUP BY c.crime_type;", + "sql_explanation": "Group records in the crimes table by crime_type, and calculate the maximum crime_count for each crime_type." +}, { + "id": "5338", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of crimes committed by a single offender in a year?", + "sql_context": "CREATE TABLE offender_crimes (cid INT, oid INT, year INT, PRIMARY KEY(cid), FOREIGN KEY(oid) REFERENCES offenders(oid));", + "sql": "SELECT oid, MAX(COUNT(*)) FROM offender_crimes GROUP BY oid;", + "sql_explanation": "This query calculates the maximum number of crimes committed by a single offender in a year. It first groups the offender_crimes table by offender ID and takes the count of crimes. Then, it uses a GROUP BY clause to group by offender ID and takes the maximum of these counts." 
+}, { + "id": "458", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries that received the most climate finance for mitigation projects in 2019, excluding countries that have received less than $5,000,000 in total climate finance that year.", + "sql_context": "CREATE TABLE climate_finance (year INT, recipient VARCHAR(20), category VARCHAR(10), amount FLOAT); INSERT INTO climate_finance (year, recipient, category, amount) VALUES (2019, \u0027India\u0027, \u0027mitigation\u0027, 12000000), (2019, \u0027Brazil\u0027, \u0027mitigation\u0027, 18000000), (2019, \u0027Indonesia\u0027, \u0027mitigation\u0027, 7000000), (2019, \u0027South Africa\u0027, \u0027mitigation\u0027, 6000000), (2019, \u0027Mexico\u0027, \u0027mitigation\u0027, 4000000), (2019, \u0027Argentina\u0027, \u0027mitigation\u0027, 4500000), (2019, \u0027Colombia\u0027, \u0027mitigation\u0027, 4800000);", + "sql": "SELECT recipient, SUM(amount) AS total_climate_finance FROM climate_finance WHERE year \u003d 2019 AND category \u003d \u0027mitigation\u0027 GROUP BY recipient HAVING total_climate_finance \u003e 5000000 ORDER BY total_climate_finance DESC LIMIT 3;", + "sql_explanation": "The SQL query identifies the top 3 countries with the most climate finance for mitigation projects in 2019, excluding countries with a total climate finance below $5,000,000. It groups the data by the \u0027recipient\u0027 column, filters for the relevant year and category, sums the \u0027amount\u0027 column, and applies the HAVING clause to exclude countries below the $5,000,000 threshold. 
Finally, it orders the results by the total climate finance in descending order and limits the results to 3 rows." +}, { + "id": "1489", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries received the most climate finance in the form of international aid in 2022?", + "sql_context": "CREATE TABLE climate_finance (id INT, country VARCHAR(20), finance_type VARCHAR(20), amount INT, finance_year INT); INSERT INTO climate_finance (id, country, finance_type, amount, finance_year) VALUES (1, \u0027Brazil\u0027, \u0027International Aid\u0027, 1000000, 2022), (2, \u0027India\u0027, \u0027International Aid\u0027, 1200000, 2022), (3, \u0027Indonesia\u0027, \u0027International Aid\u0027, 800000, 2022);", + "sql": "SELECT country, SUM(amount) FROM climate_finance WHERE finance_type \u003d \u0027International Aid\u0027 AND finance_year \u003d 2022 GROUP BY country ORDER BY SUM(amount) DESC;", + "sql_explanation": "This query calculates the total amount of climate finance received by each country in the form of international aid in 2022 by filtering the climate_finance table by finance_type and finance_year, and then grouping the results by country and ordering them in descending order by the sum of the amount for each group." 
+}, { + "id": "1917", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which climate mitigation programs received the most funding in 2020, and what was the average funding amount per program?", + "sql_context": "CREATE TABLE climate_funding (program VARCHAR(255), year INT, funding_amount FLOAT);", + "sql": "SELECT program, AVG(funding_amount) AS avg_funding FROM climate_funding WHERE year \u003d 2020 GROUP BY program ORDER BY avg_funding DESC LIMIT 1;", + "sql_explanation": "This query identifies the climate mitigation program that received the most funding in 2020, and calculates the average funding amount per program. It filters the climate_funding table to only include records from 2020. The query uses the GROUP BY clause to group the results by program, and the AVG function to calculate the average funding amount per program. The results are sorted in descending order based on the average funding amount, and the LIMIT clause is used to only return the top record." 
+}, { + "id": "1950", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of climate adaptation projects in South America and their respective start years?", + "sql_context": "CREATE TABLE climate_projects (project_name VARCHAR(50), location VARCHAR(50), start_year INT, budget INT, sector VARCHAR(50)); INSERT INTO climate_projects (project_name, location, start_year, budget, sector) VALUES (\u0027Flood Prevention C\u0027, \u0027Brazil\u0027, 2018, 800000, \u0027Climate Adaptation\u0027), (\u0027Drought Resistance D\u0027, \u0027Argentina\u0027, 2019, 900000, \u0027Climate Adaptation\u0027);", + "sql": "SELECT COUNT(*), start_year FROM climate_projects WHERE location IN (\u0027South America\u0027) AND sector \u003d \u0027Climate Adaptation\u0027 GROUP BY start_year;", + "sql_explanation": "This query lists the number of climate adaptation projects in South America and their respective start years. It does this by filtering the climate_projects table for the relevant location and sector, and then grouping by start year and counting the number of rows." 
+}, { + "id": "1963", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of climate finance spent on projects in each sector, for the last 3 years?", + "sql_context": "CREATE TABLE climate_finance (id INT, sector TEXT, year INT, finance_amount FLOAT);", + "sql": "SELECT sector, SUM(finance_amount) FROM climate_finance WHERE year BETWEEN (YEAR(CURRENT_DATE) - 3) AND YEAR(CURRENT_DATE) GROUP BY sector;", + "sql_explanation": "This SQL query calculates the total amount of climate finance spent on projects in each sector, for the last 3 years. It does this by filtering the climate_finance table for rows with a year within the last 3 years, then grouping the results by sector and calculating the sum of the finance_amount for each group." 
+}, { + "id": "1975", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum funding provided by each climate finance provider for climate adaptation projects in Latin America?", + "sql_context": "CREATE TABLE climate_finance (id INT, provider VARCHAR(100), initiative VARCHAR(100), amount FLOAT, year INT, location VARCHAR(100)); INSERT INTO climate_finance (id, provider, initiative, amount, year, location) VALUES (1, \u0027Green Climate Fund\u0027, \u0027Climate Adaptation\u0027, 25000000, 2020, \u0027Latin America\u0027), (2, \u0027World Bank\u0027, \u0027Climate Adaptation\u0027, 30000000, 2019, \u0027Latin America\u0027);", + "sql": "SELECT provider, MAX(amount) FROM climate_finance WHERE initiative \u003d \u0027Climate Adaptation\u0027 AND location \u003d \u0027Latin America\u0027 GROUP BY provider;", + "sql_explanation": "This query calculates the maximum \u0027amount\u0027 for each \u0027provider\u0027 where the \u0027initiative\u0027 is \u0027Climate Adaptation\u0027 and the \u0027location\u0027 is \u0027Latin America\u0027." 
+}, { + "id": "3856", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of climate finance invested in climate mitigation and adaptation projects?", + "sql_context": "CREATE TABLE finance_projects (project_type TEXT, investment_amount FLOAT); INSERT INTO finance_projects (project_type, investment_amount) VALUES (\u0027Mitigation\u0027, 10000000), (\u0027Adaptation\u0027, 7000000);", + "sql": "SELECT project_type, SUM(investment_amount) FROM finance_projects GROUP BY project_type;", + "sql_explanation": "This SQL query calculates the total amount of climate finance invested in climate mitigation and adaptation projects. It groups the records by project type and applies the SUM function to calculate the total investment amount for each project type." 
+}, { + "id": "4011", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicle charging stations installed in each state of the United States in 2022?", + "sql_context": "CREATE TABLE charging_stations_us (id INT, location VARCHAR(50), state VARCHAR(50), year INT, size INT); INSERT INTO charging_stations_us (id, location, state, year, size) VALUES (1, \u0027Los Angeles\u0027, \u0027California\u0027, 2022, 500); INSERT INTO charging_stations_us (id, location, state, year, size) VALUES (2, \u0027New York\u0027, \u0027New York\u0027, 2022, 600); INSERT INTO charging_stations_us (id, location, state, year, size) VALUES (3, \u0027Chicago\u0027, \u0027Illinois\u0027, 2022, 700); INSERT INTO charging_stations_us (id, location, state, year, size) VALUES (4, \u0027Houston\u0027, \u0027Texas\u0027, 2022, 400);", + "sql": "SELECT state, COUNT(size) FROM charging_stations_us WHERE year \u003d 2022 GROUP BY state;", + "sql_explanation": "This query calculates the total number of electric vehicle charging stations installed in each state of the United States in 2022 by grouping the records by state and counting the number of charging stations where the year is 2022." 
+}, { + "id": "4078", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicle charging stations were installed in each region of India in 2022?", + "sql_context": "CREATE TABLE charging_stations (id INT, location VARCHAR(50), region VARCHAR(50), year INT, size INT); INSERT INTO charging_stations (id, location, region, year, size) VALUES (1, \u0027Delhi\u0027, \u0027North\u0027, 2022, 500); INSERT INTO charging_stations (id, location, region, year, size) VALUES (2, \u0027Mumbai\u0027, \u0027West\u0027, 2022, 600); INSERT INTO charging_stations (id, location, region, year, size) VALUES (3, \u0027Bangalore\u0027, \u0027South\u0027, 2022, 700); INSERT INTO charging_stations (id, location, region, year, size) VALUES (4, \u0027Kolkata\u0027, \u0027East\u0027, 2022, 400);", + "sql": "SELECT region, COUNT(size) FROM charging_stations WHERE year \u003d 2022 GROUP BY region;", + "sql_explanation": "This query calculates the number of electric vehicle charging stations installed in each region of India in 2022 by grouping the records by region and counting the number of charging stations." 
+}, { + "id": "4647", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding allocated for climate change projects for each type of project?", + "sql_context": "CREATE TABLE climate_change_projects_type(project_id INT, type VARCHAR(20), year INT, amount FLOAT); INSERT INTO climate_change_projects_type (project_id, type, year, amount) VALUES (24, \u0027Mitigation\u0027, 2018, 90000.0), (25, \u0027Adaptation\u0027, 2019, 100000.0), (26, \u0027Finance\u0027, 2020, 110000.0);", + "sql": "SELECT type, SUM(amount) FROM climate_change_projects_type GROUP BY type;", + "sql_explanation": "This SQL query lists the total funding allocated for climate change projects for each type of project by grouping the data by the \u0027type\u0027 column and calculating the sum of the \u0027amount\u0027 column." 
+}, { + "id": "4713", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission per vehicle type in the transportation sector?", + "sql_context": "CREATE TABLE Vehicles (ID INT, VehicleType VARCHAR(255), CO2Emission INT); INSERT INTO Vehicles (ID, VehicleType, CO2Emission) VALUES (1, \u0027Car\u0027, 4000), (2, \u0027Truck\u0027, 8000), (3, \u0027Motorcycle\u0027, 2000);", + "sql": "SELECT VehicleType, AVG(CO2Emission) FROM Vehicles GROUP BY VehicleType;", + "sql_explanation": "The SQL query calculates the average CO2 emission for each vehicle type by grouping the records based on VehicleType and then applying the AVG function to the CO2Emission column." 
+}, { + "id": "64", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales for each size, with a separate column for each season\u0027s sales?", + "sql_context": "CREATE TABLE Sales_by_Size (id INT, size VARCHAR(20), season VARCHAR(20), sales INT); INSERT INTO Sales_by_Size (id, size, season, sales) VALUES (1, \u0027XS\u0027, \u0027Spring\u0027, 100), (2, \u0027S\u0027, \u0027Spring\u0027, 200), (3, \u0027M\u0027, \u0027Spring\u0027, 300), (4, \u0027L\u0027, \u0027Spring\u0027, 150), (5, \u0027XL\u0027, \u0027Spring\u0027, 50), (6, \u0027XS\u0027, \u0027Summer\u0027, 120), (7, \u0027S\u0027, \u0027Summer\u0027, 220), (8, \u0027M\u0027, \u0027Summer\u0027, 320), (9, \u0027L\u0027, \u0027Summer\u0027, 180), (10, \u0027XL\u0027, \u0027Summer\u0027, 80), (11, \u0027XS\u0027, \u0027Fall\u0027, 150), (12, \u0027S\u0027, \u0027Fall\u0027, 250), (13, \u0027M\u0027, \u0027Fall\u0027, 350), (14, \u0027L\u0027, \u0027Fall\u0027, 200), (15, \u0027XL\u0027, \u0027Fall\u0027, 100), (16, \u0027XS\u0027, \u0027Winter\u0027, 180), (17, \u0027S\u0027, \u0027Winter\u0027, 280), (18, \u0027M\u0027, \u0027Winter\u0027, 380), (19, \u0027L\u0027, \u0027Winter\u0027, 250), (20, \u0027XL\u0027, \u0027Winter\u0027, 150);", + "sql": "SELECT size, SUM(CASE WHEN season \u003d \u0027Spring\u0027 THEN sales ELSE 0 END) AS spring_sales, SUM(CASE WHEN season \u003d \u0027Summer\u0027 THEN sales ELSE 0 END) AS summer_sales, SUM(CASE WHEN season \u003d \u0027Fall\u0027 THEN sales ELSE 0 END) AS fall_sales, SUM(CASE WHEN season \u003d \u0027Winter\u0027 THEN sales ELSE 0 END) AS 
winter_sales FROM Sales_by_Size GROUP BY size;", + "sql_explanation": "Calculate the total sales for each size, with a separate column for each season\u0027s sales using the conditional aggregation." +}, { + "id": "712", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of silk shirts sold in the past year, grouped by month?", + "sql_context": "CREATE TABLE silk_shirts (id INT PRIMARY KEY, price DECIMAL(5,2), sale_date DATE); INSERT INTO silk_shirts (id, price, sale_date) VALUES (1, 59.99, \u00272021-06-15\u0027), (2, 69.99, \u00272021-07-10\u0027), (3, 49.99, \u00272021-08-05\u0027);", + "sql": "SELECT AVG(price), EXTRACT(MONTH FROM sale_date) FROM silk_shirts WHERE sale_date \u003e\u003d DATE \u00272020-01-01\u0027 AND sale_date \u003c DATE \u00272021-01-01\u0027 AND shirt_type \u003d \u0027silk\u0027 GROUP BY EXTRACT(MONTH FROM sale_date);", + "sql_explanation": "This query calculates the average price of silk shirts sold in the past year, grouped by month. It filters the silk_shirts table to only include records where the sale_date is within the past year and the shirt_type is \u0027silk\u0027. It then calculates the average price for each month and groups the results by month." 
+}, { + "id": "1553", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the textile suppliers that provide both organic cotton and hemp?", + "sql_context": "CREATE TABLE TextileSuppliers (SupplierID INT, SupplierName TEXT, Material TEXT); INSERT INTO TextileSuppliers (SupplierID, SupplierName, Material) VALUES (1, \u0027GreenFields\u0027, \u0027Organic Cotton\u0027), (2, \u0027NaturalFibers\u0027, \u0027Hemp\u0027), (3, \u0027EcoWeaves\u0027, \u0027Organic Cotton\u0027), (4, \u0027SustainableHarvest\u0027, \u0027Bamboo\u0027), (5, \u0027PureTextiles\u0027, \u0027Organic Cotton, Hemp\u0027);", + "sql": "SELECT DISTINCT SupplierName FROM TextileSuppliers WHERE Material IN (\u0027Organic Cotton\u0027, \u0027Hemp\u0027) GROUP BY SupplierName HAVING COUNT(DISTINCT Material) \u003d 2;", + "sql_explanation": "The SQL query filters for suppliers that provide either organic cotton or hemp, then groups the result by SupplierName and filters for those that supply both materials by counting distinct Material values and comparing to 2." 
+}, { + "id": "1632", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries had the most sustainable fashion metrics in the past month?", + "sql_context": "CREATE TABLE country_metric (id INT, country TEXT, metric FLOAT, date DATE);", + "sql": "SELECT country, AVG(metric) FROM country_metric WHERE date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY country ORDER BY AVG(metric) DESC LIMIT 1;", + "sql_explanation": "This query calculates the average sustainable fashion metric for each country in the past month by averaging the metric for each row where the date is within the past month, then grouping by the country and ordering by the average metric in descending order, and returning the top row." 
+}, { + "id": "1702", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of organic cotton fabric used by each supplier in the past 3 months?", + "sql_context": "CREATE TABLE FabricData (FabricID INT, SupplierID INT, FabricType TEXT, Quantity FLOAT, Organic BOOLEAN); INSERT INTO FabricData (FabricID, SupplierID, FabricType, Quantity, Organic) VALUES (1001, 1, \u0027Cotton\u0027, 500, true), (1002, 1, \u0027Polyester\u0027, 700, false), (1003, 2, \u0027Organic Cotton\u0027, 800, true);", + "sql": "SELECT SupplierID, SUM(Quantity) FROM FabricData WHERE Organic \u003d true AND FabricDate \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) GROUP BY SupplierID;", + "sql_explanation": "This query calculates the total quantity of organic cotton fabric used by each supplier in the past 3 months by using the SUM function and filtering for organic cotton fabric types and fabric dates within the past 3 months." 
+}, { + "id": "1966", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of eco-friendly dyes used per textile shipment, grouped by supplier country?", + "sql_context": "CREATE TABLE TextileShipments (shipment_id INT, material VARCHAR(20), quantity INT, supplier_country VARCHAR(20)); INSERT INTO TextileShipments (shipment_id, material, quantity, supplier_country) VALUES (1, \u0027Eco-friendly Dye\u0027, 50, \u0027Brazil\u0027);", + "sql": "SELECT supplier_country, AVG(quantity) as avg_quantity FROM TextileShipments WHERE material \u003d \u0027Eco-friendly Dye\u0027 GROUP BY supplier_country;", + "sql_explanation": "This SQL query calculates the average quantity of eco-friendly dyes used in textile shipments, grouped by supplier country. It does this by filtering the TextileShipments table for only the eco-friendly dye material and then using the AVG function to calculate the average quantity, grouped by supplier country." 
+}, { + "id": "2391", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of customer purchases by size and product category?", + "sql_context": "CREATE TABLE purchases (customer_id INT, product_category VARCHAR(255), product VARCHAR(255), size VARCHAR(10));", + "sql": "SELECT product_category, size, COUNT(DISTINCT customer_id) as unique_customers FROM purchases GROUP BY product_category, size;", + "sql_explanation": "Count the number of unique customers per product category and size by grouping by product_category and size and selecting DISTINCT customer_id." +}, { + "id": "2799", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the top three most common colors in the \u0027Inventory\u0027 table, across all garment types.", + "sql_context": "CREATE TABLE Inventory (garment_type VARCHAR(20), color VARCHAR(20), quantity INT); INSERT INTO Inventory (garment_type, color, quantity) VALUES (\u0027Dress\u0027, \u0027Black\u0027, 500), (\u0027Dress\u0027, \u0027Blue\u0027, 400), (\u0027Shirt\u0027, \u0027White\u0027, 300), (\u0027Shirt\u0027, \u0027Black\u0027, 200), (\u0027Pants\u0027, \u0027Blue\u0027, 150);", + "sql": "SELECT color, 
SUM(quantity) AS total_quantity FROM Inventory GROUP BY color ORDER BY total_quantity DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the total quantity of each color in the \u0027Inventory\u0027 table, groups them by color, and orders the result set in descending order by total_quantity. The query then retrieves the top three records using the LIMIT clause." +}, { + "id": "3246", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average textile waste generated by each fashion brand, grouped by country and displayed in descending order based on the waste amount?", + "sql_context": "CREATE TABLE TextileWaste (brand TEXT, country TEXT, waste_kg FLOAT); INSERT INTO TextileWaste (brand, country, waste_kg) VALUES (\u0027BrandA\u0027, \u0027USA\u0027, 120.5), (\u0027BrandB\u0027, \u0027Canada\u0027, 80.3), (\u0027BrandC\u0027, \u0027USA\u0027, 160.1), (\u0027BrandD\u0027, \u0027Mexico\u0027, 95.6);", + "sql": "SELECT country, AVG(waste_kg) as avg_waste FROM TextileWaste GROUP BY country ORDER BY avg_waste DESC;", + "sql_explanation": "The SQL query calculates the average textile waste (avg_waste) generated by each fashion brand in a country by grouping the waste_kg values using the AVG function and the GROUP BY clause. The ORDER BY clause sorts the result set in descending order based on avg_waste." 
+}, { + "id": "3290", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total popularity of the \u0027Vintage\u0027 fashion trend?", + "sql_context": "CREATE TABLE FashionTrends (TrendName VARCHAR(255), Region VARCHAR(255), Popularity INT); INSERT INTO FashionTrends (TrendName, Region, Popularity) VALUES (\u0027Bohemian\u0027, \u0027North America\u0027, 5000), (\u0027Bohemian\u0027, \u0027Europe\u0027, 7000), (\u0027Vintage\u0027, \u0027Asia\u0027, 8000), (\u0027Vintage\u0027, \u0027South America\u0027, 9000);", + "sql": "SELECT TrendName, SUM(Popularity) FROM FashionTrends WHERE TrendName \u003d \u0027Vintage\u0027 GROUP BY TrendName;", + "sql_explanation": "The SQL query calculates the total popularity of the \u0027Vintage\u0027 fashion trend by summing up the popularity values for the \u0027Vintage\u0027 trend. It filters the results using the WHERE clause to only include rows with the TrendName \u0027Vintage\u0027 and groups the results by the TrendName, providing a sum of popularity for the unique trend \u0027Vintage\u0027." 
+}, { + "id": "3653", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which country has the highest CO2 emissions in textile industry?", + "sql_context": "CREATE TABLE textile_emissions (id INT, country VARCHAR(50), co2_emissions INT); INSERT INTO textile_emissions (id, country, co2_emissions) VALUES (1, \u0027Bangladesh\u0027, 5000), (2, \u0027China\u0027, 15000), (3, \u0027India\u0027, 10000), (4, \u0027USA\u0027, 8000);", + "sql": "SELECT country, MAX(co2_emissions) as max_emissions FROM textile_emissions GROUP BY country;", + "sql_explanation": "Identify the country with the highest CO2 emissions in the textile industry by grouping the \u0027textile_emissions\u0027 table by \u0027country\u0027 and finding the maximum \u0027co2_emissions\u0027 for each group." 
+}, { + "id": "3908", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average order size for each salesperson?", + "sql_context": "CREATE TABLE salesperson (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO salesperson (id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027), (2, \u0027Jane Smith\u0027, \u0027South\u0027); CREATE TABLE orders (id INT, salesperson_id INT, size INT); INSERT INTO orders (id, salesperson_id, size) VALUES (1, 1, 10), (2, 1, 15), (3, 2, 20), (4, 2, 25);", + "sql": "SELECT salesperson_id, AVG(size) as avg_order_size FROM orders GROUP BY salesperson_id;", + "sql_explanation": "Calculate the average order size by grouping the \u0027orders\u0027 table by \u0027salesperson_id\u0027 and finding the average \u0027size\u0027 for each group." 
+}, { + "id": "4073", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission for each garment type?", + "sql_context": "CREATE TABLE garment_emissions (id INT PRIMARY KEY, garment_type VARCHAR(20), co2_emission DECIMAL(5,2));", + "sql": "SELECT garment_type, AVG(co2_emission) FROM garment_emissions GROUP BY garment_type;", + "sql_explanation": "The SQL query calculates the average CO2 emission for each garment type by grouping by garment_type and calculating the average CO2 emission for each group." +}, { + "id": "4302", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sustainability score for each fabric, grouped by material type?", + "sql_context": "CREATE TABLE Fabrics (id INT, fabric_type VARCHAR(20), fabric VARCHAR(20), source_country VARCHAR(50), sustainability_score INT); INSERT INTO Fabrics (id, fabric_type, fabric, source_country, sustainability_score) VALUES (1, \u0027Natural\u0027, \u0027Cotton\u0027, \u0027India\u0027, 80), (2, \u0027Synthetic\u0027, \u0027Polyester\u0027, \u0027China\u0027, 50), (3, \u0027Natural\u0027, \u0027Wool\u0027, \u0027Australia\u0027, 90), (4, \u0027Synthetic\u0027, 
\u0027Silk\u0027, \u0027China\u0027, 60), (5, \u0027Mixed\u0027, \u0027Denim\u0027, \u0027USA\u0027, 70);", + "sql": "SELECT fabric_type, AVG(sustainability_score) FROM Fabrics GROUP BY fabric_type;", + "sql_explanation": "Calculate the average sustainability score for each fabric type (natural, synthetic, mixed) by averaging the individual sustainability scores." +}, { + "id": "4911", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of clothing sold in each size?", + "sql_context": "CREATE TABLE ClothingItems (ItemID INT, ItemName TEXT, Size TEXT, Price INT); INSERT INTO ClothingItems (ItemID, ItemName, Size, Price) VALUES (1, \u0027Top\u0027, \u0027S\u0027, 50), (2, \u0027Pants\u0027, \u0027M\u0027, 30), (3, \u0027Dress\u0027, \u0027L\u0027, 75);", + "sql": "SELECT Size, AVG(Price) as AvgPrice FROM ClothingItems GROUP BY Size;", + "sql_explanation": "Calculates the average price of clothing sold in each size by using the AVG function and grouping by size." 
+}, { + "id": "5077", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average retail price of garments per country.", + "sql_context": "CREATE TABLE Garments (garment_id INT, garment_name VARCHAR(50), retail_price DECIMAL(5,2), country VARCHAR(50)); INSERT INTO Garments (garment_id, garment_name, retail_price, country) VALUES (1, \u0027Sequin Evening Gown\u0027, 850.99, \u0027USA\u0027), (2, \u0027Cashmere Sweater\u0027, 250.00, \u0027Italy\u0027), (3, \u0027Silk Blouse\u0027, 150.00, \u0027France\u0027);", + "sql": "SELECT country, AVG(retail_price) FROM Garments GROUP BY country;", + "sql_explanation": "The SQL query groups the data by country and calculates the average retail price for each country." 
+}, { + "id": "661", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were opened in each month of the past year?", + "sql_context": "CREATE TABLE Cases (CaseID INT, CaseOpenDate DATETIME);", + "sql": "SELECT YEAR(CaseOpenDate) AS Year, MONTH(CaseOpenDate) AS Month, COUNT(*) AS CaseCount FROM Cases WHERE CaseOpenDate \u003e DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY YEAR(CaseOpenDate), MONTH(CaseOpenDate);", + "sql_explanation": "This query groups the cases by month and year, and filters out any cases that were not opened in the past year. It then calculates the count of cases for each month of the past year." 
+}, { + "id": "2104", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total pro bono hours for attorneys who work on civil cases, ordered by hours?", + "sql_context": "CREATE TABLE attorney (attorney_id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO attorney (attorney_id, name, location) VALUES (1, \u0027Juan Rodriguez\u0027, \u0027Miami\u0027), (2, \u0027Maria Garcia\u0027, \u0027Los Angeles\u0027), (3, \u0027David Kim\u0027, \u0027Seattle\u0027); CREATE TABLE case_outcomes (outcome_id INT, attorney_id INT, case_type VARCHAR(255), hours DECIMAL(10,2)); INSERT INTO case_outcomes (outcome_id, attorney_id, case_type, hours) VALUES (1, 1, \u0027Civil\u0027, 20.00), (2, 1, \u0027Civil\u0027, 25.00), (3, 2, \u0027Criminal\u0027, 30.00), (4, 3, \u0027Family\u0027, 35.00), (5, 3, \u0027Civil\u0027, 40.00), (6, 3, \u0027Civil\u0027, 45.00);", + "sql": "SELECT case_type, SUM(hours) as total_hours FROM case_outcomes WHERE case_type \u003d \u0027Civil\u0027 GROUP BY case_type ORDER BY total_hours DESC;", + "sql_explanation": "The SQL query calculates the total hours for civil cases using the SUM function, filters the results for civil cases using the WHERE clause, and orders them by total hours in descending order." 
+}, { + "id": "3209", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for each attorney, sorted by the total amount?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, Name VARCHAR(50), TotalBilling DECIMAL(10,2)); INSERT INTO Attorneys (AttorneyID, Name, TotalBilling) VALUES (1, \u0027John Doe\u0027, 5000.00), (2, \u0027Jane Smith\u0027, 7000.00);", + "sql": "SELECT Name, SUM(TotalBilling) AS TotalBilling FROM Attorneys GROUP BY Name ORDER BY TotalBilling DESC;", + "sql_explanation": "This SQL query groups the data by attorney name and calculates the total billing for each attorney. It then sorts the results by total billing in descending order." 
+}, { + "id": "3662", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billable hours for each case type?", + "sql_context": "CREATE TABLE cases (case_id INT, case_type TEXT, billable_hours INT); INSERT INTO cases (case_id, case_type, billable_hours) VALUES (1, \u0027Civil\u0027, 10), (2, \u0027Criminal\u0027, 15), (3, \u0027Civil\u0027, 20), (4, \u0027Criminal\u0027, 25);", + "sql": "SELECT case_type, SUM(billable_hours) as total_billable_hours FROM cases GROUP BY case_type;", + "sql_explanation": "This SQL query calculates the total billable hours for each case type. It first groups the results by case_type. Then, it calculates the sum of billable_hours for each group." 
+}, { + "id": "4123", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases in the \u0027Criminal\u0027 category?", + "sql_context": "CREATE TABLE Cases (CaseID INT, Category VARCHAR(50), BillingAmount DECIMAL(10,2)); INSERT INTO Cases (CaseID, Category, BillingAmount) VALUES (1, \u0027Criminal\u0027, 2000.00), (2, \u0027Civil\u0027, 3000.00), (3, \u0027Criminal\u0027, 4000.00);", + "sql": "SELECT Category, COUNT(*) FROM Cases WHERE Category \u003d \u0027Criminal\u0027 GROUP BY Category;", + "sql_explanation": "This query calculates the total number of cases in the \u0027Criminal\u0027 category by first selecting the category and count of cases from the Cases table where the Category is \u0027Criminal\u0027. It then groups the results by category and calculates the count of cases for each group." 
+}, { + "id": "1255", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population size of all marine species in the Southern Ocean, grouped by conservation status?\"", + "sql_context": "CREATE TABLE marine_species_population (species_name VARCHAR(255), region VARCHAR(255), avg_population_size FLOAT, conservation_status VARCHAR(255)); INSERT INTO marine_species_population (species_name, region, avg_population_size, conservation_status) VALUES (\u0027Ross Seal\u0027, \u0027Southern Ocean\u0027, 1000, \u0027Fully Protected\u0027), (\u0027Antarctic Krill\u0027, \u0027Southern Ocean\u0027, 100000, \u0027Partially Protected\u0027), (\u0027Crabeater Seal\u0027, \u0027Southern Ocean\u0027, 700, \u0027Fully Protected\u0027);", + "sql": "SELECT conservation_status, AVG(avg_population_size) as avg_population_size FROM marine_species_population WHERE region \u003d \u0027Southern Ocean\u0027 GROUP BY conservation_status;", + "sql_explanation": "This query calculates the average population size of all marine species in the Southern Ocean, grouped by their conservation status. It first filters the data to only include records from the Southern Ocean. Then, it groups the data by conservation status and calculates the average population size for each group." 
+}, { + "id": "3015", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many marine species are found in the Atlantic basin, grouped by species name?", + "sql_context": "CREATE TABLE marine_species_atlantic (name VARCHAR(255), basin VARCHAR(255)); INSERT INTO marine_species_atlantic (name, basin) VALUES (\u0027Species1\u0027, \u0027Atlantic\u0027), (\u0027Species2\u0027, \u0027Pacific\u0027), (\u0027Species3\u0027, \u0027Indian\u0027), (\u0027Species4\u0027, \u0027Atlantic\u0027);", + "sql": "SELECT name, COUNT(*) as num_species FROM marine_species_atlantic WHERE basin \u003d \u0027Atlantic\u0027 GROUP BY name;", + "sql_explanation": "The SQL query filters the marine_species_atlantic table to only include rows where the basin is \u0027Atlantic\u0027. It then groups the results by species name and calculates the number of species in each group." 
+}, { + "id": "3252", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the species with the highest frequency of coral bleaching?", + "sql_context": "CREATE TABLE marine_impacts (id INT PRIMARY KEY, species VARCHAR(255), impact VARCHAR(255), frequency INT); INSERT INTO marine_impacts (id, species, impact, frequency) VALUES (1, \u0027Clownfish\u0027, \u0027Coral Bleaching\u0027, 65);", + "sql": "SELECT species, MAX(frequency) FROM marine_impacts WHERE impact \u003d \u0027Coral Bleaching\u0027 GROUP BY species;", + "sql_explanation": "This query identifies the species most affected by coral bleaching, selecting those with the highest frequency." 
+}, { + "id": "3542", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of marine species in each order in the \u0027species_orders\u0027 table, sorted by the number of species in descending order.", + "sql_context": "CREATE TABLE species_orders (order_id INT, order_name VARCHAR(50), species_count INT);", + "sql": "SELECT order_name, COUNT(*) FROM species_orders GROUP BY order_id ORDER BY species_count DESC;", + "sql_explanation": "The SQL query groups the \u0027species_orders\u0027 table by order_id and then counts the number of marine species per order. It then sorts the results by the number of species in descending order." 
+}, { + "id": "4523", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average depth of all marine protected areas, grouped by country?", + "sql_context": "CREATE TABLE marine_protected_areas (id INT, country VARCHAR(50), name VARCHAR(50), area_sqkm FLOAT, avg_depth FLOAT); INSERT INTO marine_protected_areas (id, country, name, area_sqkm, avg_depth) VALUES (1, \u0027Australia\u0027, \u0027Great Barrier Reef\u0027, 344400, -2000); INSERT INTO marine_protected_areas (id, country, name, area_sqkm, avg_depth) VALUES (2, \u0027Canada\u0027, \u0027Gwaii Haanas National Park\u0027, 14280, -220);", + "sql": "SELECT country, AVG(avg_depth) FROM marine_protected_areas GROUP BY country;", + "sql_explanation": "The SQL query calculates the average depth of marine protected areas for each country by grouping the marine_protected_areas table by country and finding the average depth (avg_depth) for each group." 
+}, { + "id": "5604", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ocean pH, grouped by month?", + "sql_context": "CREATE TABLE ocean_ph (id INT, month INT, ph FLOAT); INSERT INTO ocean_ph (id, month, ph) VALUES (1, 1, 8.1); INSERT INTO ocean_ph (id, month, ph) VALUES (2, 2, 8.0); INSERT INTO ocean_ph (id, month, ph) VALUES (3, 3, 7.9);", + "sql": "SELECT month, AVG(ph) FROM ocean_ph GROUP BY month;", + "sql_explanation": "This query calculates the average ocean pH, grouped by month. It does this by using the GROUP BY statement on the month column, and applying the AVG function on the ph column." 
+}, { + "id": "1039", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 genres by the number of streams in the United States?", + "sql_context": "CREATE TABLE streams (stream_id int, user_id int, track_id int, genre varchar(255), timestamp datetime); INSERT INTO streams (stream_id, user_id, track_id, genre, timestamp) VALUES (1, 123, 345, \u0027Rock\u0027, \u00272022-01-01 10:00:00\u0027), (2, 124, 346, \u0027Pop\u0027, \u00272022-01-01 11:00:00\u0027);", + "sql": "SELECT genre, COUNT(*) as stream_count FROM streams WHERE timestamp BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 AND genre IS NOT NULL GROUP BY genre ORDER BY stream_count DESC LIMIT 5;", + "sql_explanation": "This query groups streams by genre and counts the number of occurrences for each genre in the US during 2022. It then orders the results by the count in descending order and limits the output to the top 5 genres." 
+}, { + "id": "1043", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of concerts held in each month for a specific artist?", + "sql_context": "CREATE TABLE concerts (concert_id int, venue_id int, timestamp datetime, artist_id int); INSERT INTO concerts (concert_id, venue_id, timestamp, artist_id) VALUES (1, 789, \u00272022-06-01 12:00:00\u0027, 101);", + "sql": "SELECT DATE_FORMAT(timestamp, \u0027%Y-%m\u0027) as month, COUNT(*) as concert_count FROM concerts WHERE artist_id \u003d 101 AND timestamp BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY month;", + "sql_explanation": "This query calculates the number of concerts held in each month for a specific artist (artist_id \u003d 101) during 2022. It does this by grouping the concerts by the month component of the timestamp and counting the number of occurrences for each group." 
+}, { + "id": "1200", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries with the highest average concert ticket prices.", + "sql_context": "CREATE TABLE concerts (country VARCHAR(50), artist_name VARCHAR(50), ticket_price DECIMAL(5,2)); INSERT INTO concerts (country, artist_name, ticket_price) VALUES (\u0027US\u0027, \u0027BeyoncÊ\u0027, 200.00), (\u0027Canada\u0027, \u0027Drake\u0027, 150.00), (\u0027Mexico\u0027, \u0027Taylor Swift\u0027, 220.00), (\u0027US\u0027, \u0027Kendrick Lamar\u0027, 120.00);", + "sql": "SELECT country, AVG(ticket_price) AS avg_ticket_price, ROW_NUMBER() OVER(ORDER BY AVG(ticket_price) DESC) AS rank FROM concerts GROUP BY country ORDER BY rank ASC LIMIT 3;", + "sql_explanation": "This query lists the top 3 countries with the highest average concert ticket prices by using the AVG function on the ticket_price column, partitioning the data by country, and ordering the results by the average ticket price in descending order, then limiting the results to the top 3 using the ROW_NUMBER window function." 
+}, { + "id": "1752", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique users who have streamed music from each genre?", + "sql_context": "CREATE TABLE user_genre_streams (stream_id int, user_id int, timestamp datetime, genre varchar(255)); INSERT INTO user_genre_streams (stream_id, user_id, timestamp, genre) VALUES (1, 123, \u00272022-01-01 10:00:00\u0027, \u0027Rock\u0027);", + "sql": "SELECT genre, COUNT(DISTINCT user_id) as unique_users FROM user_genre_streams WHERE timestamp BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY genre;", + "sql_explanation": "This query calculates the number of unique users who have streamed music from each genre during 2022. It does this by grouping the streams by genre and counting the number of unique user_ids for each group." 
+}, { + "id": "2282", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most profitable concert by a female artist?", + "sql_context": "CREATE TABLE Concerts (ConcertID INT, ArtistID INT, Venue VARCHAR(100), Date DATE, TicketPrice DECIMAL(10,2), ArtistGender VARCHAR(10)); INSERT INTO Concerts (ConcertID, ArtistID, Venue, Date, TicketPrice, ArtistGender) VALUES (4, 3, \u0027London O2 Arena\u0027, \u00272023-07-22\u0027, 120, \u0027Female\u0027); INSERT INTO Concerts (ConcertID, ArtistID, Venue, Date, TicketPrice, ArtistGender) VALUES (5, 4, \u0027Barclays Center\u0027, \u00272023-08-30\u0027, 95, \u0027Male\u0027);", + "sql": "SELECT Venue, SUM(TicketPrice) FROM Concerts WHERE ArtistGender \u003d \u0027Female\u0027 GROUP BY Venue ORDER BY SUM(TicketPrice) DESC LIMIT 1;", + "sql_explanation": "Retrieve the Venue and sum of TicketPrice values from the Concerts table where the ArtistGender column is \u0027Female\u0027, grouped by the Venue column, and order by the sum of TicketPrice column in descending order. This will identify the most profitable concert by a female artist." 
+}, { + "id": "3293", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for a K-pop artist\u0027s concert ticket sales in 2019?", + "sql_context": "CREATE TABLE Kpop_Concerts (year INT, artist VARCHAR(50), revenue FLOAT); INSERT INTO Kpop_Concerts (year, artist, revenue) VALUES (2018, \u0027BTS\u0027, 1000000), (2019, \u0027BLACKPINK\u0027, 1500000), (2020, \u0027TWICE\u0027, 800000), (2021, \u0027SEVENTEEN\u0027, 1200000), (2019, \u0027BTS\u0027, 1500000);", + "sql": "SELECT artist, SUM(revenue) FROM Kpop_Concerts WHERE year \u003d 2019 AND artist \u003d \u0027BTS\u0027 GROUP BY artist;", + "sql_explanation": "This SQL query calculates the total revenue for a K-pop artist\u0027s concert ticket sales in 2019 by summing the revenue column where the year is equal to 2019 and the artist is \u0027BTS\u0027 and grouping by artist." 
+}, { + "id": "4171", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue from concert ticket sales in 2021, grouped by city?", + "sql_context": "CREATE TABLE ConcertTicketSales (id INT, year INT, city VARCHAR(50), revenue FLOAT);", + "sql": "SELECT city, SUM(revenue) FROM ConcertTicketSales WHERE year \u003d 2021 GROUP BY city;", + "sql_explanation": "The query filters ConcertTicketSales table for 2021 data, calculates revenue sum for each city and groups them." +}, { + "id": "4183", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of concerts for each artist", + "sql_context": "CREATE TABLE concerts (id INT, artist_name VARCHAR(255), concert_date DATE); INSERT INTO concerts (id, artist_name, concert_date) VALUES (1, \u0027Taylor Swift\u0027, \u00272022-06-01\u0027), (2, \u0027BTS\u0027, \u00272022-07-01\u0027), (3, \u0027Taylor Swift\u0027, \u00272022-08-01\u0027), (4, \u0027Ariana Grande\u0027, \u00272022-09-01\u0027);", + "sql": "SELECT artist_name, COUNT(*) as total_concerts FROM concerts GROUP BY artist_name;", + "sql_explanation": "This SQL query groups the \u0027concerts\u0027 table by artist_name, and calculates the count of rows 
for each group. This effectively finds the total number of concerts for each artist." +}, { + "id": "4994", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total ticket sales for each concert by city?", + "sql_context": "CREATE TABLE Concerts (concert_id INT, city VARCHAR(50), sales INT); INSERT INTO Concerts (concert_id, city, sales) VALUES (1, \u0027Los Angeles\u0027, 5000), (2, \u0027New York\u0027, 7000), (3, \u0027Chicago\u0027, 6000);", + "sql": "SELECT city, SUM(sales) as total_sales FROM Concerts GROUP BY city;", + "sql_explanation": "This SQL query calculates the total ticket sales for each concert by city. It uses the SUM() function to add up the sales for each city and the GROUP BY clause to group the results by city." 
+}, { + "id": "5319", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue from concert ticket sales in each country?", + "sql_context": "CREATE TABLE Concerts (ConcertID INT, ConcertName VARCHAR(100), Country VARCHAR(50), Year INT, Revenue INT); INSERT INTO Concerts VALUES (1, \u0027Concert1\u0027, \u0027France\u0027, 2020, 10000); INSERT INTO Concerts VALUES (2, \u0027Concert2\u0027, \u0027Germany\u0027, 2021, 15000); INSERT INTO Concerts VALUES (3, \u0027Concert3\u0027, \u0027Spain\u0027, 2019, 12000);", + "sql": "SELECT Country, SUM(Revenue) FROM Concerts GROUP BY Country;", + "sql_explanation": "This query calculates the total revenue from concert ticket sales in each country by grouping the Concerts table by the Country column using the GROUP BY clause, and then using the SUM function to calculate the total revenue for each group." 
+}, { + "id": "2395", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many containers were handled by stevedoring company \u0027ABC\u0027 in the second quarter of 2020, grouped by container type?", + "sql_context": "CREATE TABLE stevedoring (stevedoring_id INT, company VARCHAR(255), quarter INT, container_type VARCHAR(255), containers_handled INT);INSERT INTO stevedoring (stevedoring_id, company, quarter, container_type, containers_handled) VALUES (1, \u0027ABC\u0027, 2, \u0027dry\u0027, 5000), (2, \u0027ABC\u0027, 2, \u0027refrigerated\u0027, 3000);", + "sql": "SELECT container_type, SUM(containers_handled) FROM stevedoring WHERE company \u003d \u0027ABC\u0027 AND quarter \u003d 2 GROUP BY container_type;", + "sql_explanation": "This SQL query calculates the number of containers handled by stevedoring company \u0027ABC\u0027 in the second quarter of 2020, grouped by container type by filtering the data for company \u0027ABC\u0027 and quarter 2, grouping by container type and summing the number of containers handled for each type." 
+}, { + "id": "3292", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo handling time for each shipping line?", + "sql_context": "CREATE TABLE shipping_lines (shipping_line_id INT, shipping_line_name VARCHAR(50), cargo_handling_time INT); INSERT INTO shipping_lines (shipping_line_id, shipping_line_name, cargo_handling_time) VALUES (1, \u0027SL1\u0027, 120), (2, \u0027SL2\u0027, 150), (3, \u0027SL3\u0027, 180);", + "sql": "SELECT shipping_line_name, AVG(cargo_handling_time) FROM shipping_lines GROUP BY shipping_line_name;", + "sql_explanation": "This SQL query calculates the average cargo handling time for each shipping line by grouping the shipping lines and averaging the cargo_handling_time column." 
+}, { + "id": "5428", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many containers of each type are there in the \u0027CONTAINER_FLEET\u0027 table?", + "sql_context": "CREATE TABLE CONTAINER_FLEET (ID INT, Type VARCHAR(20)); INSERT INTO CONTAINER_FLEET (ID, Type) VALUES (1, \u0027Dry\u0027), (2, \u0027Reefer\u0027), (3, \u0027Tank\u0027), (4, \u0027Dry\u0027), (5, \u0027Reefer\u0027);", + "sql": "SELECT Type, COUNT(*) FROM CONTAINER_FLEET GROUP BY Type;", + "sql_explanation": "This query counts the number of containers of each type by grouping the \u0027CONTAINER_FLEET\u0027 table by \u0027Type\u0027 and counting the number of rows for each group." 
+}, { + "id": "835", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of electric vehicles in Germany and France?", + "sql_context": "CREATE TABLE vehicle_sales (id INT, country VARCHAR(50), vehicle_type VARCHAR(50), sales INT);", + "sql": "SELECT country, 100.0 * SUM(CASE WHEN vehicle_type \u003d \u0027electric\u0027 THEN sales ELSE 0 END) / SUM(sales) AS market_share FROM vehicle_sales WHERE country IN (\u0027Germany\u0027, \u0027France\u0027) GROUP BY country;", + "sql_explanation": "The SQL query calculates the market share of electric vehicles in Germany and France by using a conditional sum with the CASE statement to sum the sales of electric vehicles and dividing it by the total sales. The result is multiplied by 100 to get a percentage." 
+}, { + "id": "934", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 busiest subway stations in New York City, with the number of entries and exits?", + "sql_context": "CREATE TABLE subway_entries_exits (id INT, station_name VARCHAR(50), entry_time TIMESTAMP, exit_time TIMESTAMP); INSERT INTO subway_entries_exits (id, station_name, entry_time, exit_time) VALUES (1, \u0027Times Square\u0027, \u00272022-01-01 08:00:00\u0027, \u00272022-01-01 08:10:00\u0027), (2, \u0027Grand Central\u0027, \u00272022-01-01 08:05:00\u0027, \u00272022-01-01 08:15:00\u0027);", + "sql": "SELECT station_name, COUNT(*) as entries_exits FROM subway_entries_exits WHERE entry_time IS NOT NULL OR exit_time IS NOT NULL GROUP BY station_name ORDER BY entries_exits DESC LIMIT 5;", + "sql_explanation": "The SQL query lists the top 5 busiest subway stations by counting entries and exits for each station. It uses a GROUP BY clause to group the entries and exits by station name, and an ORDER BY clause to sort the results in descending order by the count of entries and exits. The LIMIT clause is used to limit the results to the top 5." 
+}, { + "id": "1371", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the most common trip distance for shared autonomous vehicles in San Francisco, CA.", + "sql_context": "CREATE TABLE shared_autonomous_vehicles (vehicle_id INT, trip_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, start_location TEXT, end_location TEXT, city TEXT, trip_distance DECIMAL);", + "sql": "SELECT trip_distance, COUNT(*) as trip_count FROM shared_autonomous_vehicles WHERE city \u003d \u0027San Francisco\u0027 GROUP BY trip_distance ORDER BY trip_count DESC LIMIT 1;", + "sql_explanation": "This query identifies the most common trip distance for shared autonomous vehicles in San Francisco, CA by selecting the trip distance and counting the number of trips for each unique trip distance. It groups them by trip_distance, orders them in descending order based on the trip_count, and limits the results to the top 1 most common trip distance." 
+}, { + "id": "1462", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance traveled per day by autonomous trains in Berlin?", + "sql_context": "CREATE TABLE autonomous_trains (train_id INT, trip_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, start_latitude DECIMAL(9,6), start_longitude DECIMAL(9,6), end_latitude DECIMAL(9,6), end_longitude DECIMAL(9,6), distance DECIMAL(10,2), trip_date DATE);", + "sql": "SELECT AVG(distance/100) FROM autonomous_trains WHERE start_longitude BETWEEN 13.1 AND 13.8 AND start_latitude BETWEEN 52.3 AND 52.7 GROUP BY DATE(trip_date);", + "sql_explanation": "This query calculates the average distance traveled per day by autonomous trains in Berlin by finding the average of the distance column in the autonomous_trains table, which stores data for train trips. The WHERE clause filters the results to only include trips that started in the Berlin area, defined here as being between 13.1 and 13.8 longitude and 52.3 and 52.7 latitude. The GROUP BY clause groups the results by the trip_date column to get the daily average. The result is divided by 100 to convert meters to kilometers." 
+}, { + "id": "1521", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average fuel efficiency of electric vehicles in Japan and South Korea?", + "sql_context": "CREATE TABLE vehicle_fuel_efficiency (id INT, country VARCHAR(50), vehicle_type VARCHAR(50), fuel_efficiency FLOAT);", + "sql": "SELECT country, AVG(fuel_efficiency) FROM vehicle_fuel_efficiency WHERE country IN (\u0027Japan\u0027, \u0027South Korea\u0027) AND vehicle_type \u003d \u0027electric\u0027 GROUP BY country;", + "sql_explanation": "The SQL query calculates the average fuel efficiency of electric vehicles in Japan and South Korea by selecting the fuel_efficiency column and using the AVG function. The results are grouped by country and the query also checks if the vehicle type is electric." 
+}, { + "id": "1779", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of electric vehicles in Tokyo, partitioned by car manufacturer?", + "sql_context": "CREATE TABLE vehicles (id INT, manufacturer VARCHAR(255), vehicle_type VARCHAR(255), electric BOOLEAN); INSERT INTO vehicles (id, manufacturer, vehicle_type, electric) VALUES (1, \u0027Toyota\u0027, \u0027sedan\u0027, true), (2, \u0027Honda\u0027, \u0027hybrid\u0027, false);", + "sql": "SELECT manufacturer, 100.0 * COUNT(*) FILTER (WHERE electric) / COUNT(*) AS pct_electric FROM vehicles WHERE city \u003d \u0027Tokyo\u0027 GROUP BY manufacturer;", + "sql_explanation": "Calculate the percentage of electric vehicles for each car manufacturer in Tokyo. Filter the vehicles table to include only the vehicles from Tokyo, group the data by manufacturer, and count the number of electric vehicles for each manufacturer. Calculate the percentage of electric vehicles for each manufacturer based on the total number of vehicles for the same manufacturer." 
+}, { + "id": "2000", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which day of the week has the highest usage of public transportation in London?", + "sql_context": "CREATE TABLE public_transportation (id INT, mode VARCHAR(255), usage INT, date DATE); INSERT INTO public_transportation (id, mode, usage, date) VALUES (1, \u0027bus\u0027, 1500, \u00272022-01-01\u0027), (2, \u0027tube\u0027, 2000, \u00272022-01-01\u0027);", + "sql": "SELECT TO_CHAR(date, \u0027Day\u0027) AS day_of_week, MAX(usage) AS max_usage FROM public_transportation WHERE city \u003d \u0027London\u0027 GROUP BY day_of_week;", + "sql_explanation": "Determine the day of the week with the highest public transportation usage in London. Extract the day of the week from the date field and group the data by day of the week. Calculate the maximum usage for each day of the week and return the corresponding day." 
+}, { + "id": "2051", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which city has the highest number of electric taxi rides in a month?", + "sql_context": "CREATE TABLE electric_taxis (taxi_id INT, ride_id INT, start_time TIMESTAMP, end_time TIMESTAMP, city VARCHAR(255));", + "sql": "SELECT city, COUNT(*) as num_rides FROM electric_taxis WHERE ride_id BETWEEN 1 AND 100000 GROUP BY city ORDER BY num_rides DESC LIMIT 1;", + "sql_explanation": "The query finds the city with the highest number of electric taxi rides in the first 100,000 rides by grouping the electric_taxis table by city and using the COUNT function, then sorting the results by the number of rides in descending order and limiting the results to 1." 
+}, { + "id": "2134", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles are there in each state in the US?", + "sql_context": "CREATE TABLE us_vehicles (state VARCHAR(20), vehicle_type VARCHAR(20), quantity INT);", + "sql": "SELECT state, vehicle_type, SUM(quantity) AS total_electric_vehicles FROM us_vehicles WHERE vehicle_type \u003d \u0027electric\u0027 GROUP BY state;", + "sql_explanation": "Calculate the number of electric vehicles in each state in the US by filtering the vehicle_type column and summing the quantity column of the us_vehicles table using a group by statement." 
+}, { + "id": "2336", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many charging stations are there in \u0027California\u0027 and \u0027Texas\u0027 in the charging_stations table?", + "sql_context": "CREATE TABLE charging_stations (id INT, state TEXT, station_type TEXT, total_stations INT); INSERT INTO charging_stations (id, state, station_type, total_stations) VALUES (1, \u0027California\u0027, \u0027Fast\u0027, 50), (2, \u0027Texas\u0027, \u0027Standard\u0027, 40), (3, \u0027California\u0027, \u0027Standard\u0027, 60);", + "sql": "SELECT state, COUNT(*) as total_charging_stations FROM charging_stations WHERE state IN (\u0027California\u0027, \u0027Texas\u0027) GROUP BY state;", + "sql_explanation": "Determine the number of charging stations in \u0027California\u0027 and \u0027Texas\u0027 in the charging_stations table." 
+}, { + "id": "2598", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 most common types of autonomous vehicles in California?", + "sql_context": "CREATE TABLE autonomous_vehicles (id INT, type VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT type, COUNT(*) FROM autonomous_vehicles WHERE state \u003d \u0027California\u0027 GROUP BY type ORDER BY COUNT(*) DESC LIMIT 5;", + "sql_explanation": "This SQL query finds the top 5 most common types of autonomous vehicles in California. It does this by using the COUNT function, which returns the number of rows in a table, and the GROUP BY clause, which groups the rows in the table by the type column. The query filters the autonomous_vehicles table to only include rows where the state is \u0027California\u0027, and then orders the results by the count of each type in descending order. The query then limits the results to the top 5 rows using the LIMIT clause." 
+}, { + "id": "2725", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of autonomous vehicles by brand?", + "sql_context": "CREATE TABLE autonomous_vehicles (brand VARCHAR(50), model VARCHAR(50), is_autonomous BOOLEAN);", + "sql": "SELECT brand, AVG(is_autonomous) as market_share FROM autonomous_vehicles WHERE is_autonomous \u003d true GROUP BY brand;", + "sql_explanation": "The SQL query calculates the market share of autonomous vehicles by brand by averaging the is_autonomous column and grouping by brand. Only rows where is_autonomous is true are included in the calculation." 
+}, { + "id": "3060", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicle charging stations by country and state?", + "sql_context": "CREATE TABLE ElectricVehicleChargingStationsByRegion(Country VARCHAR(50), State VARCHAR(50), Stations INT);", + "sql": "SELECT Country, State, SUM(Stations) FROM ElectricVehicleChargingStationsByRegion GROUP BY Country, State;", + "sql_explanation": "The query groups the data by both the Country and State columns and calculates the total Stations for each group." +}, { + "id": "3397", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most popular type of autonomous vehicle in Chicago?", + "sql_context": "CREATE TABLE Autonomous_Vehicle_Adoption (id INT, make VARCHAR(50), model VARCHAR(50), year INT, city VARCHAR(50), type VARCHAR(50), adoptions INT);", + "sql": "SELECT type, MAX(adoptions) FROM Autonomous_Vehicle_Adoption WHERE city \u003d \u0027Chicago\u0027 GROUP BY type;", + "sql_explanation": "This query identifies the most popular type of autonomous vehicle in Chicago by filtering the Autonomous_Vehicle_Adoption table based on 
the city column, grouping the table by the type column, and then selecting the type column and the maximum value of the adoptions column." +}, { + "id": "3482", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many bike-sharing stations are there in Paris and London?", + "sql_context": "CREATE TABLE EuroBikeSharing (id INT, city VARCHAR(20), stations INT);", + "sql": "SELECT city, SUM(stations) FROM EuroBikeSharing WHERE city IN (\u0027Paris\u0027, \u0027London\u0027) GROUP BY city;", + "sql_explanation": "This query calculates the total number of bike-sharing stations in Paris and London by using the SUM and GROUP BY functions on the stations and city columns." 
+}, { + "id": "3763", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average speed of electric vehicles in the US per month in 2021?", + "sql_context": "CREATE TABLE EV_Speeds (country VARCHAR(20), month INT, year INT, avg_speed DECIMAL(5,2)); INSERT INTO EV_Speeds (country, month, year, avg_speed) VALUES (\u0027USA\u0027, 1, 2021, 65.5), (\u0027USA\u0027, 2, 2021, 68.3), (\u0027USA\u0027, 3, 2021, 70.1), (\u0027USA\u0027, 4, 2021, 72.4), (\u0027USA\u0027, 5, 2021, 74.2), (\u0027USA\u0027, 6, 2021, 76.9), (\u0027USA\u0027, 7, 2021, 79.1), (\u0027USA\u0027, 8, 2021, 81.5), (\u0027USA\u0027, 9, 2021, 84.2), (\u0027USA\u0027, 10, 2021, 86.3), (\u0027USA\u0027, 11, 2021, 88.7), (\u0027USA\u0027, 12, 2021, 91.2);", + "sql": "SELECT AVG(avg_speed) FROM EV_Speeds WHERE country \u003d \u0027USA\u0027 AND year \u003d 2021 GROUP BY month;", + "sql_explanation": "The SQL query calculates the average speed of electric vehicles in the US per month in 2021 by grouping the data by month and calculating the average speed." 
+}, { + "id": "3840", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of electric vehicles per city in the \u0027transportation\u0027 schema, grouped by country?", + "sql_context": "CREATE TABLE city_electric_vehicles (city_name VARCHAR(255), country VARCHAR(255), num_electric_vehicles INT); INSERT INTO city_electric_vehicles (city_name, country, num_electric_vehicles) VALUES (\u0027San Francisco\u0027, \u0027USA\u0027, 15000), (\u0027Los Angeles\u0027, \u0027USA\u0027, 20000), (\u0027Toronto\u0027, \u0027Canada\u0027, 10000), (\u0027Montreal\u0027, \u0027Canada\u0027, 8000), (\u0027Mexico City\u0027, \u0027Mexico\u0027, 5000);", + "sql": "SELECT country, AVG(num_electric_vehicles) FROM city_electric_vehicles GROUP BY country;", + "sql_explanation": "This query calculates the average number of electric vehicles per city in the \u0027transportation\u0027 schema, grouped by country. It does this by selecting the \u0027country\u0027 and the average of \u0027num_electric_vehicles\u0027 from the \u0027city_electric_vehicles\u0027 table, grouped by \u0027country\u0027." 
+}, { + "id": "4154", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the breakdown of multimodal mobility usage by age group?", + "sql_context": "CREATE TABLE MultimodalMobility(AgeGroup VARCHAR(50), Mode VARCHAR(50), Usage FLOAT);", + "sql": "SELECT AgeGroup, Mode, SUM(Usage) FROM MultimodalMobility GROUP BY AgeGroup, Mode;", + "sql_explanation": "The query groups the data by both the AgeGroup and Mode columns and calculates the total Usage for each group." +}, { + "id": "4324", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many shared scooters are there in Berlin, grouped by company?", + "sql_context": "CREATE TABLE shared_scooters (scooter_id INT, scooter_company VARCHAR(50), scooter_registration_date DATE); INSERT INTO shared_scooters (scooter_id, scooter_company, scooter_registration_date) VALUES (1, \u0027Lime\u0027, \u00272022-04-01\u0027), (2, \u0027Bird\u0027, \u00272022-04-02\u0027), (3, \u0027Lime\u0027, \u00272022-04-03\u0027);", + "sql": "SELECT scooter_company, COUNT(*) FROM shared_scooters GROUP BY scooter_company;", + "sql_explanation": "Count the number of shared scooters in Berlin, 
grouped by company." +}, { + "id": "5151", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of public transportation trips taken in a single day in each city?", + "sql_context": "CREATE TABLE Daily_Transportation (id INT, city VARCHAR(50), trips INT, date DATE);", + "sql": "SELECT city, MAX(trips) FROM Daily_Transportation GROUP BY city;", + "sql_explanation": "This query identifies the maximum number of public transportation trips taken in a single day in each city by grouping the Daily_Transportation table by the city column, and then selecting the city column and the maximum value of the trips column." 
+}, { + "id": "1314", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many polar bear sightings are in each Arctic region per year?", + "sql_context": "CREATE TABLE polar_bear_sightings (sighting_date DATE, region VARCHAR(50)); INSERT INTO polar_bear_sightings (sighting_date, region) VALUES (\u00272010-01-01\u0027, \u0027Arctic North America\u0027), (\u00272010-01-05\u0027, \u0027Arctic Europe\u0027);", + "sql": "SELECT e.region, EXTRACT(YEAR FROM e.sighting_date) as year, COUNT(e.sighting_date) as sighting_count FROM polar_bear_sightings e GROUP BY e.region, e.sighting_date;", + "sql_explanation": "The SQL query groups the records by region and year, and counts the number of polar bear sightings for each group." 
+}, { + "id": "1974", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average temperature in the Arctic Ocean for each year between 2000 and 2005.", + "sql_context": "CREATE TABLE Climate (id INT PRIMARY KEY, year INT, location VARCHAR(255), temperature FLOAT); INSERT INTO Climate (id, year, location, temperature) VALUES (1, 2000, \u0027Arctic Ocean\u0027, -1.5); INSERT INTO Climate (id, year, location, temperature) VALUES (2, 2001, \u0027Arctic Ocean\u0027, -1.8);", + "sql": "SELECT year, AVG(temperature) as avg_temperature FROM Climate WHERE location \u003d \u0027Arctic Ocean\u0027 AND year BETWEEN 2000 AND 2005 GROUP BY year;", + "sql_explanation": "This SQL query calculates the average temperature in the Arctic Ocean for each year between 2000 and 2005 by using the GROUP BY clause to group the records by year and the WHERE clause to filter the records for the Arctic Ocean and the specified years. The AVG function calculates the average temperature for each group." 
+}, { + "id": "2206", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature for each species in a given year in the species_measurements table?", + "sql_context": "CREATE TABLE species_measurements (species_id INT, measurement_date DATE, temperature DECIMAL(5,2));", + "sql": "SELECT species_id, AVG(temperature) FROM species_measurements WHERE EXTRACT(YEAR FROM measurement_date) \u003d 2022 GROUP BY species_id;", + "sql_explanation": "Filter the data to the given year, then find the average temperature for each species." +}, { + "id": "2239", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature in each Arctic region for the last 5 years?", + "sql_context": "CREATE TABLE temperature_data (id INT, arctic_region VARCHAR(255), date DATE, temperature FLOAT); INSERT INTO temperature_data (id, arctic_region, date, temperature) VALUES (1, \u0027North Pole\u0027, \u00272018-01-01\u0027, -25.0), (2, \u0027Canada\u0027, \u00272018-01-01\u0027, -20.0);", + "sql": "SELECT arctic_region, AVG(temperature) FROM temperature_data WHERE date \u003e\u003d DATEADD(year, -5, CURRENT_DATE) GROUP BY 
arctic_region;", + "sql_explanation": "This SQL query calculates the average temperature in each Arctic region for the last 5 years by filtering records based on the date column and then calculating the average temperature for each Arctic region, providing a better understanding of the current climate change." +}, { + "id": "2449", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many distinct species have been observed in each location in the Arctic and Antarctic regions?", + "sql_context": "CREATE TABLE SpeciesObservations (id INT, species VARCHAR(50), location VARCHAR(50), last_seen DATE, region VARCHAR(50)); INSERT INTO SpeciesObservations (id, species, location, last_seen, region) VALUES (1, \u0027Polar Bear\u0027, \u0027Arctic\u0027, \u00272020-01-01\u0027, \u0027Arctic\u0027); INSERT INTO SpeciesObservations (id, species, location, last_seen, region) VALUES (2, \u0027Walrus\u0027, \u0027Arctic\u0027, \u00272020-01-02\u0027, \u0027Arctic\u0027);", + "sql": "SELECT location, COUNT(DISTINCT species) FROM SpeciesObservations WHERE region IN (\u0027Arctic\u0027, \u0027Antarctic\u0027) GROUP BY location;", + "sql_explanation": "Find the number of distinct species per location in the Arctic and Antarctic regions by counting the distinct number of species for each location." 
+}, { + "id": "2493", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals rescued by each organization in the last 3 years?", + "sql_context": "CREATE TABLE animal_rescue_data (organization VARCHAR(255), year INT, animals_rescued INT);", + "sql": "SELECT organization, SUM(animals_rescued) FROM animal_rescue_data WHERE year BETWEEN 2020 AND 2022 GROUP BY organization;", + "sql_explanation": "The query calculates the total number of animals rescued by each organization from the animal_rescue_data table for the last 3 years." +}, { + "id": "3157", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many indigenous communities are present in each arctic country?", + "sql_context": "CREATE TABLE IndigenousCommunity (ID INT, Name TEXT, Country TEXT); INSERT INTO IndigenousCommunity (ID, Name, Country) VALUES (1, \u0027Community1\u0027, \u0027Canada\u0027); INSERT INTO IndigenousCommunity (ID, Name, Country) VALUES (2, \u0027Community2\u0027, \u0027Canada\u0027); INSERT INTO IndigenousCommunity (ID, Name, Country) VALUES (3, \u0027Community3\u0027, \u0027Russia\u0027);", + "sql": "SELECT Country, COUNT(DISTINCT 
Name) as Number_of_Communities FROM IndigenousCommunity GROUP BY Country;", + "sql_explanation": "This query calculates the number of indigenous communities for each arctic country by grouping the IndigenousCommunity table by country and selecting the count of distinct community names." +}, { + "id": "3503", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of accidents for each type?", + "sql_context": "CREATE TABLE accident_type (accident_type_id INT, accident_type_name VARCHAR(255)); INSERT INTO accident_type (accident_type_id, accident_type_name) VALUES (1, \u0027AccidentTypeA\u0027), (2, \u0027AccidentTypeB\u0027); CREATE TABLE accidents (accident_type_id INT, number INT); INSERT INTO accidents (accident_type_id, number) VALUES (1, 5), (2, 8), (1, 6), (2, 9), (1, 7), (2, 10);", + "sql": "SELECT accident_type_id, SUM(number) as total_accidents FROM accidents GROUP BY accident_type_id", + "sql_explanation": "Calculate the total number of accidents (total_accidents) for each accident type, grouped by accident_type_id." 
+}, { + "id": "3631", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ice thickness in the Arctic per month since 2010?", + "sql_context": "CREATE TABLE ice_thickness (month INT, year INT, ice_thickness FLOAT); INSERT INTO ice_thickness (month, year, ice_thickness) VALUES (1, 2010, 3.5), (2, 2010, 3.7);", + "sql": "SELECT t.month, AVG(t.ice_thickness) as avg_thickness FROM ice_thickness t GROUP BY t.month;", + "sql_explanation": "The SQL query groups the records by month and calculates the average ice thickness for each group." +}, { + "id": "3675", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature for each station in the \u0027ClimateData\u0027 table for the year 2010 or later?", + "sql_context": "CREATE TABLE ClimateData (station_id INT, year INT, temperature FLOAT); INSERT INTO ClimateData (station_id, year, temperature) VALUES (1, 2010, -10.5); INSERT INTO ClimateData (station_id, year, temperature) VALUES (1, 2011, -11.0); INSERT INTO ClimateData (station_id, year, temperature) VALUES (2, 2015, -15.0);", + "sql": "SELECT station_id, AVG(temperature) FROM ClimateData WHERE year 
\u003e\u003d 2010 GROUP BY station_id;", + "sql_explanation": "This query calculates the average temperature for each station in the \u0027ClimateData\u0027 table for the year 2010 or later." +}, { + "id": "3770", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of observations for each species", + "sql_context": "species_observations", + "sql": "SELECT species, COUNT(*) as total_observations FROM species_observations GROUP BY species;", + "sql_explanation": "The SQL query groups the \u0027species_observations\u0027 table data by the \u0027species\u0027 column, and for each group, it calculates and returns the count of records as \u0027total_observations\u0027." 
+}, { + "id": "3790", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission from Arctic research stations in 2021, grouped by country?", + "sql_context": "CREATE TABLE Co2Emissions (Station VARCHAR(255), Country VARCHAR(255), Date DATE, Emission FLOAT); INSERT INTO Co2Emissions (Station, Country, Date, Emission) VALUES (\u0027StationA\u0027, \u0027Norway\u0027, \u00272021-01-01\u0027, 10.5), (\u0027StationB\u0027, \u0027Finland\u0027, \u00272021-01-01\u0027, 12.3);", + "sql": "SELECT Country, SUM(Emission) FROM Co2Emissions WHERE YEAR(Date) \u003d 2021 GROUP BY Country;", + "sql_explanation": "This SQL query calculates the total CO2 emissions from Arctic research stations in 2021, grouped by country. It uses the Co2Emissions table, which contains the station name, country, date, and CO2 emission. The query filters the data for the year 2021, then groups the results by country, calculating the total emission for each group." 
+}, { + "id": "3919", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average snowfall per month for the past 3 years?", + "sql_context": "CREATE TABLE SnowfallData (id INT, year INT, month INT, snowfall FLOAT); INSERT INTO SnowfallData (id, year, month, snowfall) VALUES (1, 2019, 1, 15.2), (2, 2019, 2, 13.5), (3, 2019, 3, 16.3);", + "sql": "SELECT AVG(snowfall) FROM SnowfallData WHERE year IN (2019, 2020, 2021) GROUP BY month;", + "sql_explanation": "This query calculates the average snowfall for each month in the SnowfallData table, for the past 3 years (2019-2021). It groups the data by month and then calculates the average snowfall for each group." 
+}, { + "id": "4181", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature per year in Canada\u0027s Nunavut territory?", + "sql_context": "CREATE TABLE WeatherData (location VARCHAR(50), year INT, temperature FLOAT); INSERT INTO WeatherData (location, year, temperature) VALUES (\u0027Nunavut\u0027, 2000, 10.2), (\u0027Nunavut\u0027, 2001, 12.1), (\u0027Nunavut\u0027, 2002, 8.9);", + "sql": "SELECT AVG(temperature) FROM WeatherData WHERE location \u003d \u0027Nunavut\u0027 GROUP BY year;", + "sql_explanation": "Calculate the average temperature for each year in Nunavut by grouping by year and calculating the average temperature." 
+}, { + "id": "4224", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum carbon sequestration value for each region in the \u0027carbon_sequestration\u0027 table?", + "sql_context": "CREATE TABLE carbon_sequestration (region VARCHAR(255), value FLOAT); INSERT INTO carbon_sequestration (region, value) VALUES (\u0027Arctic\u0027, 12.5), (\u0027Antarctic\u0027, 15.0);", + "sql": "SELECT region, MAX(value) as max_value FROM carbon_sequestration GROUP BY region;", + "sql_explanation": "This query calculates the maximum carbon sequestration value for each region in the \u0027carbon_sequestration\u0027 table by grouping by the \u0027region\u0027 column and finding the maximum \u0027value\u0027 column value." 
+}, { + "id": "4569", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum temperature per month in the Arctic Research Lab?", + "sql_context": "CREATE TABLE ArcticResearchLab (id INT, year INT, month INT, temperature FLOAT); INSERT INTO ArcticResearchLab (id, year, month, temperature) VALUES (1, 2000, 1, -10.5), (2, 2000, 2, -12.3), (3, 2000, 3, -13.1);", + "sql": "SELECT month, MIN(temperature) FROM ArcticResearchLab GROUP BY year, month;", + "sql_explanation": "This query calculates the minimum temperature for each month in the ArcticResearchLab table. It groups the data by year and month, and then calculates the minimum temperature for each group." 
+}, { + "id": "4859", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has each researcher visited the Arctic Research Station?", + "sql_context": "CREATE TABLE ResearcherVisits(researcher VARCHAR(50), visit_date DATE);INSERT INTO ResearcherVisits(researcher, visit_date) VALUES(\u0027Alice Johnson\u0027, \u00272020-01-01\u0027), (\u0027Bob Brown\u0027, \u00272019-12-01\u0027), (\u0027Alice Johnson\u0027, \u00272021-02-03\u0027), (\u0027Charlie Green\u0027, \u00272020-06-15\u0027);", + "sql": "SELECT researcher, COUNT(*) FROM ResearcherVisits GROUP BY researcher;", + "sql_explanation": "The SQL query counts the number of visits to the Arctic Research Station for each researcher. It groups the records by the researcher column and then counts the number of records in each group." 
+}, { + "id": "5193", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of glacier melt incidents in each region?", + "sql_context": "CREATE TABLE glacier_melt_data (id INT, date DATE, region VARCHAR(255));", + "sql": "SELECT region, COUNT(*) FROM glacier_melt_data GROUP BY region;", + "sql_explanation": "This query calculates the total number of glacier melt incidents (COUNT(*) column) for each region in the glacier_melt_data table. It groups the data by the region column and then calculates the total number of incidents for each group." +}, { + "id": "5269", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature per year in the Arctic Research Lab?", + "sql_context": "CREATE TABLE ArcticResearchLab (id INT, year INT, temperature FLOAT); INSERT INTO ArcticResearchLab (id, year, temperature) VALUES (1, 2000, -10.5), (2, 2001, -11.3), (3, 2002, -12.1);", + "sql": "SELECT AVG(temperature) FROM ArcticResearchLab GROUP BY year;", + "sql_explanation": "This query calculates the average temperature for each year in the ArcticResearchLab table. 
It groups the data by year and then calculates the average temperature for each group." +}, { + "id": "5369", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of reindeer in the \u0027arctic_reindeer\u0027 table, grouped by year?", + "sql_context": "CREATE TABLE arctic_reindeer (year INT, count INT);", + "sql": "SELECT year, MAX(count) FROM arctic_reindeer GROUP BY year;", + "sql_explanation": "The SQL query calculates the maximum number of reindeer in the \u0027arctic_reindeer\u0027 table for each year by using the MAX function on the \u0027count\u0027 column. It groups the records by year using the GROUP BY clause." 
+}, { + "id": "4", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of organic skincare products sold per month, displayed as pivoted data.", + "sql_context": "CREATE TABLE product_labels_v4 (brand VARCHAR(20), product_name VARCHAR(20), product_subcategory VARCHAR(20), product_label VARCHAR(20), sale_date DATE, sale_count INT); INSERT INTO product_labels_v4 VALUES (\u0027Skincare BrandC\u0027, \u0027Cleanser\u0027, \u0027Skincare\u0027, \u0027Organic\u0027, \u00272021-01-01\u0027, 20), (\u0027Skincare BrandC\u0027, \u0027Toner\u0027, \u0027Skincare\u0027, \u0027Organic\u0027, \u00272021-01-15\u0027, 30), (\u0027Skincare BrandD\u0027, \u0027Serum\u0027, \u0027Skincare\u0027, \u0027Organic\u0027, \u00272021-04-01\u0027, 40), (\u0027Skincare BrandD\u0027, \u0027Moisturizer\u0027, \u0027Skincare\u0027, \u0027Organic\u0027, \u00272021-07-01\u0027, 50);", + "sql": "SELECT EXTRACT(MONTH FROM sale_date) AS month, brand, SUM(CASE WHEN product_subcategory \u003d \u0027Cleanser\u0027 THEN sale_count ELSE 0 END) AS Cleanser, SUM(CASE WHEN product_subcategory \u003d \u0027Toner\u0027 THEN sale_count ELSE 0 END) AS Toner, SUM(CASE WHEN product_subcategory \u003d \u0027Serum\u0027 THEN sale_count ELSE 0 END) AS Serum, SUM(CASE WHEN product_subcategory \u003d \u0027Moisturizer\u0027 THEN sale_count ELSE 0 END) AS Moisturizer FROM product_labels_v4 WHERE product_subcategory IN (\u0027Cleanser\u0027, \u0027Toner\u0027, \u0027Serum\u0027, \u0027Moisturizer\u0027) AND product_label \u003d \u0027Organic\u0027 GROUP BY EXTRACT(MONTH FROM sale_date), brand;", + 
"sql_explanation": "This SQL query displays the number of organic skincare products sold per month, in a pivoted format. It filters the product_labels_v4 table to only include rows with organic skincare products. Then, it calculates the total sales for each skincare product type and month using SUM, CASE, and GROUP BY. Finally, it pivots the results by skincare product type, without using the UNPIVOT or PIVOT clause." +}, { + "id": "760", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the monthly sales trend of cosmetic products in India, and which product categories have the highest and lowest sales?", + "sql_context": "CREATE TABLE sales (id INT, product_name VARCHAR(255), product_category VARCHAR(255), sale_date DATE, sales_amount DECIMAL(10, 2), country VARCHAR(255));", + "sql": "SELECT DATE_TRUNC(\u0027month\u0027, sale_date) as month, product_category, AVG(sales_amount) as avg_sales FROM sales WHERE country \u003d \u0027India\u0027 GROUP BY month, product_category ORDER BY month, avg_sales DESC;", + "sql_explanation": "We group the sales table by month and product_category, and calculate the average sales amount for each group. We then order the results by month and average sales in descending order." 
+}, { + "id": "802", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total quantity of \"Eye Shadow\" sold and group by supplier name.", + "sql_context": "CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2)); CREATE TABLE sales (id INT PRIMARY KEY, product_id INT, supplier_id INT, quantity INT, date DATE); CREATE VIEW sales_per_product AS SELECT sales.product_id, products.name, SUM(sales.quantity) as quantity_sold FROM sales JOIN products ON sales.product_id \u003d products.id GROUP BY sales.product_id, products.name, sales.supplier_id;", + "sql": "SELECT sales_per_product.name as product_name, supplier_id, SUM(quantity_sold) as total_quantity_sold FROM sales_per_product WHERE product_name \u003d \u0027Eye Shadow\u0027 GROUP BY product_name, supplier_id;", + "sql_explanation": "This SQL query calculates the total quantity sold for the product \"Eye Shadow\" and groups the results by supplier_id, by selecting the records from the sales_per_product view where the product_name is \"Eye Shadow\", summing the quantity_sold for each record, and grouping the results by product_name and supplier_id." 
+}, { + "id": "1854", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 beauty products with the highest consumer preference score in the USA, ordered by the score in descending order.", + "sql_context": "CREATE TABLE consumer_preferences (product_id INT, preference_score DECIMAL(5,2), country VARCHAR(50)); INSERT INTO consumer_preferences (product_id, preference_score, country) VALUES (1001, 4.8, \u0027USA\u0027), (1002, 4.5, \u0027USA\u0027), (1003, 4.9, \u0027Canada\u0027), (1004, 4.7, \u0027USA\u0027), (1005, 4.6, \u0027Mexico\u0027);", + "sql": "SELECT product_id, preference_score FROM consumer_preferences WHERE country \u003d \u0027USA\u0027 GROUP BY product_id ORDER BY preference_score DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the consumer preference score for each product in the consumer_preferences table and filters the results to only include products sold in the USA. It then groups the results by product ID and orders them in descending order based on the preference score. The query returns the top 3 products with the highest preference score." 
+}, { + "id": "2316", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales for each product category in Q1 of 2022?", + "sql_context": "CREATE TABLE sales (product_id INT, product_name VARCHAR(100), category VARCHAR(50), sale_date DATE, revenue DECIMAL(10, 2)); INSERT INTO sales (product_id, product_name, category, sale_date, revenue) VALUES (1, \u0027Lipstick\u0027, \u0027Cosmetics\u0027, \u00272022-01-02\u0027, 25.99), (2, \u0027Foundation\u0027, \u0027Cosmetics\u0027, \u00272022-01-15\u0027, 34.99);", + "sql": "SELECT category, SUM(revenue) AS total_sales FROM sales WHERE sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY category;", + "sql_explanation": "The SQL query calculates the total sales for each product category in Q1 of 2022 by selecting the category and summing the revenue for all records between \u00272022-01-01\u0027 and \u00272022-03-31\u0027. The result is grouped by the category column." 
+}, { + "id": "2621", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 most purchased lipsticks based on sales volume across all regions?", + "sql_context": "CREATE TABLE lipsticks (lipstick_id INT, lipstick_name VARCHAR(100), region VARCHAR(50), sales_volume INT); INSERT INTO lipsticks (lipstick_id, lipstick_name, region, sales_volume) VALUES (1, \u0027Ruby Woo\u0027, \u0027North America\u0027, 1500), (2, \u0027Russian Red\u0027, \u0027Europe\u0027, 1200), (3, \u0027Cherry\u0027, \u0027Asia\u0027, 1800), (4, \u0027Lady Danger\u0027, \u0027South America\u0027, 1000), (5, \u0027Mademoiselle\u0027, \u0027Australia\u0027, 1600);", + "sql": "SELECT lipstick_name, SUM(sales_volume) FROM lipsticks GROUP BY lipstick_name ORDER BY SUM(sales_volume) DESC LIMIT 5;", + "sql_explanation": "First, the query groups the sales data by lipstick name, then calculates the total sales volume for each lipstick. Finally, it orders the results in descending order of sales volume and returns the top 5 lipsticks." 
+}, { + "id": "3058", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 brands by total revenue?", + "sql_context": "CREATE TABLE sales (product_id INT, brand VARCHAR(255), revenue FLOAT); INSERT INTO sales (product_id, brand, revenue) VALUES (1, \u0027Lush\u0027, 100), (2, \u0027The Body Shop\u0027, 120), (3, \u0027Sephora\u0027, 150), (4, \u0027Lush\u0027, 175), (5, \u0027The Body Shop\u0027, 200);", + "sql": "SELECT brand, SUM(revenue) as total_revenue FROM sales GROUP BY brand ORDER BY total_revenue DESC LIMIT 3;", + "sql_explanation": "This query retrieves the total revenue for each brand by filtering the sales table, grouping by brand, calculating the sum of revenue, and ordering by total revenue in descending order, then limiting the result to the top 3 brands." 
+}, { + "id": "3127", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of beauty products that are both eco-friendly and cruelty-free, grouped by region", + "sql_context": "CREATE TABLE products (product_type VARCHAR(20), eco_friendly BOOLEAN, cruelty_free BOOLEAN, region VARCHAR(10)); INSERT INTO products (product_type, eco_friendly, cruelty_free, region) VALUES (\u0027lipstick\u0027, TRUE, TRUE, \u0027North\u0027), (\u0027mascara\u0027, FALSE, FALSE, \u0027North\u0027), (\u0027eyeshadow\u0027, TRUE, TRUE, \u0027West\u0027), (\u0027blush\u0027, TRUE, FALSE, \u0027South\u0027), (\u0027foundation\u0027, TRUE, TRUE, \u0027East\u0027);", + "sql": "SELECT region, COUNT(*) FROM products WHERE eco_friendly \u003d TRUE AND cruelty_free \u003d TRUE GROUP BY region;", + "sql_explanation": "This query lists the number of beauty products that are both eco-friendly and cruelty-free, grouped by region by filtering for eco-friendly and cruelty-free products and grouping by region." 
+}, { + "id": "3573", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 3 sustainable cosmetic brands by sales.", + "sql_context": "CREATE TABLE brand_sales (brand VARCHAR(20), product_category VARCHAR(20), revenue DECIMAL(10,2)); INSERT INTO brand_sales (brand, product_category, revenue) VALUES (\u0027BrandA\u0027, \u0027Makeup\u0027, 12000), (\u0027BrandA\u0027, \u0027Skincare\u0027, 15000), (\u0027BrandB\u0027, \u0027Makeup\u0027, 9000), (\u0027BrandB\u0027, \u0027Skincare\u0027, 11000), (\u0027BrandC\u0027, \u0027Makeup\u0027, 10000), (\u0027BrandC\u0027, \u0027Skincare\u0027, 16000);", + "sql": "SELECT brand, SUM(revenue) FROM brand_sales GROUP BY brand ORDER BY SUM(revenue) DESC LIMIT 3;", + "sql_explanation": "This SQL query shows the top 3 sustainable cosmetic brands by sales by summing the revenue column for each brand, grouping by brand, and ordering in descending order by the sum of revenue, then limiting the results to the top 3." 
+}, { + "id": "3961", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cruelty-free ingredients in products, grouped by their category?", + "sql_context": "CREATE TABLE Ingredients (IngredientID INT, ProductID INT, IngredientName VARCHAR(50), CrueltyFree BOOLEAN, Category VARCHAR(50)); INSERT INTO Ingredients (IngredientID, ProductID, IngredientName, CrueltyFree, Category) VALUES (1, 1, \u0027Rose Oil\u0027, TRUE, \u0027Skincare\u0027), (2, 1, \u0027Paraben\u0027, FALSE, \u0027Skincare\u0027), (3, 2, \u0027Silicone\u0027, FALSE, \u0027Hair Care\u0027), (4, 3, \u0027Aloe Vera\u0027, TRUE, \u0027Skincare\u0027), (5, 3, \u0027Fragrance\u0027, FALSE, \u0027Skincare\u0027);", + "sql": "SELECT Category, COUNT(*) FROM Ingredients WHERE CrueltyFree \u003d TRUE GROUP BY Category;", + "sql_explanation": "Count the number of cruelty-free ingredients for each product category by grouping the Ingredients table by the Category column and filtering where the CrueltyFree column is TRUE." 
+}, { + "id": "484", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many units of each product were sold in the last month, by supplier?", + "sql_context": "CREATE TABLE sales (sale_date DATE, supplier VARCHAR(255), product VARCHAR(255), quantity INT);", + "sql": "SELECT supplier, product, SUM(quantity) AS qty_sold, DATE_TRUNC(\u0027month\u0027, sale_date) AS sale_month FROM sales WHERE sale_date \u003e\u003d DATE_TRUNC(\u0027month\u0027, CURRENT_DATE - INTERVAL \u00271 month\u0027) GROUP BY supplier, product, sale_month;", + "sql_explanation": "The SQL query calculates the number of units of each product sold in the last month, grouped by supplier. It does this by first truncating the sale_date to the month level using the DATE_TRUNC function. Then, it filters the data to only include sales from the last month using the WHERE clause. Finally, it groups the data by supplier, product, and sale_month, and calculates the sum of the quantity for each group." 
+}, { + "id": "1773", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of factories in each country that follow fair labor practices?", + "sql_context": "CREATE TABLE labor_practices (country VARCHAR(255), factory_id INT, ethical_practice BOOLEAN); INSERT INTO labor_practices (country, factory_id, ethical_practice) VALUES (\u0027US\u0027, 1, TRUE), (\u0027US\u0027, 2, FALSE), (\u0027China\u0027, 1, FALSE), (\u0027China\u0027, 2, FALSE), (\u0027Bangladesh\u0027, 1, TRUE), (\u0027Bangladesh\u0027, 2, TRUE);", + "sql": "SELECT country, 100.0 * COUNT(*) FILTER (WHERE ethical_practice \u003d TRUE) / COUNT(*) as fair_labor_percentage FROM labor_practices GROUP BY country;", + "sql_explanation": "This query calculates the percentage of factories in each country that follow fair labor practices. It uses the COUNT function with the FILTER clause to count the number of rows where ethical_practice is TRUE and divides this by the total number of rows for each country." 
+}, { + "id": "1988", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production usage of eco-friendly materials in factories located in a specific country?", + "sql_context": "CREATE TABLE Country_Production (id INT, country VARCHAR(255), material VARCHAR(255), production_usage INT); INSERT INTO Country_Production (id, country, material, production_usage) VALUES (1, \u0027USA\u0027, \u0027Organic Cotton\u0027, 1000), (2, \u0027China\u0027, \u0027Recycled Polyester\u0027, 1500);", + "sql": "SELECT country, SUM(production_usage) FROM Country_Production WHERE material IN (\u0027Organic Cotton\u0027, \u0027Recycled Polyester\u0027) GROUP BY country;", + "sql_explanation": "This query calculates the total production usage of eco-friendly materials in factories located in a specific country by using the SUM function and filtering for factories where \u0027material\u0027 is \u0027Organic Cotton\u0027 or \u0027Recycled Polyester\u0027. The results are grouped by the \u0027country\u0027 column." 
+}, { + "id": "2162", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which country has the highest number of fair-trade certified factories?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(50), country VARCHAR(50), certified BOOLEAN);", + "sql": "SELECT country, COUNT(*) AS factory_count FROM factories WHERE certified \u003d TRUE GROUP BY country ORDER BY factory_count DESC LIMIT 1;", + "sql_explanation": "Filter factories based on certification status, group them by country, and count the number of factories per country. Then, order the countries by the factory count in descending order and limit the results to the top country." 
+}, { + "id": "2209", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which country has the most garment workers?", + "sql_context": "CREATE TABLE garment_workers (country VARCHAR(255), worker_id INT, worker_name VARCHAR(255), role VARCHAR(255)); INSERT INTO garment_workers (country, worker_id, worker_name, role) VALUES (\u0027India\u0027, 1, \u0027Ravi Kumar\u0027, \u0027sewer\u0027); INSERT INTO garment_workers (country, worker_id, worker_name, role) VALUES (\u0027India\u0027, 2, \u0027Deepika Patel\u0027, \u0027cutter\u0027); INSERT INTO garment_workers (country, worker_id, worker_name, role) VALUES (\u0027Vietnam\u0027, 1, \u0027Tran Thi Thu\u0027, \u0027sewer\u0027);", + "sql": "SELECT country, COUNT(DISTINCT worker_id) AS worker_count FROM garment_workers GROUP BY country ORDER BY worker_count DESC LIMIT 1;", + "sql_explanation": "This SQL query identifies the country with the most garment workers by selecting the \u0027country\u0027 and \u0027worker_id\u0027 columns, applying the COUNT function to distinct \u0027worker_id\u0027 values, and grouping results by \u0027country\u0027. The query then orders the results by \u0027worker_count\u0027 in descending order and limits the results to the first row." 
+}, { + "id": "2298", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique factories producing sustainable materials, by continent?", + "sql_context": "CREATE SCHEMA ethical_fashion; CREATE TABLE factories (factory_id INT, country VARCHAR(255), continent VARCHAR(255), produces_sustainable BOOLEAN); INSERT INTO factories VALUES (1,\u0027USA\u0027,\u0027North America\u0027,TRUE),(2,\u0027Mexico\u0027,\u0027North America\u0027,FALSE),(3,\u0027Brazil\u0027,\u0027South America\u0027,TRUE),(4,\u0027Argentina\u0027,\u0027South America\u0027,FALSE),(5,\u0027China\u0027,\u0027Asia\u0027,FALSE),(6,\u0027India\u0027,\u0027Asia\u0027,TRUE);", + "sql": "SELECT continent, COUNT(DISTINCT factory_id) FROM ethical_fashion.factories WHERE produces_sustainable \u003d TRUE GROUP BY continent;", + "sql_explanation": "The SQL query calculates the number of unique factories producing sustainable materials by continent. It uses the COUNT function with DISTINCT clause to count the number of unique factories and the GROUP BY clause to group the results by continent. The query also uses the WHERE clause to filter the results for factories that produce sustainable materials." 
+}, { + "id": "2337", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 customers of recycled textile products by quantity?", + "sql_context": "CREATE TABLE recycled_textile_customers (customer_id INT, customer_name VARCHAR(50), quantity INT); INSERT INTO recycled_textile_customers (customer_id, customer_name, quantity) VALUES (1, \u0027Green Retailer\u0027, 1000), (2, \u0027Sustainable Store\u0027, 1500), (3, \u0027Eco-Friendly Outlet\u0027, 800), (4, \u0027Natural Market\u0027, 1200), (5, \u0027Renewable Resource\u0027, 900), (6, \u0027Circular Solutions\u0027, 1300);", + "sql": "SELECT customer_name, SUM(quantity) FROM recycled_textile_customers GROUP BY customer_name ORDER BY SUM(quantity) DESC LIMIT 5;", + "sql_explanation": "This query retrieves the top 5 customers of recycled textile products by quantity. It groups the records by the \u0027customer_name\u0027 column, calculates the sum of the \u0027quantity\u0027 column for each group, orders the groups by the sum of the \u0027quantity\u0027 column in descending order, and limits the results to the top 5 records." 
+}, { + "id": "2588", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of sustainable materials used by each manufacturer?", + "sql_context": "CREATE TABLE ManufacturerSustainableMaterials (manufacturer_id INT, manufacturer_name VARCHAR(255), material_type VARCHAR(255), weight INT); INSERT INTO ManufacturerSustainableMaterials (manufacturer_id, manufacturer_name, material_type, weight) VALUES (1, \u0027ABC Manufacturing\u0027, \u0027Organic Cotton\u0027, 10000), (2, \u0027XYZ Manufacturing\u0027, \u0027Recycled Polyester\u0027, 12000), (3, \u0027Green Manufacturing\u0027, \u0027Hemp\u0027, 8000), (4, \u0027Eco Manufacturing\u0027, \u0027Bamboo\u0027, 15000), (5, \u0027Sustainable Manufacturing\u0027, \u0027Tencel\u0027, 9000);", + "sql": "SELECT manufacturer_name, SUM(weight) as total_weight FROM ManufacturerSustainableMaterials GROUP BY manufacturer_name;", + "sql_explanation": "This SQL query calculates the total weight of sustainable materials used by each manufacturer. It uses the GROUP BY clause to group the results by manufacturer name and the SUM function to calculate the total weight of sustainable materials for each manufacturer." 
+}, { + "id": "2889", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique brands in the ethical fashion database have a rating of 4 or higher for sustainable materials?", + "sql_context": "CREATE TABLE brand_material_ratings (brand VARCHAR(50), material VARCHAR(50), rating INT); INSERT INTO brand_material_ratings (brand, material, rating) VALUES (\u0027Brand A\u0027, \u0027organic cotton\u0027, 5), (\u0027Brand A\u0027, \u0027recycled polyester\u0027, 4), (\u0027Brand B\u0027, \u0027organic cotton\u0027, 5), (\u0027Brand B\u0027, \u0027hemp\u0027, 3);", + "sql": "SELECT brand FROM brand_material_ratings WHERE rating \u003e\u003d 4 GROUP BY brand HAVING COUNT(DISTINCT material) \u003e\u003d 1;", + "sql_explanation": "The query first selects the brand from the brand_material_ratings table where the rating is 4 or higher. It then groups the results by brand and filters for groups with at least one unique material. This will return the list of brands with a rating of 4 or higher for at least one sustainable material." 
+}, { + "id": "2896", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable material has the highest production cost in Europe?", + "sql_context": "CREATE TABLE material_prices (material_id INT, material_name VARCHAR(50), region VARCHAR(50), production_cost DECIMAL(10,2)); INSERT INTO material_prices (material_id, material_name, region, production_cost) VALUES (1, \u0027Organic Cotton\u0027, \u0027Europe\u0027, 3.00), (2, \u0027Recycled Polyester\u0027, \u0027Europe\u0027, 4.00), (3, \u0027Hemp\u0027, \u0027Europe\u0027, 2.20);", + "sql": "SELECT material_name, MAX(production_cost) FROM material_prices WHERE region \u003d \u0027Europe\u0027 GROUP BY material_name;", + "sql_explanation": "The SQL query finds the sustainable material with the highest production cost in Europe by using the MAX function and the GROUP BY clause." 
+}, { + "id": "2968", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production cost of garments made from organic cotton, per manufacturer, for the year 2020?", + "sql_context": "CREATE TABLE OrganicCottonGarments (manufacturer VARCHAR(255), production_cost DECIMAL(10,2), year INT);", + "sql": "SELECT manufacturer, AVG(production_cost) FROM OrganicCottonGarments WHERE year \u003d 2020 GROUP BY manufacturer;", + "sql_explanation": "This query calculates the average production cost of garments made from organic cotton for each manufacturer in the year 2020. It groups the results by manufacturer and applies the AVG function to calculate the average production cost." 
+}, { + "id": "2984", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon footprint of garments made from recycled materials, per country?", + "sql_context": "CREATE TABLE RecycledMaterialGarments (id INT, country VARCHAR(50), carbon_footprint DECIMAL(5,2));", + "sql": "SELECT country, AVG(carbon_footprint) as avg_carbon_footprint FROM RecycledMaterialGarments GROUP BY country;", + "sql_explanation": "The SQL query calculates the average carbon footprint of garments made from recycled materials, per country. It does this by grouping the records in the RecycledMaterialGarments table by the country column and then calculating the average carbon footprint for each group using the AVG function." 
+}, { + "id": "2998", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of fair trade certified garments, by type, in the African ethical fashion market?", + "sql_context": "CREATE TABLE african_garments (id INT PRIMARY KEY, garment VARCHAR(50), type VARCHAR(50), fair_trade_certified BOOLEAN); INSERT INTO african_garments (id, garment, type, fair_trade_certified) VALUES (1, \u0027Dashiki\u0027, \u0027Cotton\u0027, true), (2, \u0027Kaftan\u0027, \u0027Silk\u0027, false), (3, \u0027Boubou\u0027, \u0027Wool\u0027, false), (4, \u0027Kitenge\u0027, \u0027Polyester\u0027, true), (5, \u0027Kanga\u0027, \u0027Rayon\u0027, false), (6, \u0027Leso\u0027, \u0027Cotton\u0027, true), (7, \u0027Kufi\u0027, \u0027Wool\u0027, true);", + "sql": "SELECT type, 100.0 * SUM(fair_trade_certified) / COUNT(*) as percentage FROM african_garments GROUP BY type;", + "sql_explanation": "The query calculates the percentage of fair trade certified garments, grouped by type, in the African ethical fashion market. It performs a group by operation on the type column, calculates the sum of the fair_trade_certified column, divides it by the total number of garments, and multiplies by 100.0 to get the percentage." 
+}, { + "id": "3033", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fair-trade certified factories are there in each country?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(50), country VARCHAR(50), certified BOOLEAN);", + "sql": "SELECT country, COUNT(*) AS certified_factory_count FROM factories WHERE certified \u003d TRUE GROUP BY country;", + "sql_explanation": "Filter factories based on certification status and group them by country. Count the number of factories per country." +}, { + "id": "3210", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique retailers sell each eco-friendly material product?", + "sql_context": "CREATE TABLE RetailerProducts (RetailerID int, ProductID int);", + "sql": "SELECT ProductID, COUNT(DISTINCT RetailerID) AS RetailerCount FROM RetailerProducts GROUP BY ProductID;", + "sql_explanation": "This query retrieves the number of unique retailers for each eco-friendly material product by using the COUNT and DISTINCT functions along with the GROUP BY clause." 
+}, { + "id": "3351", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water usage, in cubic meters, for denim production by each manufacturer, for the year 2020?", + "sql_context": "CREATE TABLE DenimProduction (manufacturer VARCHAR(255), water_usage DECIMAL(10,2), year INT);", + "sql": "SELECT manufacturer, AVG(water_usage) FROM DenimProduction WHERE year \u003d 2020 GROUP BY manufacturer;", + "sql_explanation": "This query calculates the average water usage, in cubic meters, for denim production by each manufacturer, for the year 2020. It groups the results by manufacturer and applies the AVG function to calculate the average water usage." +}, { + "id": "3383", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum CO2 emissions for each material in the \u0027sustainable_materials\u0027 table?", + "sql_context": "CREATE TABLE sustainable_materials (material_id INT, material TEXT, co2_emissions FLOAT);", + "sql": "SELECT material, MAX(co2_emissions) as max_emissions FROM sustainable_materials GROUP BY material;", + "sql_explanation": "1. 
Group the \u0027sustainable_materials\u0027 table by the \u0027material\u0027 column. 2. Find the maximum CO2 emissions for each material." +}, { + "id": "3465", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by each company from ethical fashion products?", + "sql_context": "CREATE TABLE revenue(company VARCHAR(50), product VARCHAR(50), revenue DECIMAL(10,2));", + "sql": "SELECT company, SUM(revenue) FROM revenue WHERE product IN (\u0027ethical_fashion\u0027) GROUP BY company;", + "sql_explanation": "The SQL query calculates the total revenue generated by each company from ethical fashion products by grouping the records by the company attribute and calculating the sum of the revenue attribute, filtering only the records with the product attribute set to \"ethical_fashion\"." 
+}, { + "id": "3495", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all sustainable material types and their maximum production cost across all brands.", + "sql_context": "CREATE TABLE SustainableMaterialsCost(material_type VARCHAR(255), brand VARCHAR(255), production_cost DECIMAL(5,2));", + "sql": "SELECT material_type, MAX(production_cost) FROM SustainableMaterialsCost GROUP BY material_type;", + "sql_explanation": "This query groups the SustainableMaterialsCost table by material_type and calculates the maximum production cost for each group, providing a list of sustainable material types and their maximum production costs." 
+}, { + "id": "3762", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which sustainable materials are used the least in clothing production and what is their average price?", + "sql_context": "CREATE TABLE sustainable_materials (material VARCHAR(255), price DECIMAL(10,2), usage INT); INSERT INTO sustainable_materials (material, price, usage) VALUES (\u0027Organic Cotton\u0027, 3.50, 100), (\u0027Recycled Polyester\u0027, 4.25, 80), (\u0027Hemp\u0027, 2.75, 50), (\u0027Linen\u0027, 4.00, 70), (\u0027Tencel\u0027, 5.00, 60);", + "sql": "SELECT material, AVG(price) FROM sustainable_materials WHERE usage \u003c 70 GROUP BY material;", + "sql_explanation": "This query lists the sustainable materials that are used the least in clothing production and their average price by filtering the rows where usage is less than 70, then grouping the rows by material and calculating the average price over those rows." 
+}, { + "id": "3801", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many labor violations have been reported for each manufacturer?", + "sql_context": "CREATE TABLE manufacturers (manufacturer_id INT, manufacturer_name TEXT); CREATE TABLE labor_reports (report_id INT, manufacturer_id INT, violation_count INT); INSERT INTO manufacturers (manufacturer_id, manufacturer_name) VALUES (1, \u0027Ethical Clothing\u0027), (2, \u0027Fast Fashion Inc.\u0027); INSERT INTO labor_reports (report_id, manufacturer_id, violation_count) VALUES (1, 1, 0), (2, 1, 2), (3, 2, 5), (4, 2, 3);", + "sql": "SELECT manufacturer_id, SUM(violation_count) FROM labor_reports GROUP BY manufacturer_id;", + "sql_explanation": "The SQL query calculates the total number of labor violations for each manufacturer using the SUM function and GROUP BY clause." 
+}, { + "id": "4075", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average production cost of ethical garments, grouped by country.", + "sql_context": "CREATE TABLE country_cost (id INT, country VARCHAR(255), garment_type VARCHAR(255), production_cost DECIMAL(10,2));", + "sql": "SELECT country, AVG(production_cost) AS avg_cost FROM country_cost GROUP BY country;", + "sql_explanation": "This query finds the average production cost of ethical garments, grouped by country. It does this by using the GROUP BY clause to group the results by country, and the AVG() function to calculate the average production cost for each country." 
+}, { + "id": "4382", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average consumer awareness score for each country?", + "sql_context": "CREATE TABLE consumer_awareness (awareness_id INT, consumer_id INT, country VARCHAR(50), awareness_score INT); INSERT INTO consumer_awareness (awareness_id, consumer_id, country, awareness_score) VALUES (1, 1001, \u0027US\u0027, 8), (2, 1002, \u0027CA\u0027, 9), (3, 1003, \u0027MX\u0027, 7);", + "sql": "SELECT country, AVG(awareness_score) FROM consumer_awareness GROUP BY country;", + "sql_explanation": "This query calculates the average consumer awareness score for each country by summing all the awareness_score values for each country and dividing by the count of scores." 
+}, { + "id": "4477", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each sales region, by month?", + "sql_context": "CREATE TABLE sales_region (id INT, region VARCHAR(255), year INT, month INT, revenue FLOAT); INSERT INTO sales_region (id, region, year, month, revenue) VALUES (1, \u0027North\u0027, 2022, 1, 500), (2, \u0027South\u0027, 2022, 1, 300), (3, \u0027East\u0027, 2022, 1, 700), (4, \u0027West\u0027, 2022, 1, 400), (1, \u0027North\u0027, 2022, 2, 600), (2, \u0027South\u0027, 2022, 2, 400), (3, \u0027East\u0027, 2022, 2, 800), (4, \u0027West\u0027, 2022, 2, 500);", + "sql": "SELECT region, month, SUM(revenue) FROM sales_region GROUP BY region, month;", + "sql_explanation": "This query calculates the total revenue for each sales region, by month, by grouping the sales_region table by the \u0027region\u0027 and \u0027month\u0027 columns and summing the \u0027revenue\u0027 for each group." 
+}, { + "id": "4598", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average consumer awareness score for each country in the ethical fashion industry?", + "sql_context": "CREATE TABLE consumer_awareness_scores (country VARCHAR(50), score INT); INSERT INTO consumer_awareness_scores (country, score) VALUES (\u0027Brazil\u0027, 75), (\u0027Colombia\u0027, 80), (\u0027Ecuador\u0027, 85), (\u0027Peru\u0027, 90), (\u0027Venezuela\u0027, 70);", + "sql": "SELECT country, AVG(score) FROM consumer_awareness_scores GROUP BY country;", + "sql_explanation": "This SQL query calculates the average consumer awareness score for each country in the ethical fashion industry by selecting the \u0027country\u0027 and \u0027score\u0027 columns in the \u0027consumer_awareness_scores\u0027 table and grouping the results by the \u0027country\u0027 column, then calculating the average score for each group." 
+}, { + "id": "4638", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission from transportation for each country in the ethical fashion supply chain?", + "sql_context": "CREATE TABLE transportation (id INT, country VARCHAR(255), co2_emission_kg INT, orders INT); INSERT INTO transportation VALUES (1, \u0027Italy\u0027, 150, 2000), (2, \u0027Germany\u0027, 120, 1500), (3, \u0027Italy\u0027, 180, 3000), (4, \u0027Brazil\u0027, 80, 1000);", + "sql": "SELECT country, SUM(co2_emission_kg) FROM transportation GROUP BY country;", + "sql_explanation": "This query calculates the total CO2 emission from transportation for each country in the ethical fashion supply chain in the transportation table. It groups the records by country and applies the SUM function to the co2_emission_kg column for each group." 
+}, { + "id": "4671", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many labor rights violations have been reported for each country in the ethical fashion industry?", + "sql_context": "CREATE TABLE labor_rights_violations (country VARCHAR(50), violations INT); INSERT INTO labor_rights_violations (country, violations) VALUES (\u0027Bangladesh\u0027, 200), (\u0027Cambodia\u0027, 150), (\u0027India\u0027, 250);", + "sql": "SELECT country, violations FROM labor_rights_violations GROUP BY country;", + "sql_explanation": "This SQL query groups labor rights violations by country in the ethical fashion industry by selecting the \u0027country\u0027 and \u0027violations\u0027 columns in the \u0027labor_rights_violations\u0027 table and grouping the results by the \u0027country\u0027 column." 
+}, { + "id": "5019", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many ethical fashion brands are located in each country?", + "sql_context": "CREATE TABLE Brands (BrandID INT, Country VARCHAR(50)); INSERT INTO Brands (BrandID, Country) VALUES (1, \u0027USA\u0027), (2, \u0027India\u0027), (3, \u0027Bangladesh\u0027), (4, \u0027USA\u0027), (5, \u0027France\u0027);", + "sql": "SELECT Country, COUNT(*) AS NumBrands FROM Brands GROUP BY Country;", + "sql_explanation": "The SQL query calculates the number of ethical fashion brands located in each country by grouping the Brands table by Country and applying the COUNT function." 
+}, { + "id": "5337", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of items produced in a given year?", + "sql_context": "CREATE TABLE ItemsProduced (id INT, year INT, quantity INT); INSERT INTO ItemsProduced (id, year, quantity) VALUES (1, 2021, 1000), (2, 2021, 1500), (3, 2022, 1200), (4, 2022, 1800), (5, 2023, 2000);", + "sql": "SELECT year, SUM(quantity) FROM ItemsProduced GROUP BY year;", + "sql_explanation": "This SQL query calculates the total quantity of items produced in a given year by using the SUM function on the quantity column and grouping the results by year in the ItemsProduced table." 
+}, { + "id": "5390", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water usage for each type of fabric?", + "sql_context": "CREATE TABLE WaterUsage (id INT, fabric VARCHAR(50), usage DECIMAL); INSERT INTO WaterUsage (id, fabric, usage) VALUES (1, \u0027Organic Cotton\u0027, 10.0), (2, \u0027Recycled Polyester\u0027, 1.5), (3, \u0027Hemp\u0027, 2.5), (4, \u0027Tencel\u0027, 7.5);", + "sql": "SELECT fabric, AVG(usage) FROM WaterUsage GROUP BY fabric;", + "sql_explanation": "This SQL query calculates the average water usage for each type of fabric by using the AVG function on the usage column and grouping the results by fabric in the WaterUsage table." +}, { + "id": "5414", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of workers in each country?", + "sql_context": "CREATE TABLE workers (id INT, country VARCHAR(255), salary FLOAT);", + "sql": "SELECT country, AVG(salary) FROM workers GROUP BY country;", + "sql_explanation": "The SQL query selects the country and average of salary from the workers table, grouped by country. 
This gives the average salary of workers in each country." +}, { + "id": "1611", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average gameplay duration for players from Japan, India, and Brazil, partitioned by platform?", + "sql_context": "CREATE TABLE Players (PlayerID INT, GameDuration FLOAT, Country VARCHAR(50), Platform VARCHAR(50)); INSERT INTO Players (PlayerID, GameDuration, Country, Platform) VALUES (1, 567.8, \u0027Japan\u0027, \u0027PC\u0027), (2, 678.9, \u0027India\u0027, \u0027Console\u0027), (3, 345.1, \u0027Brazil\u0027, \u0027Mobile\u0027);", + "sql": "SELECT AVG(GameDuration) AS AvgGameDuration, Platform FROM Players WHERE Country IN (\u0027Japan\u0027, \u0027India\u0027, \u0027Brazil\u0027) GROUP BY Platform, Country WITH ROLLUP;", + "sql_explanation": "The query calculates the average game duration for players from Japan, India, and Brazil, grouped by platform. The ROLLUP keyword is used to include a grand total row for all platforms." 
+}, { + "id": "1834", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the virtual reality games that have more than 500 hours of playtime and their total playtime, ordered by the total playtime in descending order.", + "sql_context": "CREATE TABLE games (id INT, name VARCHAR(255), type VARCHAR(255), playtime INT); INSERT INTO games (id, name, type, playtime) VALUES (1, \u0027Game1\u0027, \u0027VR\u0027, 600), (2, \u0027Game2\u0027, \u0027Non-VR\u0027, 300), (3, \u0027Game3\u0027, \u0027VR\u0027, 1000);", + "sql": "SELECT name, SUM(playtime) as total_playtime FROM games WHERE type \u003d \u0027VR\u0027 GROUP BY name HAVING SUM(playtime) \u003e 500 ORDER BY total_playtime DESC;", + "sql_explanation": "List all the virtual reality games that have more than 500 hours of playtime and their total playtime, ordered by the total playtime in descending order." 
+}, { + "id": "1989", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the player counts and average scores for each game released in 2020?", + "sql_context": "CREATE TABLE GameSessions (SessionID int, GameName varchar(50), PlayerCount int, ReleaseYear int, AvgScore int); INSERT INTO GameSessions (SessionID, GameName, PlayerCount, ReleaseYear, AvgScore) VALUES (3, \u0027GameG\u0027, 120, 2020, 88); INSERT INTO GameSessions (SessionID, GameName, PlayerCount, ReleaseYear, AvgScore) VALUES (4, \u0027GameH\u0027, 180, 2019, 90);", + "sql": "SELECT GameName, SUM(PlayerCount) as TotalPlayers, AVG(AvgScore) as AvgScore FROM GameSessions WHERE ReleaseYear \u003d 2020 GROUP BY GameName;", + "sql_explanation": "This query calculates the player counts and average scores for each game released in 2020. It filters the GameSessions table for games released in 2020 and then groups the results by GameName. The SUM() function calculates the total player count, and the AVG() function calculates the average score." 
+}, { + "id": "2009", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 players with the highest number of wins in the \u0027FPS\u0027 category?", + "sql_context": "CREATE TABLE PlayerPerformance (PlayerID int, PlayerName varchar(50), Game varchar(50), Wins int); INSERT INTO PlayerPerformance (PlayerID, PlayerName, Game, Wins) VALUES (1, \u0027Player1\u0027, \u0027GameA\u0027, 50), (2, \u0027Player2\u0027, \u0027GameB\u0027, 30), (3, \u0027Player3\u0027, \u0027GameA\u0027, 70), (4, \u0027Player4\u0027, \u0027GameC\u0027, 40), (5, \u0027Player5\u0027, \u0027GameA\u0027, 60), (6, \u0027Player1\u0027, \u0027GameB\u0027, 20), (7, \u0027Player2\u0027, \u0027GameA\u0027, 40);", + "sql": "SELECT PlayerName, SUM(Wins) as TotalWins FROM PlayerPerformance WHERE Game \u003d \u0027GameA\u0027 GROUP BY PlayerName ORDER BY TotalWins DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 players with the highest number of wins in the \u0027FPS\u0027 category (GameA). It uses the WHERE clause to filter the records for GameA, the GROUP BY clause to group the records by player name, the SUM function to calculate the total number of wins for each player, and the ORDER BY clause with LIMIT to return only the top 3 players with the highest number of wins." 
+}, { + "id": "2068", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the number of active users in each country in the last month?", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(100), country VARCHAR(100), last_login DATE); INSERT INTO users (id, name, country, last_login) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, \u00272022-02-15\u0027), (2, \u0027Jane Smith\u0027, \u0027USA\u0027, \u00272022-03-15\u0027), (3, \u0027Alex Brown\u0027, \u0027Canada\u0027, \u00272022-03-16\u0027);", + "sql": "SELECT country, COUNT(DISTINCT id) as active_users FROM users WHERE last_login \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 month\u0027) GROUP BY country;", + "sql_explanation": "Counting the number of active users in each country in the last month, based on the last_login date." 
+}, { + "id": "2222", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 players with the highest level in the \u0027MMORPG\u0027 category?", + "sql_context": "CREATE TABLE PlayerLevel (PlayerID int, PlayerName varchar(50), Game varchar(50), Level int); INSERT INTO PlayerLevel (PlayerID, PlayerName, Game, Level) VALUES (1, \u0027Player1\u0027, \u0027GameA\u0027, 60), (2, \u0027Player2\u0027, \u0027GameB\u0027, 75), (3, \u0027Player3\u0027, \u0027GameA\u0027, 85), (4, \u0027Player4\u0027, \u0027GameC\u0027, 65), (5, \u0027Player5\u0027, \u0027GameA\u0027, 90), (6, \u0027Player1\u0027, \u0027GameB\u0027, 80), (7, \u0027Player2\u0027, \u0027GameA\u0027, 70);", + "sql": "SELECT PlayerName, AVG(Level) as AvgLevel FROM PlayerLevel WHERE Game \u003d \u0027GameA\u0027 GROUP BY PlayerName ORDER BY AvgLevel DESC LIMIT 5;", + "sql_explanation": "This query identifies the top 5 players with the highest level in the \u0027MMORPG\u0027 category (GameA). It uses the WHERE clause to filter the records for GameA, the GROUP BY clause to group the records by player name, the AVG function to calculate the average level for each player, and the ORDER BY clause with LIMIT to return only the top 5 players with the highest level." 
+}, { + "id": "2597", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players in each country are part of the \"GlobalGamersCommunity\"?", + "sql_context": "CREATE TABLE Players (PlayerID INT PRIMARY KEY, Name VARCHAR(50), GamingCommunity VARCHAR(50), Country VARCHAR(50)); INSERT INTO Players (PlayerID, Name, GamingCommunity, Country) VALUES (1, \u0027Fatima Khan\u0027, \u0027GlobalGamersCommunity\u0027, \u0027Pakistan\u0027), (2, \u0027Ella Johnson\u0027, \u0027GlobalGamersCommunity\u0027, \u0027Australia\u0027), (3, \u0027Jaime Lopez\u0027, \u0027GlobalGamersCommunity\u0027, \u0027Brazil\u0027), (4, \u0027Nguyen Hoang\u0027, \u0027GlobalGamersCommunity\u0027, \u0027Vietnam\u0027);", + "sql": "SELECT Country, COUNT(DISTINCT PlayerID) FROM Players WHERE GamingCommunity \u003d \u0027GlobalGamersCommunity\u0027 GROUP BY Country;", + "sql_explanation": "Count the number of players in each country who are part of the \u0027GlobalGamersCommunity\u0027." 
+}, { + "id": "2605", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 players with the highest number of wins in the \u0027multiplayer_games\u0027 table.", + "sql_context": "CREATE TABLE multiplayer_games (player_id INT, player_name TEXT, wins INT);", + "sql": "SELECT player_name, SUM(wins) as total_wins FROM multiplayer_games GROUP BY player_id ORDER BY total_wins DESC LIMIT 3;", + "sql_explanation": "This query finds the top 3 players with the highest number of wins in the \u0027multiplayer_games\u0027 table. It does this by joining the \u0027multiplayer_games\u0027 and \u0027players\u0027 tables, grouping the results by \u0027player_id\u0027, and ordering them by the sum of \u0027wins\u0027 column value in descending order. The \u0027LIMIT 3\u0027 clause limits the results to 3 rows." 
+}, { + "id": "2856", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which players have achieved the highest scores in the \u0027player_scores\u0027 table of the \u0027gaming\u0027 database?", + "sql_context": "CREATE TABLE player_scores (player_id INT, game_id INT, score INT); INSERT INTO player_scores (player_id, game_id, score) VALUES (1, 1, 1000), (2, 1, 1200), (3, 2, 1500), (4, 2, 1300), (5, 3, 1100), (6, 1, 1400), (6, 2, 1600), (6, 3, 1700);", + "sql": "SELECT player_id, MAX(score) as highest_score FROM player_scores GROUP BY player_id ORDER BY highest_score DESC;", + "sql_explanation": "This query identifies the players who have achieved the highest scores by grouping the records based on the player_id and finding the maximum score value for each group. It then orders the groups based on the highest_score value in descending order to identify the players with the highest scores." 
+}, { + "id": "3413", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration played for each player?", + "sql_context": "CREATE TABLE PlayerGame (PlayerID INT, GameID INT, Played DATE, StartTime TIMESTAMP, EndTime TIMESTAMP); INSERT INTO PlayerGame (PlayerID, GameID, Played, StartTime, EndTime) VALUES (1, 1, \u00272022-01-01\u0027, \u00272022-01-01 10:00:00\u0027, \u00272022-01-01 12:00:00\u0027), (2, 2, \u00272022-01-02\u0027, \u00272022-01-02 14:00:00\u0027, \u00272022-01-02 16:00:00\u0027), (3, 1, \u00272022-01-03\u0027, \u00272022-01-03 10:00:00\u0027, \u00272022-01-03 11:00:00\u0027), (4, 3, \u00272022-01-04\u0027, \u00272022-01-04 18:00:00\u0027, \u00272022-01-04 19:00:00\u0027);", + "sql": "SELECT PlayerID, SUM(TIMESTAMPDIFF(MINUTE, StartTime, EndTime)) FROM PlayerGame GROUP BY PlayerID;", + "sql_explanation": "This query calculates the total duration played for each player in minutes. It groups the records in the PlayerGame table by the PlayerID column, calculates the difference between the end time and start time using the TIMESTAMPDIFF function, and applies the SUM function to this difference." 
+}, { + "id": "3878", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total revenue from \u0027InGamePurchases\u0027 table for each game", + "sql_context": "CREATE TABLE InGamePurchases (GameID INT, GameName VARCHAR(50), PurchaseAmount DECIMAL(10,2));", + "sql": "SELECT GameID, SUM(PurchaseAmount) as TotalRevenue FROM InGamePurchases GROUP BY GameID;", + "sql_explanation": "The SQL query calculates the total revenue for each game by grouping the InGamePurchases table by GameID and summing the PurchaseAmount column." +}, { + "id": "4105", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total playtime, in hours, for each game, broken down by genre?", + "sql_context": "CREATE TABLE GamePlaytime (PlayerID INT, PlayerName TEXT, Game TEXT, Genre TEXT, Playtime INT); INSERT INTO GamePlaytime (PlayerID, PlayerName, Game, Genre, Playtime) VALUES (1, \u0027John Doe\u0027, \u0027Game A\u0027, \u0027Shooter\u0027, 50), (2, \u0027Jane Smith\u0027, \u0027Game B\u0027, \u0027Strategy\u0027, 75), (3, \u0027Bob Johnson\u0027, \u0027Game C\u0027, \u0027Shooter\u0027, 100), (4, \u0027Alice Williams\u0027, \u0027Game D\u0027, \u0027Role-playing\u0027, 30), (5, \u0027Charlie Brown\u0027, 
\u0027Game A\u0027, \u0027Shooter\u0027, 25);", + "sql": "SELECT Genre, SUM(Playtime / 60) AS TotalPlaytime FROM GamePlaytime GROUP BY Genre;", + "sql_explanation": "This SQL query calculates the total playtime, in hours, for each game, broken down by genre. It uses the SUM() function to find the total playtime for each genre and the GROUP BY clause to group the results by genre. The query divides the playtime by 60 to convert it from minutes to hours." +}, { + "id": "4585", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each game category?", + "sql_context": "CREATE TABLE Games (GameID int, GameName varchar(50), Category varchar(50), Revenue int); INSERT INTO Games (GameID, GameName, Category, Revenue) VALUES (1, \u0027Game1\u0027, \u0027Action\u0027, 500000), (2, \u0027Game2\u0027, \u0027RPG\u0027, 700000), (3, \u0027Game3\u0027, \u0027Strategy\u0027, 600000);", + "sql": "SELECT Category, SUM(Revenue) as TotalRevenue FROM Games GROUP BY Category;", + "sql_explanation": "The SQL query calculates the total revenue for each game category by grouping the Games table by the Category column and then summing up the Revenue column values within each group." 
+}, { + "id": "4830", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total number of wins for each game in the \u0027PlayerGames\u0027 table", + "sql_context": "CREATE TABLE PlayerGames (PlayerID INT, GameID INT, GameName VARCHAR(50), Win BIT);", + "sql": "SELECT GameID, SUM(Win) as TotalWins FROM PlayerGames GROUP BY GameID;", + "sql_explanation": "The SQL query calculates the total number of wins for each game by grouping the PlayerGames table by GameID and summing the Win column." +}, { + "id": "4908", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum score for each game and the player who achieved it?", + "sql_context": "CREATE TABLE PlayerScore (PlayerID INT, GameID INT, Genre VARCHAR(50), Score INT); INSERT INTO PlayerScore (PlayerID, GameID, Genre, Score) VALUES (1, 1, \u0027Shooter\u0027, 100), (2, 2, \u0027Strategy\u0027, 200), (3, 1, \u0027Shooter\u0027, 150), (4, 3, \u0027RPG\u0027, 300), (5, 2, \u0027Strategy\u0027, 250), (6, 1, \u0027Shooter\u0027, 300), (7, 2, \u0027Strategy\u0027, 350);", + "sql": "SELECT PlayerID, GameID, MAX(Score) FROM PlayerScore GROUP BY GameID;", + "sql_explanation": "This query calculates the maximum score for each game and the 
player who achieved it. It groups the records in the PlayerScore table by the GameID column, and applies the MAX function to the Score column." +}, { + "id": "4949", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum playtime for players from each country?", + "sql_context": "CREATE TABLE PlayerPlaytimes (PlayerID int, Country varchar(50), Playtime int); INSERT INTO PlayerPlaytimes (PlayerID, Country, Playtime) VALUES (1, \u0027USA\u0027, 200), (2, \u0027Canada\u0027, 150), (3, \u0027Australia\u0027, 250), (4, \u0027England\u0027, 300), (5, \u0027USA\u0027, 350), (6, \u0027Canada\u0027, 400), (7, \u0027Australia\u0027, 450), (8, \u0027England\u0027, 500);", + "sql": "SELECT Country, MIN(Playtime) FROM PlayerPlaytimes GROUP BY Country;", + "sql_explanation": "This query calculates the minimum playtime for players from each country by using the GROUP BY clause to group the rows based on the Country column, and the MIN function to retrieve the minimum value of the Playtime column for each group." 
+}, { + "id": "5075", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total playtime for each game in the \u0027games_v2\u0027 table?", + "sql_context": "CREATE TABLE games_v2 (game_id INT, game_name VARCHAR(50), playtime INT); INSERT INTO games_v2 (game_id, game_name, playtime) VALUES (4, \u0027GameD\u0027, 4000), (5, \u0027GameE\u0027, 5000);", + "sql": "SELECT game_name, SUM(playtime) FROM games_v2 GROUP BY game_name;", + "sql_explanation": "This query calculates the total playtime for each game in the \u0027games_v2\u0027 table. It does so by grouping by the \u0027game_name\u0027 column and then summing the \u0027playtime\u0027 column for each group." 
+}, { + "id": "54", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of female, male, and non-binary employees in the Sales department?", + "sql_context": "CREATE TABLE EmployeeDemographics (EmployeeID int, Gender varchar(10), Department varchar(20)); INSERT INTO EmployeeDemographics (EmployeeID, Gender, Department) VALUES (1, \u0027Female\u0027, \u0027Engineering\u0027), (2, \u0027Male\u0027, \u0027IT\u0027), (3, \u0027Non-binary\u0027, \u0027Engineering\u0027), (4, \u0027Female\u0027, \u0027Sales\u0027), (5, \u0027Male\u0027, \u0027Sales\u0027), (6, \u0027Female\u0027, \u0027Sales\u0027);", + "sql": "SELECT Department, ROUND(COUNT(CASE WHEN Gender \u003d \u0027Female\u0027 THEN 1 END) * 100.0 / COUNT(*), 1) AS FemalePercentage, ROUND(COUNT(CASE WHEN Gender \u003d \u0027Male\u0027 THEN 1 END) * 100.0 / COUNT(*), 1) AS MalePercentage, ROUND(COUNT(CASE WHEN Gender \u003d \u0027Non-binary\u0027 THEN 1 END) * 100.0 / COUNT(*), 1) AS NonBinaryPercentage FROM EmployeeDemographics GROUP BY Department;", + "sql_explanation": "The SQL query uses conditional aggregation to calculate the percentage of female, male, and non-binary employees in the Sales department." 
+}, { + "id": "68", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of total applicants, interviewed applicants, and hired applicants by job position for the Engineering department?", + "sql_context": "CREATE TABLE ApplicantData (ApplicantID int, JobPosition varchar(20), ApplicantType varchar(10), Department varchar(20)); INSERT INTO ApplicantData (ApplicantID, JobPosition, ApplicantType, Department) VALUES (1, \u0027Sales Representative\u0027, \u0027Applicant\u0027, \u0027Sales\u0027), (2, \u0027Sales Representative\u0027, \u0027Interviewed\u0027, \u0027Sales\u0027), (3, \u0027Sales Manager\u0027, \u0027Hired\u0027, \u0027Sales\u0027), (4, \u0027Software Engineer\u0027, \u0027Applicant\u0027, \u0027Engineering\u0027), (5, \u0027Software Engineer\u0027, \u0027Interviewed\u0027, \u0027Engineering\u0027), (6, \u0027Software Engineer\u0027, \u0027Hired\u0027, \u0027Engineering\u0027);", + "sql": "SELECT JobPosition, COUNT(CASE WHEN ApplicantType \u003d \u0027Applicant\u0027 THEN 1 END) AS TotalApplicants, COUNT(CASE WHEN ApplicantType \u003d \u0027Interviewed\u0027 THEN 1 END) AS InterviewedApplicants, COUNT(CASE WHEN ApplicantType \u003d \u0027Hired\u0027 THEN 1 END) AS HiredApplicants FROM ApplicantData WHERE Department \u003d \u0027Engineering\u0027 GROUP BY JobPosition;", + "sql_explanation": "The SQL query uses conditional aggregation to calculate the count of total applicants, interviewed applicants, and hired applicants by job position for the Engineering department." 
+}, { + "id": "129", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many diversity and inclusion training sessions were conducted in the last 6 months, and the number of participants?", + "sql_context": "CREATE TABLE TrainingSessions (SessionID INT, SessionDate DATE, TrainingType VARCHAR(255), Participants INT); INSERT INTO TrainingSessions (SessionID, SessionDate, TrainingType, Participants) VALUES (1, \u00272022-01-01\u0027, \u0027Diversity and Inclusion\u0027, 20), (2, \u00272022-04-15\u0027, \u0027Diversity and Inclusion\u0027, 25), (3, \u00272022-02-01\u0027, \u0027Diversity and Inclusion\u0027, 30);", + "sql": "SELECT DATEPART(MONTH, SessionDate) AS Month, TrainingType, COUNT(*) AS Sessions, SUM(Participants) AS Participants_Total FROM TrainingSessions WHERE SessionDate \u003e\u003d DATEADD(MONTH, -6, GETDATE()) AND TrainingType \u003d \u0027Diversity and Inclusion\u0027 GROUP BY DATEPART(MONTH, SessionDate), TrainingType;", + "sql_explanation": "The SQL query calculates the number of diversity and inclusion training sessions conducted in the last 6 months and the total number of participants by using the COUNT and SUM functions on the asterisk (*) and grouping by the DATEPART(MONTH, SessionDate) and TrainingType columns. The WHERE clause filters the records based on the TrainingType being \u0027Diversity and Inclusion\u0027 and the SessionDate being within the last 6 months." 
+}, { + "id": "429", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in average salary between employees who have and have not completed diversity and inclusion training, by job title and region?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(10), JobTitle VARCHAR(50), Region VARCHAR(50), Salary INT, CompletedDiversityTraining BOOLEAN); INSERT INTO Employees (EmployeeID, Gender, JobTitle, Region, Salary, CompletedDiversityTraining) VALUES (1, \u0027Male\u0027, \u0027Manager\u0027, \u0027North\u0027, 70000, TRUE), (2, \u0027Female\u0027, \u0027Manager\u0027, \u0027South\u0027, 65000, FALSE), (3, \u0027Male\u0027, \u0027Developer\u0027, \u0027East\u0027, 60000, TRUE), (4, \u0027Female\u0027, \u0027Developer\u0027, \u0027West\u0027, 62000, FALSE);", + "sql": "SELECT Region, JobTitle, AVG(CASE WHEN CompletedDiversityTraining THEN Salary ELSE NULL END) - AVG(CASE WHEN NOT CompletedDiversityTraining THEN Salary ELSE NULL END) AS Salary_Difference FROM Employees GROUP BY Region, JobTitle;", + "sql_explanation": "The SQL query calculates the difference in average salary between employees who have and have not completed diversity and inclusion training, by job title and region. It uses the AVG function along with a CASE statement to calculate the average salary for employees who have and have not completed the training." 
+}, { + "id": "491", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 cities with the highest number of female candidates interviewed for a job in the past year, including their respective job titles and the number of candidates interviewed?", + "sql_context": "CREATE TABLE Interviews (InterviewID int, InterviewDate date, CandidateName varchar(50), CandidateGender varchar(10), JobTitle varchar(50), City varchar(50)); INSERT INTO Interviews (InterviewID, InterviewDate, CandidateName, CandidateGender, JobTitle, City) VALUES (1, \u00272022-01-01\u0027, \u0027Alex Brown\u0027, \u0027Female\u0027, \u0027Software Engineer\u0027, \u0027New York\u0027), (2, \u00272022-01-02\u0027, \u0027Taylor Green\u0027, \u0027Female\u0027, \u0027Data Analyst\u0027, \u0027Los Angeles\u0027), (3, \u00272022-01-03\u0027, \u0027Jessica White\u0027, \u0027Female\u0027, \u0027Software Engineer\u0027, \u0027New York\u0027), (4, \u00272022-01-04\u0027, \u0027Brittany Black\u0027, \u0027Female\u0027, \u0027Data Scientist\u0027, \u0027San Francisco\u0027), (5, \u00272022-01-05\u0027, \u0027Kim Harris\u0027, \u0027Female\u0027, \u0027Software Engineer\u0027, \u0027Los Angeles\u0027), (6, \u00272022-01-06\u0027, \u0027Amanda Wilson\u0027, \u0027Female\u0027, \u0027Data Analyst\u0027, \u0027Los Angeles\u0027);", + "sql": "SELECT City, JobTitle, COUNT(*) AS num_candidates FROM Interviews WHERE CandidateGender \u003d \u0027Female\u0027 AND InterviewDate \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY City, JobTitle ORDER BY num_candidates DESC, City DESC LIMIT 3;", + 
"sql_explanation": "The query retrieves the top 3 cities with the highest number of female candidates interviewed for a job in the past year, along with their respective job titles and the number of candidates interviewed. It filters the data to only include female candidates interviewed in the past year, then groups the data by city and job title, calculating the number of candidates interviewed. Finally, it orders the results by the number of candidates interviewed in descending order and limits the results to the top 3 cities." +}, { + "id": "911", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of new hires in each quarter of the last year?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, HireDate DATE);", + "sql": "SELECT DATEPART(QUARTER, HireDate) as Quarter, COUNT(*) as NewHires FROM Employees WHERE HireDate BETWEEN DATEADD(YEAR, -1, GETDATE()) AND GETDATE() GROUP BY DATEPART(QUARTER, HireDate);", + "sql_explanation": "This query counts the number of new hires in each quarter of the last year. It filters the Employees table for records with hire dates in the last year using the HireDate column and calculates the count of new hires for each quarter using the COUNT() function, grouping results by the quarter of the hire date." 
+}, { + "id": "940", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of employees in each department who are recent college graduates.", + "sql_context": "CREATE TABLE Employees (EmployeeID int, Department varchar(20), Degree varchar(50), GraduationYear int); INSERT INTO Employees (EmployeeID, Department, Degree, GraduationYear) VALUES (1, \u0027IT\u0027, \u0027BS in Computer Science\u0027, 2021), (2, \u0027IT\u0027, \u0027MS in Computer Science\u0027, 2019), (3, \u0027Sales\u0027, \u0027BA in Marketing\u0027, 2018), (4, \u0027Sales\u0027, \u0027BS in Business Administration\u0027, 2020), (5, \u0027Sales\u0027, \u0027BA in Marketing\u0027, 2021);", + "sql": "SELECT e.Department, ROUND(COUNT(CASE WHEN e.GraduationYear \u003e\u003d YEAR(CURRENT_DATE) - 2 THEN 1 END) * 100.0 / COUNT(*), 1) AS Percent_Recent_Grads FROM Employees e GROUP BY e.Department;", + "sql_explanation": "The SQL query uses conditional aggregation to calculate the percentage of employees in each department who are recent college graduates. It returns the results rounded to one decimal place." 
+}, { + "id": "1048", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of job applicants per source, by employee\u0027s region, for the year 2022?", + "sql_context": "CREATE TABLE job_applications (id INT, applicant_name VARCHAR(50), department VARCHAR(50), application_source VARCHAR(50), application_date DATE, applicant_region VARCHAR(50)); INSERT INTO job_applications (id, applicant_name, department, application_source, application_date, applicant_region) VALUES (1, \u0027Jane Doe\u0027, \u0027IT\u0027, \u0027LinkedIn\u0027, \u00272022-02-12\u0027, \u0027Northeast\u0027); INSERT INTO job_applications (id, applicant_name, department, application_source, application_date, applicant_region) VALUES (2, \u0027Bob Smith\u0027, \u0027HR\u0027, \u0027Indeed\u0027, \u00272022-05-04\u0027, \u0027Midwest\u0027);", + "sql": "SELECT applicant_region, application_source, COUNT(*) as total_applicants FROM job_applications WHERE YEAR(application_date) \u003d 2022 GROUP BY applicant_region, application_source;", + "sql_explanation": "This query groups the job applications by their applicant\u0027s region and application source in the year 2022. Then, it counts the total number of job applicants per source for each applicant\u0027s region." 
+}, { + "id": "1367", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of employees who have completed diversity and inclusion training in each department?", + "sql_context": "CREATE TABLE EmployeeTraining(EmployeeID INT, Department VARCHAR(255), TrainingType VARCHAR(255), CompletionDate DATE);", + "sql": "SELECT Department, (COUNT(CASE WHEN TrainingType \u003d \u0027Diversity and Inclusion\u0027 THEN 1 END) / COUNT(*)) * 100 AS Percentage FROM EmployeeTraining GROUP BY Department;", + "sql_explanation": "The SQL query calculates the percentage of employees who have completed diversity and inclusion training in each department by filtering the EmployeeTraining table based on the TrainingType column and then computing the percentage using the COUNT function with a CASE statement and grouping by the Department column." 
+}, { + "id": "1539", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees hired in each month of 2022?", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), department VARCHAR(50), hire_date DATE); INSERT INTO Employees (id, name, department, hire_date) VALUES (1, \u0027Jamal Thompson\u0027, \u0027Engineering\u0027, \u00272022-01-15\u0027); INSERT INTO Employees (id, name, department, hire_date) VALUES (2, \u0027Sophia Garcia\u0027, \u0027HR\u0027, \u00272022-03-20\u0027); INSERT INTO Employees (id, name, department, hire_date) VALUES (3, \u0027Hassan Patel\u0027, \u0027Engineering\u0027, \u00272022-06-10\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM hire_date) AS month, COUNT(*) AS total_hired FROM Employees WHERE hire_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY month;", + "sql_explanation": "This query extracts the month from the hire_date field, groups the records by the extracted month, and counts the number of records for each month to get the total number of employees hired in each month of 2022." 
+}, { + "id": "1585", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of job applicants by job category, for the last 3 months?", + "sql_context": "CREATE TABLE JobApplications (ApplicationID INT, ApplicantID INT, JobCategory VARCHAR(50), ApplicationDate DATE);", + "sql": "SELECT JobCategory, COUNT(DISTINCT ApplicantID) FROM JobApplications WHERE ApplicationDate \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 3 MONTH) GROUP BY JobCategory;", + "sql_explanation": "This query selects the number of job applicants by job category for the last 3 months. It filters the records based on the application date and groups the results by job category." 
+}, { + "id": "2003", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many female employees have been hired in each department since 2018?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(10), HireDate DATE, Department VARCHAR(50)); INSERT INTO Employees (EmployeeID, Gender, HireDate, Department) VALUES (1, \u0027Male\u0027, \u00272020-01-01\u0027, \u0027HR\u0027); INSERT INTO Employees (EmployeeID, Gender, HireDate, Department) VALUES (2, \u0027Female\u0027, \u00272019-01-01\u0027, \u0027IT\u0027); INSERT INTO Employees (EmployeeID, Gender, HireDate, Department) VALUES (3, \u0027Male\u0027, \u00272020-05-01\u0027, \u0027IT\u0027); INSERT INTO Employees (EmployeeID, Gender, HireDate, Department) VALUES (4, \u0027Female\u0027, \u00272018-01-01\u0027, \u0027Finance\u0027);", + "sql": "SELECT Department, COUNT(*) as Num_Female_Employees FROM Employees WHERE Gender \u003d \u0027Female\u0027 AND YEAR(HireDate) \u003e\u003d 2018 GROUP BY Department;", + "sql_explanation": "This query calculates the number of female employees hired in each department since 2018. It first filters the data based on gender and hire date, then counts the number of female employees for each department using the COUNT function and groups the data by department." 
+}, { + "id": "2939", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the average salary for employees, by department, and sort the results by the average salary in ascending order", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Engineering\u0027, 50000.00); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Marketing\u0027, 60000.00);", + "sql": "SELECT Department, AVG(Salary) as AverageSalary FROM Employees GROUP BY Department ORDER BY AverageSalary ASC;", + "sql_explanation": "This query groups employees by their department and calculates the average salary for each group. It then orders the groups by the average salary in ascending order." 
+}, { + "id": "2975", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of employees by job role and hire date, in descending order by hire date?", + "sql_context": "CREATE TABLE Employees (EmployeeID int, FirstName varchar(50), LastName varchar(50), JobRole varchar(50), HireDate date); INSERT INTO Employees (EmployeeID, FirstName, LastName, JobRole, HireDate) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Software Engineer\u0027, \u00272020-01-01\u0027); INSERT INTO Employees (EmployeeID, FirstName, LastName, JobRole, HireDate) VALUES (2, \u0027Jane\u0027, \u0027Smith\u0027, \u0027HR Manager\u0027, \u00272020-02-01\u0027);", + "sql": "SELECT JobRole, HireDate, COUNT(*) as Count FROM Employees GROUP BY JobRole, HireDate ORDER BY HireDate DESC;", + "sql_explanation": "The SQL query calculates the count of employees for each combination of job role and hire date and groups them by job role and hire date. The results are ordered by hire date in descending order." 
+}, { + "id": "3337", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of employees in the \"IT\" and \"HR\" departments in the \"hr\" schema", + "sql_context": "CREATE TABLE hr.employees (id INT, name VARCHAR(50), department VARCHAR(50)); INSERT INTO hr.employees (id, name, department) VALUES (1, \u0027John Doe\u0027, \u0027HR\u0027); INSERT INTO hr.employees (id, name, department) VALUES (2, \u0027Jane Smith\u0027, \u0027IT\u0027); INSERT INTO hr.employees (id, name, department) VALUES (3, \u0027Bob Brown\u0027, \u0027IT\u0027); INSERT INTO hr.employees (id, name, department) VALUES (4, \u0027Alice Johnson\u0027, \u0027HR\u0027); INSERT INTO hr.employees (id, name, department) VALUES (5, \u0027Maria Garcia\u0027, \u0027HR\u0027);", + "sql": "SELECT department, COUNT(*) FROM hr.employees WHERE department IN (\u0027IT\u0027, \u0027HR\u0027) GROUP BY department;", + "sql_explanation": "This query groups the \"employees\" table in the \"hr\" schema by the \"department\" column and counts the number of records for each group where the department is either \u0027IT\u0027 or \u0027HR\u0027." 
+}, { + "id": "3811", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum salary for each position in the HR department?", + "sql_context": "CREATE TABLE hr_positions (id INT, position VARCHAR(50), department VARCHAR(50), salary FLOAT); INSERT INTO hr_positions (id, position, department, salary) VALUES (1, \u0027HR Manager\u0027, \u0027HR\u0027, 95000.0), (2, \u0027HR Specialist\u0027, \u0027HR\u0027, 75000.0), (3, \u0027HR Coordinator\u0027, \u0027HR\u0027, 65000.0);", + "sql": "SELECT position, MIN(salary) FROM hr_positions WHERE department \u003d \u0027HR\u0027 GROUP BY position;", + "sql_explanation": "The SQL query finds the minimum salary for each position in the HR department by using the MIN function on the salary column, filtering the records with a WHERE clause for the HR department, and grouping the results by position." 
+}, { + "id": "3959", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the age and nationality distribution of employees?", + "sql_context": "CREATE TABLE EmployeeDemographics (EmployeeID INT, Age INT, Nationality VARCHAR(50)); INSERT INTO EmployeeDemographics (EmployeeID, Age, Nationality) VALUES (1, 30, \u0027American\u0027), (2, 40, \u0027Canadian\u0027), (3, 35, \u0027British\u0027), (4, 28, \u0027Mexican\u0027), (5, 45, \u0027German\u0027), (6, 32, \u0027Indian\u0027);", + "sql": "SELECT Nationality, Age, COUNT(*) FROM EmployeeDemographics GROUP BY Nationality, Age;", + "sql_explanation": "Retrieve the age and nationality distribution of employees by grouping by nationality and age." 
+}, { + "id": "4156", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of employees in each location, ordered by the number of employees.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Location VARCHAR(50)); INSERT INTO Employees (EmployeeID, Location) VALUES (1, \u0027NYC\u0027), (2, \u0027LA\u0027), (3, \u0027NYC\u0027), (4, \u0027NYC\u0027), (5, \u0027LA\u0027);", + "sql": "SELECT Location, COUNT(*) FROM Employees GROUP BY Location ORDER BY COUNT(*) DESC;", + "sql_explanation": "1. Group rows by the \u0027Location\u0027 column. 2. Count the number of rows in each group. 3. Order the resulting rows by the count in descending order." 
+}, { + "id": "4173", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the earliest hire date for employees, by country, and display the results in a table", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Country VARCHAR(50), HireDate DATE); INSERT INTO Employees (EmployeeID, FirstName, LastName, Country, HireDate) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027USA\u0027, \u00272021-01-01\u0027); INSERT INTO Employees (EmployeeID, FirstName, LastName, Country, HireDate) VALUES (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Canada\u0027, \u00272021-02-01\u0027);", + "sql": "SELECT Country, MIN(HireDate) as EarliestHireDate FROM Employees GROUP BY Country;", + "sql_explanation": "This query groups employees by their country and calculates the minimum hire date for each group." 
+}, { + "id": "4628", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of employees by department?", + "sql_context": "CREATE TABLE Employees (id INT, name VARCHAR(50), department VARCHAR(50)); INSERT INTO Employees (id, name, department) VALUES (1, \u0027John Doe\u0027, \u0027HR\u0027); INSERT INTO Employees (id, name, department) VALUES (2, \u0027Jane Smith\u0027, \u0027IT\u0027); INSERT INTO Employees (id, name, department) VALUES (3, \u0027Alice Johnson\u0027, \u0027Finance\u0027);", + "sql": "SELECT department, COUNT(*) AS total FROM Employees GROUP BY department;", + "sql_explanation": "This query groups the records by department, and counts the number of records for each department to get the distribution of employees by department." 
+}, { + "id": "5191", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average age of employees in each department.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Age INT); INSERT INTO Employees (EmployeeID, Department, Age) VALUES (1, \u0027IT\u0027, 30), (2, \u0027IT\u0027, 35), (3, \u0027HR\u0027, 40), (4, \u0027Sales\u0027, 45), (5, \u0027Sales\u0027, 50);", + "sql": "SELECT Department, AVG(Age) FROM Employees GROUP BY Department;", + "sql_explanation": "The SQL query calculates the average age of employees in each department by using the AVG() function and a GROUP BY clause. This groups the data by department and calculates the average age for each group." 
+}, { + "id": "117", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of employees by gender and age group in the Mining department?", + "sql_context": "CREATE TABLE Employees(id INT, name VARCHAR(50), department VARCHAR(50), position VARCHAR(50), salary FLOAT, full_time BOOLEAN, gender VARCHAR(50), start_date DATE, age INT);", + "sql": "SELECT gender, CASE WHEN age \u003c 25 THEN \u0027Under 25\u0027 WHEN age BETWEEN 25 AND 34 THEN \u002725-34\u0027 WHEN age BETWEEN 35 AND 44 THEN \u002735-44\u0027 WHEN age BETWEEN 45 AND 54 THEN \u002745-54\u0027 ELSE \u002755 and older\u0027 END AS Age_Group, COUNT(*) AS Count FROM Employees WHERE department \u003d \u0027Mining\u0027 GROUP BY gender, Age_Group;", + "sql_explanation": "The query calculates the number of employees by gender and age group in the Mining department by grouping employees based on the gender and age columns. The CASE statement is used to create age groups." 
+}, { + "id": "163", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of gold and silver by each mine in Q3 2021?", + "sql_context": "CREATE TABLE mine (id INT, name TEXT, location TEXT, type TEXT, production FLOAT); INSERT INTO mine (id, name, location, type, production) VALUES (1, \u0027ABC Mine\u0027, \u0027Colorado, USA\u0027, \u0027Gold\u0027, 12000.0), (2, \u0027DEF Mine\u0027, \u0027Wyoming, USA\u0027, \u0027Silver\u0027, 15000.0);", + "sql": "SELECT name, SUM(CASE WHEN type \u003d \u0027Gold\u0027 THEN production ELSE 0 END) as total_gold_production, SUM(CASE WHEN type \u003d \u0027Silver\u0027 THEN production ELSE 0 END) as total_silver_production FROM mine WHERE production_date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027 GROUP BY name, type, location;", + "sql_explanation": "Calculate the total production of gold and silver by each mine in Q3 2021 and group them by mine name, type, and location." 
+}, { + "id": "430", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of employees from underrepresented racial and ethnic groups in each mining company?", + "sql_context": "CREATE TABLE company_demographics (company_id INT, company_name TEXT, racial_ethnic_group TEXT, num_employees INT);", + "sql": "SELECT company_name, racial_ethnic_group, SUM(num_employees) AS total_employees FROM company_demographics WHERE racial_ethnic_group IN (\u0027Native American\u0027, \u0027Black or African American\u0027, \u0027Hispanic or Latino\u0027) GROUP BY company_name;", + "sql_explanation": "This query filters the company_demographics table based on specific racial and ethnic groups and calculates the sum of num_employees for each mining company." 
+}, { + "id": "674", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission per month for each mine?", + "sql_context": "CREATE TABLE mine (mine_id INT, mine_name TEXT, location TEXT, daily_co2_emission INT); INSERT INTO mine VALUES (1, \u0027ABC Mine\u0027, \u0027Wyoming, USA\u0027, 500), (2, \u0027DEF Mine\u0027, \u0027West Virginia, USA\u0027, 700), (3, \u0027GHI Mine\u0027, \u0027Kentucky, USA\u0027, 400), (4, \u0027JKL Mine\u0027, \u0027Colorado, USA\u0027, 600);", + "sql": "SELECT mine_name, DATE_TRUNC(\u0027month\u0027, CURRENT_DATE - (daily_co2_emission * 7)) as month, SUM(daily_co2_emission) as total_co2_emission FROM mine GROUP BY mine_name, month ORDER BY total_co2_emission DESC;", + "sql_explanation": "The SQL query calculates the total CO2 emission per month for each mine by using the DATE_TRUNC function to get the first day of the month and subtracting the daily_co2_emission multiplied by 7 (for a week) from the current date. It then groups the results by mine_name and month, and calculates the sum of daily_co2_emission for each group. Finally, the query orders the results in descending order based on the total_co2_emission." 
+}, { + "id": "796", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many resources were depleted in each mining operation in the last quarter?", + "sql_context": "CREATE TABLE mining_operations (operation_id INT, operation_name VARCHAR(50), resource_type VARCHAR(50), depletion_date DATE, quantity INT); INSERT INTO mining_operations (operation_id, operation_name, resource_type, depletion_date, quantity) VALUES (1, \u0027Operation A\u0027, \u0027Coal\u0027, \u00272022-01-01\u0027, 100), (2, \u0027Operation B\u0027, \u0027Iron\u0027, \u00272022-02-15\u0027, 200), (3, \u0027Operation C\u0027, \u0027Gold\u0027, \u00272022-03-30\u0027, 150);", + "sql": "SELECT operation_name, resource_type, SUM(quantity) AS total_depleted FROM mining_operations WHERE depletion_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 3 MONTH) GROUP BY operation_name, resource_type;", + "sql_explanation": "The SQL query calculates the total quantity of resources depleted in each mining operation in the last quarter by filtering the \u0027depletion_date\u0027 column in the \u0027mining_operations\u0027 table using the DATE_SUB function with an interval of 3 months and then grouping the results by \u0027operation_name\u0027 and \u0027resource_type\u0027 while using the SUM function to calculate the total quantity of each resource type depleted in each operation." 
+}, { + "id": "995", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the monthly trend of carbon emissions from 2019 to 2021?", + "sql_context": "CREATE TABLE emission (id INT, mine_id INT, date DATE, carbon_emission FLOAT); INSERT INTO emission (id, mine_id, date, carbon_emission) VALUES (1, 1, \u00272019-01-01\u0027, 1200.0), (2, 1, \u00272019-02-01\u0027, 1500.0);", + "sql": "SELECT DATE_TRUNC(\u0027month\u0027, date) as month, AVG(carbon_emission) as avg_monthly_emission FROM emission WHERE date BETWEEN \u00272019-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY month ORDER BY month;", + "sql_explanation": "Calculate the monthly trend of carbon emissions between 2019 and 2021 and order them by month." 
+}, { + "id": "1015", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of resources extracted per day in Canada and Australia?", + "sql_context": "CREATE TABLE ResourceExtraction(id INT, country VARCHAR(50), extraction_date DATE, amount INT);", + "sql": "SELECT country, AVG(amount) AS Avg_Amount FROM ResourceExtraction WHERE country IN (\u0027Canada\u0027, \u0027Australia\u0027) AND extraction_date \u003e\u003d DATEADD(day, -365, CURRENT_DATE) GROUP BY country;", + "sql_explanation": "The query calculates the average amount of resources extracted per day in Canada and Australia by filtering resources based on the country and extraction date columns. The AVG function is used to get the average amount for each country, and the results are grouped based on the country column." 
+}, { + "id": "1055", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display total resources extracted and the respective extraction dates for \u0027gold\u0027 and \u0027silver\u0027 resources from the \u0027resource_extraction\u0027 table.", + "sql_context": "CREATE TABLE resource_extraction (resource_type VARCHAR(50), extraction_date DATE, quantity INT); INSERT INTO resource_extraction (resource_type, extraction_date, quantity) VALUES (\u0027gold\u0027, \u00272022-01-01\u0027, 100), (\u0027silver\u0027, \u00272022-01-05\u0027, 200);", + "sql": "SELECT resource_type, SUM(quantity) as total_quantity, extraction_date FROM resource_extraction WHERE resource_type IN (\u0027gold\u0027, \u0027silver\u0027) GROUP BY resource_type, extraction_date;", + "sql_explanation": "The SQL query filters the \u0027resource_extraction\u0027 table to only include rows where the \u0027resource_type\u0027 is either \u0027gold\u0027 or \u0027silver\u0027. Then, it groups the filtered data by \u0027resource_type\u0027 and \u0027extraction_date\u0027 and calculates the total quantity for each group." 
+}, { + "id": "1191", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of accidents in mining operations in India and Argentina, and the total number of employees in those operations?", + "sql_context": "CREATE TABLE mining_operations (id INT, country VARCHAR(20), operation_name VARCHAR(30), accidents INT, total_employees INT); INSERT INTO mining_operations (id, country, operation_name, accidents, total_employees) VALUES (1, \u0027India\u0027, \u0027Operation P\u0027, 5, 150); INSERT INTO mining_operations (id, country, operation_name, accidents, total_employees) VALUES (2, \u0027India\u0027, \u0027Operation Q\u0027, 3, 200); INSERT INTO mining_operations (id, country, operation_name, accidents, total_employees) VALUES (3, \u0027Argentina\u0027, \u0027Operation R\u0027, 4, 120);", + "sql": "SELECT country, SUM(accidents) AS total_accidents, SUM(total_employees) AS total_employees FROM mining_operations WHERE country IN (\u0027India\u0027, \u0027Argentina\u0027) GROUP BY country;", + "sql_explanation": "This query filters mining operations based on countries India and Argentina, calculates the total number of accidents and total number of employees in those operations, and groups the results by country." 
+}, { + "id": "1507", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of female workers in the mining industry per state?", + "sql_context": "CREATE TABLE gender_distribution (site VARCHAR(20), state VARCHAR(20), gender VARCHAR(10), num_employees INT); INSERT INTO gender_distribution (site, state, gender, num_employees) VALUES (\u0027SiteA\u0027, \u0027QLD\u0027, \u0027Male\u0027, 30), (\u0027SiteA\u0027, \u0027QLD\u0027, \u0027Female\u0027, 20), (\u0027SiteB\u0027, \u0027NSW\u0027, \u0027Male\u0027, 40), (\u0027SiteB\u0027, \u0027NSW\u0027, \u0027Female\u0027, 20);", + "sql": "SELECT state, (SUM(CASE WHEN gender \u003d \u0027Female\u0027 THEN num_employees ELSE 0 END)/SUM(num_employees))*100 AS pct_female FROM gender_distribution GROUP BY state;", + "sql_explanation": "Calculate the percentage of female workers in the mining industry per state by grouping the gender_distribution table by state, summing the num_employees column for each gender, and then dividing the sum of female num_employees by the total sum of num_employees for each state, multiplying by 100 to obtain a percentage." 
+}, { + "id": "1697", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum environmental impact score of mining operations in each country?", + "sql_context": "CREATE TABLE mining_operations (id INT, location VARCHAR(255), environmental_impact_score INT); INSERT INTO mining_operations (id, location, environmental_impact_score) VALUES (1, \u0027Canada\u0027, 85), (2, \u0027Canada\u0027, 60), (3, \u0027USA\u0027, 70), (4, \u0027USA\u0027, 90), (5, \u0027Mexico\u0027, 88), (6, \u0027Mexico\u0027, 55), (7, \u0027Australia\u0027, 60), (8, \u0027Australia\u0027, 75);", + "sql": "SELECT location, MAX(environmental_impact_score) AS max_score, MIN(environmental_impact_score) AS min_score FROM mining_operations GROUP BY location;", + "sql_explanation": "The SQL query calculates the maximum and minimum environmental impact score of mining operations in each country. It does so by grouping the mining_operations table by the location column and then calculating the maximum and minimum of the environmental_impact_score column." 
+}, { + "id": "1715", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average coal production per machine in the Illinois district for the year 2020?", + "sql_context": "CREATE TABLE district (district_id INT, district_name VARCHAR(20)); INSERT INTO district (district_id, district_name) VALUES (1, \u0027Illinois\u0027); CREATE TABLE production (machine_id INT, district_id INT, production_quantity INT, production_date DATE); INSERT INTO production (machine_id, district_id, production_quantity, production_date) VALUES (1, 1, 500, \u00272020-01-01\u0027), (2, 1, 600, \u00272020-01-02\u0027);", + "sql": "SELECT AVG(production_quantity) FROM production WHERE district_id \u003d 1 AND production_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027 GROUP BY machine_id;", + "sql_explanation": "The SQL query calculates the average coal production per machine in the Illinois district for the year 2020 by grouping production_quantity by machine_id and filtering district_id and production_date within the given range." 
+}, { + "id": "1784", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum carbon emission per mining operation in Australia for the year 2019?", + "sql_context": "CREATE TABLE mining_emissions (id INT, mine_name TEXT, location TEXT, year INT, carbon_emission FLOAT); INSERT INTO mining_emissions (id, mine_name, location, year, carbon_emission) VALUES (1, \u0027Iron Mine\u0027, \u0027Australia\u0027, 2019, 20000), (2, \u0027Coal Mine\u0027, \u0027Australia\u0027, 2019, 30000), (3, \u0027Gold Mine\u0027, \u0027Australia\u0027, 2019, 25000);", + "sql": "SELECT location, MAX(carbon_emission) as max_carbon_emission FROM mining_emissions WHERE location \u003d \u0027Australia\u0027 AND year \u003d 2019 GROUP BY location;", + "sql_explanation": "This SQL query calculates the maximum carbon emission per mining operation in Australia for the year 2019. It uses the \u0027mining_emissions\u0027 table with the provided context. The query groups the data by location and calculates the maximum carbon_emission for each group." 
+}, { + "id": "2202", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of experience for employees from underrepresented communities in the \u0027mining_operations\u0027 table, grouped by their job titles?", + "sql_context": "CREATE TABLE mining_operations (id INT, name VARCHAR(50), job_title VARCHAR(50), department VARCHAR(50), community VARCHAR(50), experience INT); INSERT INTO mining_operations (id, name, job_title, department, community, experience) VALUES (1, \u0027John Doe\u0027, \u0027Mining Engineer\u0027, \u0027Operations\u0027, \u0027Underrepresented\u0027, 7); INSERT INTO mining_operations (id, name, job_title, department, community, experience) VALUES (2, \u0027Jane Smith\u0027, \u0027Geologist\u0027, \u0027Exploration\u0027, \u0027Underrepresented\u0027, 5); INSERT INTO mining_operations (id, name, job_title, department, community, experience) VALUES (3, \u0027Maria Garcia\u0027, \u0027Mining Engineer\u0027, \u0027Operations\u0027, \u0027Not Underrepresented\u0027, 8);", + "sql": "SELECT job_title, AVG(experience) as avg_experience FROM mining_operations WHERE community \u003d \u0027Underrepresented\u0027 GROUP BY job_title;", + "sql_explanation": "This SQL query calculates the average amount of experience for employees from underrepresented communities in the \u0027mining_operations\u0027 table, grouped by their job titles. 
It uses the WHERE clause to filter the results to only employees from underrepresented communities, and the GROUP BY clause to group the results by job_title and the AVG function to calculate the average value of the \u0027experience\u0027 column for each job title." +}, { + "id": "2494", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees in the workforce identify as male, by department?", + "sql_context": "CREATE TABLE employee_data (employee_id INT, first_name VARCHAR(50), last_name VARCHAR(50), department_name VARCHAR(50), gender VARCHAR(10)); INSERT INTO employee_data (employee_id, first_name, last_name, department_name, gender) VALUES (1, \u0027Jane\u0027, \u0027Smith\u0027, \u0027Engineering\u0027, \u0027Female\u0027), (2, \u0027John\u0027, \u0027Doe\u0027, \u0027Engineering\u0027, \u0027Male\u0027), (3, \u0027Jessica\u0027, \u0027Johnson\u0027, \u0027Human Resources\u0027, \u0027Female\u0027), (4, \u0027Michael\u0027, \u0027Brown\u0027, \u0027Human Resources\u0027, \u0027Male\u0027), (5, \u0027David\u0027, \u0027Williams\u0027, \u0027Operations\u0027, \u0027Male\u0027), (6, \u0027Sarah\u0027, \u0027Jones\u0027, \u0027Operations\u0027, \u0027Female\u0027);", + "sql": "SELECT department_name, COUNT(*) as male_employee_count FROM employee_data WHERE gender \u003d \u0027Male\u0027 GROUP BY department_name;", + "sql_explanation": "This SQL query calculates the number of employees in the workforce who identify as male, by department. 
It uses the COUNT() aggregate function to count the number of employees per department where the gender is \u0027Male\u0027 and groups the rows based on department_name." +}, { + "id": "2614", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 mines with the highest CO2 emissions in the \u0027environmental_impact\u0027 table.", + "sql_context": "CREATE TABLE environmental_impact (mine_id INT, year INT, co2_emissions INT, methane_emissions INT, waste_generation INT); INSERT INTO environmental_impact (mine_id, year, co2_emissions, methane_emissions, waste_generation) VALUES (1, 2020, 5000, 2000, 15000); INSERT INTO environmental_impact (mine_id, year, co2_emissions, methane_emissions, waste_generation) VALUES (2, 2020, 6000, 2500, 18000); INSERT INTO environmental_impact (mine_id, year, co2_emissions, methane_emissions, waste_generation) VALUES (3, 2020, 7000, 3000, 20000);", + "sql": "SELECT mine_id, SUM(co2_emissions) FROM environmental_impact GROUP BY mine_id ORDER BY SUM(co2_emissions) DESC LIMIT 3;", + "sql_explanation": "The SQL query finds the top 3 mines with the highest CO2 emissions by using the SUM function on the \u0027co2_emissions\u0027 column, grouping it by \u0027mine_id\u0027, and ordering the results in descending order. The LIMIT clause restricts the output to the top 3 records." 
+}, { + "id": "2961", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of employees hired each month in the \u0027mining_operations\u0027 table?", + "sql_context": "CREATE TABLE mining_operations (id INT, name VARCHAR(50), num_employees INT, hire_date DATE); INSERT INTO mining_operations (id, name, num_employees, hire_date) VALUES (1, \u0027Gold Rush Inc.\u0027, 100, \u00272022-01-01\u0027), (2, \u0027Gold Rush Inc.\u0027, 101, \u00272022-02-01\u0027), (3, \u0027Gold Rush Inc.\u0027, 102, \u00272022-03-01\u0027), (4, \u0027Silver Strike Ltd.\u0027, 75, \u00272022-01-01\u0027), (5, \u0027Silver Strike Ltd.\u0027, 76, \u00272022-02-01\u0027), (6, \u0027Silver Strike Ltd.\u0027, 77, \u00272022-03-01\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM hire_date), COUNT(*) FROM mining_operations GROUP BY EXTRACT(MONTH FROM hire_date);", + "sql_explanation": "This query counts the number of employees hired each month in the \u0027mining_operations\u0027 table by using the COUNT() function and the EXTRACT() function to extract the month from the hire_date column." 
+}, { + "id": "3012", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many work permits were issued by year and region in the \u0027mining_operations\u0027 database?", + "sql_context": "CREATE TABLE work_permits (permit_id INT PRIMARY KEY, issue_year INT, region VARCHAR(30), permit_count INT); INSERT INTO work_permits (permit_id, issue_year, region, permit_count) VALUES (1, 2020, \u0027North\u0027, 100), (2, 2020, \u0027South\u0027, 150), (3, 2021, \u0027North\u0027, 120), (4, 2021, \u0027South\u0027, 180);", + "sql": "SELECT issue_year, region, SUM(permit_count) as total_permits FROM work_permits GROUP BY issue_year, region;", + "sql_explanation": "This query performs a group by operation on the \u0027issue_year\u0027 and \u0027region\u0027 columns and calculates the total number of work permits issued for each unique combination." 
+}, { + "id": "3102", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total zinc production for each machine type?", + "sql_context": "CREATE TABLE zinc_production (id INT, machine_type VARCHAR(20), zinc_production FLOAT); INSERT INTO zinc_production (id, machine_type, zinc_production) VALUES (5, \u0027TypeD\u0027, 1100.0), (6, \u0027TypeA\u0027, 1400.0), (7, \u0027TypeB\u0027, 1600.0), (8, \u0027TypeD\u0027, 1700.5);", + "sql": "SELECT machine_type, SUM(zinc_production) as total_production FROM zinc_production GROUP BY machine_type;", + "sql_explanation": "This query calculates the total zinc production for each machine type by using the SUM function on the zinc_production column, grouped by machine_type." 
+}, { + "id": "3240", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon emissions score for each mining site, grouped by country?", + "sql_context": "CREATE TABLE mining_sites (id INT, name VARCHAR(255), country VARCHAR(255), carbon_emissions_score INT); INSERT INTO mining_sites (id, name, country, carbon_emissions_score) VALUES (1, \u0027Site A\u0027, \u0027Canada\u0027, 75), (2, \u0027Site B\u0027, \u0027Mexico\u0027, 85), (3, \u0027Site C\u0027, \u0027Brazil\u0027, 95);", + "sql": "SELECT country, AVG(carbon_emissions_score) AS avg_emissions_score FROM mining_sites GROUP BY country;", + "sql_explanation": "This query performs a grouped aggregation on the mining_sites table, grouping the results by the country column and calculating the average carbon_emissions_score for each group." 
+}, { + "id": "3467", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average gold production per employee for each state?", + "sql_context": "CREATE TABLE gold_staff (id INT, state VARCHAR(20), employees INT, gold_production FLOAT); INSERT INTO gold_staff (id, state, employees, gold_production) VALUES (6, \u0027California\u0027, 60, 12500.5), (7, \u0027Nevada\u0027, 70, 16000.3), (8, \u0027California\u0027, 80, 19000.3), (9, \u0027Nevada\u0027, 90, 22000.0);", + "sql": "SELECT state, AVG(gold_production/employees) as avg_productivity FROM gold_staff GROUP BY state;", + "sql_explanation": "This query calculates the average gold production per employee for each state by using the AVG function on the gold_production to employees ratio, grouped by state." 
+}, { + "id": "3572", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of employees of different genders and ethnicities in the entire company.", + "sql_context": "CREATE TABLE employee (id INT, name VARCHAR(255), gender VARCHAR(50), ethnicity VARCHAR(50), department_id INT);", + "sql": "SELECT gender, ethnicity, COUNT(*) AS employee_count FROM employee GROUP BY gender, ethnicity;", + "sql_explanation": "This query creates a table for employees and then groups them by gender and ethnicity, calculating the number of employees in each group. The final result is a list of the number of employees of different genders and ethnicities in the entire company." 
+}, { + "id": "3650", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of resources extracted by each mining method in the Appalachians?", + "sql_context": "CREATE TABLE resources(id INT, method VARCHAR, quantity FLOAT, location VARCHAR); INSERT INTO resources(id, method, quantity, location) VALUES (1, \u0027Open Pit\u0027, 1234.56, \u0027Rocky Mountains\u0027), (2, \u0027Underground\u0027, 789.10, \u0027Appalachians\u0027), (3, \u0027In-Situ\u0027, 456.78, \u0027Rocky Mountains\u0027), (4, \u0027Open Pit\u0027, 987.65, \u0027Appalachians\u0027);", + "sql": "SELECT method, SUM(quantity) FROM resources WHERE location \u003d \u0027Appalachians\u0027 GROUP BY method;", + "sql_explanation": "This SQL query calculates the total quantity of resources extracted by each mining method in the Appalachians by using the SUM function to add up the quantity values for rows with a location of \u0027Appalachians\u0027. It then groups the results by the method to ensure that the query returns the total quantity of resources extracted by each mining method in the Appalachians separately." 
+}, { + "id": "3851", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption per mining site, grouped by mine ownership in the \u0027mining_sites\u0027 table?", + "sql_context": "CREATE TABLE mining_sites (site_id INT, site_ownership VARCHAR(50), year INT, water_consumption INT); INSERT INTO mining_sites (site_id, site_ownership, year, water_consumption) VALUES (1, \u0027Company C\u0027, 2020, 12000); INSERT INTO mining_sites (site_id, site_ownership, year, water_consumption) VALUES (2, \u0027Company D\u0027, 2020, 15000); INSERT INTO mining_sites (site_id, site_ownership, year, water_consumption) VALUES (3, \u0027Company C\u0027, 2020, 18000);", + "sql": "SELECT site_ownership, SUM(water_consumption) FROM mining_sites GROUP BY site_ownership;", + "sql_explanation": "The SQL query calculates the total water consumption grouped by mine ownership by using the SUM function on the \u0027water_consumption\u0027 column in the \u0027mining_sites\u0027 table and grouping it by \u0027site_ownership\u0027." 
+}, { + "id": "4677", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees are there in each department in the \u0027workforce_diversity\u0027 table?", + "sql_context": "CREATE TABLE workforce_diversity (employee_id INT, name VARCHAR(50), department VARCHAR(50), gender VARCHAR(10), age INT); INSERT INTO workforce_diversity (employee_id, name, department, gender, age) VALUES (1, \u0027John Doe\u0027, \u0027Engineering\u0027, \u0027Male\u0027, 35); INSERT INTO workforce_diversity (employee_id, name, department, gender, age) VALUES (2, \u0027Jane Smith\u0027, \u0027Operations\u0027, \u0027Female\u0027, 28);", + "sql": "SELECT department, COUNT(*) FROM workforce_diversity GROUP BY department;", + "sql_explanation": "This SQL query calculates the number of employees in each department by using the COUNT function along with the GROUP BY clause on the \u0027department\u0027 column in the \u0027workforce_diversity\u0027 table." 
+}, { + "id": "793", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have the highest R\u0026D expenditure in Asia in H2 2022?", + "sql_context": "CREATE TABLE rd_expenditure (expenditure_id INT, organization_id INT, region VARCHAR(255), quarter INT, year INT, amount DECIMAL(10, 2));", + "sql": "SELECT organization_id, SUM(amount) as total_expenditure FROM rd_expenditure WHERE region \u003d \u0027Asia\u0027 AND quarter IN (3, 4) AND year \u003d 2022 GROUP BY organization_id ORDER BY total_expenditure DESC;", + "sql_explanation": "This query filters R\u0026D expenditure in Asia and H2 2022, groups by organization_id, calculates the total R\u0026D expenditure for each organization, and orders the results in descending order to show organizations with the highest expenditure first." 
+}, { + "id": "852", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 2 sales representatives by total sales for \u0027DrugT\u0027 in the North America region in Q1 2021?", + "sql_context": "CREATE TABLE sales_data_2 (rep_name TEXT, drug_name TEXT, region TEXT, quarter INT, total_sales FLOAT); INSERT INTO sales_data_2 (rep_name, drug_name, region, quarter, total_sales) VALUES (\u0027RepE\u0027, \u0027DrugT\u0027, \u0027North America\u0027, 1, 600000), (\u0027RepF\u0027, \u0027DrugT\u0027, \u0027North America\u0027, 1, 700000), (\u0027RepG\u0027, \u0027DrugT\u0027, \u0027North America\u0027, 1, 550000), (\u0027RepH\u0027, \u0027DrugT\u0027, \u0027North America\u0027, 1, 450000);", + "sql": "SELECT rep_name, SUM(total_sales) AS total_sales FROM sales_data_2 WHERE drug_name \u003d \u0027DrugT\u0027 AND region \u003d \u0027North America\u0027 AND quarter \u003d 1 GROUP BY rep_name ORDER BY total_sales DESC LIMIT 2;", + "sql_explanation": "This query groups the sales representatives by drug name, region, and quarter, and calculates the total sales for each sales representative for \u0027DrugT\u0027 in the North America region in Q1 2021. It then orders the results by total sales in descending order and limits the results to the top 2 sales representatives with the highest total sales." 
+}, { + "id": "1460", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the highest clinical trial completion rates in 2020?", + "sql_context": "CREATE TABLE clinical_trials (id INT, country VARCHAR(50), year INT, completion_rate DECIMAL(5,2)); INSERT INTO clinical_trials (id, country, year, completion_rate) VALUES (1, \u0027USA\u0027, 2020, 0.85), (2, \u0027Germany\u0027, 2020, 0.82), (3, \u0027Canada\u0027, 2020, 0.78), (4, \u0027USA\u0027, 2020, 0.90), (5, \u0027Germany\u0027, 2020, 0.87), (6, \u0027Canada\u0027, 2020, 0.73);", + "sql": "SELECT country, MAX(completion_rate) as max_completion_rate FROM clinical_trials WHERE year \u003d 2020 GROUP BY country ORDER BY max_completion_rate DESC LIMIT 3;", + "sql_explanation": "The SQL query groups the clinical trials by country and finds the maximum completion rate for each country in 2020. It then orders the results in descending order based on the maximum completion rate and limits the output to the top 3 countries." 
+}, { + "id": "1647", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of drugs approved in the \u0027Asia-Pacific\u0027 region for each year?", + "sql_context": "CREATE TABLE drug_approval (drug_name VARCHAR(255), approval_region VARCHAR(255), approval_date DATE); INSERT INTO drug_approval (drug_name, approval_region, approval_date) VALUES (\u0027DrugX\u0027, \u0027Asia-Pacific\u0027, \u00272017-01-01\u0027);", + "sql": "SELECT approval_region, YEAR(approval_date) AS year, COUNT(*) FROM drug_approval WHERE approval_region \u003d \u0027Asia-Pacific\u0027 GROUP BY approval_region, year;", + "sql_explanation": "This SQL query retrieves the number of drugs approved in the \u0027Asia-Pacific\u0027 region for each year from the drug_approval table by grouping the rows by approval_region and approval_date, and then counting the rows for each group." 
+}, { + "id": "1777", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total market share for each drug, ranked by the highest market share first, for the year 2020?", + "sql_context": "CREATE TABLE market_share (market_share_id INT, drug_name VARCHAR(255), year INT, market_share DECIMAL(10,2)); INSERT INTO market_share (market_share_id, drug_name, year, market_share) VALUES (1, \u0027DrugA\u0027, 2020, 0.35), (2, \u0027DrugB\u0027, 2020, 0.28), (3, \u0027DrugC\u0027, 2020, 0.30), (4, \u0027DrugA\u0027, 2020, 0.33), (5, \u0027DrugB\u0027, 2020, 0.31), (6, \u0027DrugC\u0027, 2020, 0.29);", + "sql": "SELECT drug_name, SUM(market_share) as total_market_share FROM market_share WHERE year \u003d 2020 GROUP BY drug_name ORDER BY total_market_share DESC;", + "sql_explanation": "This query calculates the total market share for each drug in the year 2020. It groups the market share data by drug name and sums up the market share for each category. Finally, it orders the results by the total market share in descending order." 
+}, { + "id": "1881", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total R\u0026D expenditure for each organization in H1 2022?", + "sql_context": "CREATE TABLE rd_expenditure (expenditure_id INT, organization_id INT, quarter INT, year INT, amount DECIMAL(10, 2));", + "sql": "SELECT organization_id, SUM(amount) as total_expenditure FROM rd_expenditure WHERE quarter IN (1, 2) AND year \u003d 2022 GROUP BY organization_id;", + "sql_explanation": "This query filters R\u0026D expenditure in H1 2022, groups by organization_id, and calculates the total R\u0026D expenditure for each organization." 
+}, { + "id": "2950", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average sale price of a specific drug, \u0027DrugZ\u0027, in different regions in the year 2020?", + "sql_context": "CREATE TABLE sale_price (drug_name VARCHAR(50), sale_year INT, region VARCHAR(50), price FLOAT); INSERT INTO sale_price (drug_name, sale_year, region, price) VALUES (\u0027DrugX\u0027, 2020, \u0027North\u0027, 100), (\u0027DrugX\u0027, 2020, \u0027South\u0027, 120), (\u0027DrugX\u0027, 2020, \u0027East\u0027, 140), (\u0027DrugZ\u0027, 2020, \u0027North\u0027, 150), (\u0027DrugZ\u0027, 2020, \u0027South\u0027, 170), (\u0027DrugZ\u0027, 2020, \u0027East\u0027, 190);", + "sql": "SELECT AVG(price) as avg_price FROM sale_price WHERE drug_name \u003d \u0027DrugZ\u0027 AND sale_year \u003d 2020 GROUP BY region;", + "sql_explanation": "The SQL query calculates the average sale price for \u0027DrugZ\u0027 in 2020 for each region by averaging the price and grouping by the region." 
+}, { + "id": "3303", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each drug approved in H1 2020?", + "sql_context": "CREATE TABLE drug_approval (drug_name TEXT, half INT, year INT, revenue FLOAT); INSERT INTO drug_approval (drug_name, half, year, revenue) VALUES (\u0027DrugE\u0027, 1, 2020, 2000000.0), (\u0027DrugF\u0027, 1, 2020, 2500000.0);", + "sql": "SELECT drug_name, SUM(revenue) FROM drug_approval WHERE half \u003d 1 AND year \u003d 2020 GROUP BY drug_name;", + "sql_explanation": "The SQL query finds the total revenue for each drug approved in H1 2020 by selecting drug_name and summing the revenue from the drug_approval table where half is 1 and year is 2020, grouped by drug_name." 
+}, { + "id": "926", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the states with the lowest wastewater treatment plant construction rates between 2005 and 2010, including only states with at least 3 plants constructed.", + "sql_context": "CREATE TABLE wastewater_plants(state VARCHAR(20), year INT, num_plants INT); INSERT INTO wastewater_plants VALUES (\u0027California\u0027, 2005, 5), (\u0027California\u0027, 2006, 6), (\u0027California\u0027, 2007, 7), (\u0027New York\u0027, 2005, 3), (\u0027New York\u0027, 2006, 4), (\u0027New York\u0027, 2007, 5), (\u0027Florida\u0027, 2005, 2), (\u0027Florida\u0027, 2006, 3), (\u0027Florida\u0027, 2007, 4);", + "sql": "SELECT state, AVG(num_plants) AS avg_construction_rate FROM wastewater_plants WHERE year BETWEEN 2005 AND 2007 AND num_plants \u003e\u003d 3 GROUP BY state ORDER BY avg_construction_rate LIMIT 2;", + "sql_explanation": "The SQL query identifies the states with the lowest wastewater treatment plant construction rates between 2005 and 2007, including only states with at least 3 plants constructed. It calculates the average construction rate per year for each state, groups the data by state, and filters the result for the specified years and condition. Then, it orders the result in ascending order and returns the top 2 states with the lowest construction rates." 
+}, { + "id": "1235", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption by each water treatment plant in the state of California in the month of July in the year 2022?", + "sql_context": "CREATE TABLE water_treatment_plant (plant_id INT, state VARCHAR(50), year INT, month INT, water_consumption FLOAT); INSERT INTO water_treatment_plant (plant_id, state, year, month, water_consumption) VALUES (13, \u0027California\u0027, 2022, 7, 12345.6), (14, \u0027California\u0027, 2022, 7, 23456.7), (15, \u0027California\u0027, 2022, 7, 34567.8);", + "sql": "SELECT plant_id, SUM(water_consumption) as total_water_consumption FROM water_treatment_plant WHERE state \u003d \u0027California\u0027 AND year \u003d 2022 AND month \u003d 7 GROUP BY plant_id;", + "sql_explanation": "This SQL query calculates the total water consumption for each water treatment plant in the state of California in the month of July in the year 2022. It does this by summing up the water consumption in the water_treatment_plant table where the state is California, the year is 2022, and the month is 7. It then groups the results by plant_id, giving the total water consumption for each plant." 
+}, { + "id": "1811", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the water treatment plants with the highest water recycling rate, comparing at least 3 regions worldwide.", + "sql_context": "CREATE TABLE wastewater_plants (plant TEXT, region TEXT, recycling_rate DECIMAL(5,4)); INSERT INTO wastewater_plants (plant, region, recycling_rate) VALUES (\u0027Plant A\u0027, \u0027North America\u0027, 0.85), (\u0027Plant B\u0027, \u0027North America\u0027, 0.80), (\u0027Plant C\u0027, \u0027Europe\u0027, 0.90), (\u0027Plant D\u0027, \u0027Europe\u0027, 0.92), (\u0027Plant E\u0027, \u0027Asia\u0027, 0.75), (\u0027Plant F\u0027, \u0027Asia\u0027, 0.78);", + "sql": "SELECT region, MAX(recycling_rate) AS highest_recycling_rate FROM wastewater_plants GROUP BY region ORDER BY highest_recycling_rate DESC LIMIT 3;", + "sql_explanation": "The query calculates the highest water recycling rate for each region and returns the top 3 regions with the highest recycling rates." 
+}, { + "id": "1995", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the agricultural and environmental impact of drought in a specific location over time?", + "sql_context": "CREATE TABLE DroughtImpactAssessments (AssessmentID INT, Location VARCHAR(50), Date DATE, AgriculturalImpact INT, EnvironmentalImpact INT); INSERT INTO DroughtImpactAssessments (AssessmentID, Location, Date, AgriculturalImpact, EnvironmentalImpact) VALUES (4, \u0027Los Angeles, CA\u0027, \u00272022-04-01\u0027, 250, 125), (5, \u0027Los Angeles, CA\u0027, \u00272022-04-02\u0027, 300, 150);", + "sql": "SELECT Location, Date, SUM(AgriculturalImpact + EnvironmentalImpact) as TotalImpact FROM DroughtImpactAssessments GROUP BY Location, Date;", + "sql_explanation": "This SQL query calculates the agricultural and environmental impact of drought in a specific location over time by summing the AgriculturalImpact and EnvironmentalImpact for each location and date, and then groups the results by the Location and Date." 
+}, { + "id": "2341", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water consumption per capita in 2020?", + "sql_context": "CREATE TABLE city_water_usage (city VARCHAR(50), year INT, consumption INT, population INT); INSERT INTO city_water_usage (city, year, consumption, population) VALUES (\u0027CityA\u0027, 2019, 1200, 500000), (\u0027CityA\u0027, 2020, 1500, 510000), (\u0027CityB\u0027, 2019, 1000, 600000), (\u0027CityB\u0027, 2020, 1100, 610000);", + "sql": "SELECT city, AVG(consumption / population) as avg_consumption_per_capita FROM city_water_usage WHERE year \u003d 2020 GROUP BY city;", + "sql_explanation": "The query calculates the average water consumption per capita for the year 2020 by dividing the consumption by the population and grouping the results by city." 
+}, { + "id": "2377", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What were the water conservation initiatives with an average savings of over 75 mÂŗ on October 15, 2021?", + "sql_context": "CREATE TABLE WaterConservation (Id INT, Initiative VARCHAR(50), Savings DECIMAL(5,2), Date DATE); INSERT INTO WaterConservation (Id, Initiative, Savings, Date) VALUES (1, \u0027Rain Barrels\u0027, 85.2, \u00272021-10-15\u0027); INSERT INTO WaterConservation (Id, Initiative, Savings, Date) VALUES (2, \u0027Smart Irrigation\u0027, 76.3, \u00272021-10-15\u0027);", + "sql": "SELECT Initiative, AVG(Savings) FROM WaterConservation WHERE Date \u003d \u00272021-10-15\u0027 GROUP BY Initiative HAVING AVG(Savings) \u003e 75;", + "sql_explanation": "List water conservation initiatives with an average savings of over 75 mÂŗ on October 15, 2021." 
+}, { + "id": "2833", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total water usage in cubic meters for each month in the year 2020", + "sql_context": "CREATE TABLE water_usage_by_month (year INT, month INT, usage FLOAT); INSERT INTO water_usage_by_month (year, month, usage) VALUES (2020, 1, 289.56), (2020, 2, 301.23), (2020, 3, 345.78), (2020, 4, 299.67), (2020, 5, 456.34), (2020, 6, 500.89), (2020, 7, 567.89), (2020, 8, 434.56), (2020, 9, 356.78), (2020, 10, 444.45), (2020, 11, 600.11), (2020, 12, 700.22);", + "sql": "SELECT EXTRACT(MONTH FROM date) AS month, SUM(usage) FROM water_usage_by_month WHERE year \u003d 2020 GROUP BY month;", + "sql_explanation": "This query calculates the total water usage for each month in the year 2020 by grouping the \u0027water_usage_by_month\u0027 table by the month column and then calculating the sum of the \u0027usage\u0027 column for each group." 
+}, { + "id": "3094", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water consumption by each city in the year 2020?", + "sql_context": "CREATE TABLE city_water_usage (city VARCHAR(50), year INT, consumption INT); INSERT INTO city_water_usage (city, year, consumption) VALUES (\u0027CityA\u0027, 2019, 1200), (\u0027CityA\u0027, 2020, 1500), (\u0027CityB\u0027, 2019, 1000), (\u0027CityB\u0027, 2020, 1100);", + "sql": "SELECT city, SUM(consumption) as total_consumption FROM city_water_usage WHERE year \u003d 2020 GROUP BY city;", + "sql_explanation": "The SQL query filters the records based on the year 2020, calculates the total water consumption for each city, and groups the results by city." 
+}, { + "id": "3133", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water pressure by city and location, per day?", + "sql_context": "CREATE TABLE water_pressure_2 (id INT, city VARCHAR(255), location VARCHAR(255), pressure FLOAT, pressure_date DATE); INSERT INTO water_pressure_2 (id, city, location, pressure, pressure_date) VALUES (1, \u0027Miami\u0027, \u0027Downtown\u0027, 55, \u00272022-03-01\u0027); INSERT INTO water_pressure_2 (id, city, location, pressure, pressure_date) VALUES (2, \u0027Chicago\u0027, \u0027Loop\u0027, 60, \u00272022-03-02\u0027);", + "sql": "SELECT city, location, AVG(pressure) FROM water_pressure_2 GROUP BY city, location, DATE(pressure_date);", + "sql_explanation": "This SQL query calculates the average water pressure for each city-location combination, per day, by grouping the \u0027city\u0027, \u0027location\u0027, and date part of the \u0027pressure_date\u0027 column and averaging the \u0027pressure\u0027 column." 
+}, { + "id": "4161", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water conservation effort by any state in a month?", + "sql_context": "CREATE TABLE state_conservation_efforts (state TEXT, month INT, water_conserved FLOAT); INSERT INTO state_conservation_efforts (state, month, water_conserved) VALUES (\u0027CA\u0027, 1, 100000), (\u0027CA\u0027, 2, 120000), (\u0027NY\u0027, 1, 150000), (\u0027NY\u0027, 2, 180000);", + "sql": "SELECT state, MAX(water_conserved) FROM state_conservation_efforts GROUP BY state;", + "sql_explanation": "The query calculates the maximum water conservation effort by any state in a month." 
+}, { + "id": "4168", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of water treatment plants in \u0027WaterTreatmentPlants\u0027 table for each unique treatment type", + "sql_context": "CREATE TABLE WaterTreatmentPlants (plant_id INT, location VARCHAR(50), treatment_type VARCHAR(20));", + "sql": "SELECT treatment_type, COUNT(*) FROM WaterTreatmentPlants GROUP BY treatment_type;", + "sql_explanation": "This query uses the COUNT(*) function to count all records in the \u0027WaterTreatmentPlants\u0027 table for each unique \u0027treatment_type\u0027." +}, { + "id": "4724", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water usage per person in the South Atlantic region, grouped by state?", + "sql_context": "CREATE TABLE south_atlantic_water_usage(state VARCHAR(20), usage NUMERIC(10,2), population NUMERIC(10,2)); INSERT INTO south_atlantic_water_usage VALUES (\u0027Delaware\u0027, 50.67, 954678), (\u0027Florida\u0027, 60.78, 2056789), (\u0027Georgia\u0027, 70.89, 1067890), (\u0027Maryland\u0027, 80.90, 6178901), (\u0027North Carolina\u0027, 90.10, 1089012), (\u0027South Carolina\u0027, 100.20, 490123);", + 
"sql": "SELECT state, AVG(usage) FROM south_atlantic_water_usage GROUP BY state;", + "sql_explanation": "The SQL query uses the AVG function and the GROUP BY clause to calculate the average water usage per person in the South Atlantic region, grouped by state." +}, { + "id": "159", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per day, for the month of December, in the year 2021?", + "sql_context": "CREATE TABLE Donations (id INT, donor_name VARCHAR(100), donation_amount DECIMAL(10,2), donation_date DATE, event_id INT);", + "sql": "SELECT DATE_TRUNC(\u0027day\u0027, donation_date) as donation_day, AVG(donation_amount) as avg_donation FROM Donations WHERE DATE_TRUNC(\u0027month\u0027, donation_date) \u003d DATE_TRUNC(\u0027month\u0027, \u00272021-12-01\u0027) AND DATE_TRUNC(\u0027year\u0027, donation_date) \u003d DATE_TRUNC(\u0027year\u0027, \u00272021-12-01\u0027) GROUP BY donation_day;", + "sql_explanation": "This query calculates the average donation amount per day in December 2021. It does this by using the DATE_TRUNC function to extract the day and month from the donation_date column and then grouping by the donation_day column while averaging the donation_amount column. The WHERE clause filters for donations made in the month of December and the year 2021." 
+}, { + "id": "966", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of the top 5 donors and their total contributions in the last 3 years.", + "sql_context": "CREATE TABLE Donors (donor_id INT, donor_name VARCHAR(50), donation_date DATE, amount INT); INSERT INTO Donors (donor_id, donor_name, donation_date, amount) VALUES (1, \u0027John Doe\u0027, \u00272021-01-01\u0027, 100), (2, \u0027Jane Smith\u0027, \u00272020-01-01\u0027, 50), (3, \u0027Jim Brown\u0027, \u00272019-01-01\u0027, 200);", + "sql": "SELECT donor_name, SUM(amount) AS Total_Contributions FROM Donors D WHERE donation_date \u003e\u003d DATE(NOW()) - INTERVAL 3 YEAR GROUP BY donor_name ORDER BY Total_Contributions DESC LIMIT 5", + "sql_explanation": "The SQL query lists the top 5 donors and their total contributions in the last 3 years by first filtering the records based on the donation date, then grouping them by donor name, and finally ordering the grouped records by their total contributions in descending order." 
+}, { + "id": "1071", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 2 cities with the highest number of arts education programs funded by \u0027Arts Foundation\u0027 in 2021 and 2022.", + "sql_context": "CREATE TABLE EducationPrograms (program_id INT, city VARCHAR(20), funding_source VARCHAR(20), year INT); INSERT INTO EducationPrograms (program_id, city, funding_source, year) VALUES (1, \u0027New York\u0027, \u0027Cultural Trust\u0027, 2021), (2, \u0027Miami\u0027, \u0027Cultural Trust\u0027, 2021), (3, \u0027Chicago\u0027, \u0027Cultural Trust\u0027, 2021), (4, \u0027New York\u0027, \u0027Arts Foundation\u0027, 2021), (5, \u0027Atlanta\u0027, \u0027City Grants\u0027, 2022), (6, \u0027New York\u0027, \u0027Arts Foundation\u0027, 2022);", + "sql": "SELECT city, COUNT(*) as program_count FROM EducationPrograms WHERE funding_source \u003d \u0027Arts Foundation\u0027 AND year IN (2021, 2022) GROUP BY city ORDER BY program_count DESC LIMIT 2", + "sql_explanation": "Filter records in the \u0027EducationPrograms\u0027 table to only include those with a funding source of \u0027Arts Foundation\u0027 and a year of either 2021 or 2022, then group results by city, count the number of records for each city, and order the results in descending order by the count. Finally, return only the top 2 cities with the highest count." 
+}, { + "id": "1117", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which performing arts events had the highest and lowest attendance by gender?", + "sql_context": "CREATE TABLE performing_arts_events (id INT, event_name VARCHAR(255), event_date DATE, attendee_gender VARCHAR(255));", + "sql": "SELECT event_name, attendee_gender, COUNT(attendee_gender) as attendance FROM performing_arts_events GROUP BY event_name, attendee_gender ORDER BY attendance DESC, event_name;", + "sql_explanation": "This query groups the records based on the event_name and attendee_gender columns in the performing_arts_events table. It calculates the attendance for each event and gender by using the COUNT function. The results are ordered in descending order based on the attendance and event name. The event with the highest attendance is displayed first, followed by the event with the lowest attendance." 
+}, { + "id": "1266", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 cities with the highest number of arts education programs funded by \u0027Cultural Trust\u0027 in 2021.", + "sql_context": "CREATE TABLE EducationPrograms (program_id INT, city VARCHAR(20), funding_source VARCHAR(20), year INT); INSERT INTO EducationPrograms (program_id, city, funding_source, year) VALUES (1, \u0027New York\u0027, \u0027Cultural Trust\u0027, 2021), (2, \u0027Miami\u0027, \u0027Cultural Trust\u0027, 2021), (3, \u0027Chicago\u0027, \u0027Cultural Trust\u0027, 2021), (4, \u0027New York\u0027, \u0027Arts Foundation\u0027, 2021);", + "sql": "SELECT city, COUNT(*) as program_count FROM EducationPrograms WHERE funding_source \u003d \u0027Cultural Trust\u0027 AND year \u003d 2021 GROUP BY city ORDER BY program_count DESC LIMIT 3", + "sql_explanation": "Filter records in the \u0027EducationPrograms\u0027 table to only include those with funding source \u0027Cultural Trust\u0027 and year 2021, then group results by city, count the number of records for each city, and order the results in descending order by the count. Finally, return only the top 3 cities with the highest count." 
+}, { + "id": "1343", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of new visitors by program genre in the first quarter of 2022?", + "sql_context": "CREATE SCHEMA events; CREATE TABLE events (event_id INT, event_name VARCHAR(255), event_genre VARCHAR(255), visit_date DATE, visitor_id INT); INSERT INTO events (event_id, event_name, event_genre, visit_date, visitor_id) VALUES (1, \u0027Concert\u0027, \u0027Music\u0027, \u00272022-01-10\u0027, 1001), (2, \u0027Play\u0027, \u0027Theater\u0027, \u00272022-02-12\u0027, 1002), (3, \u0027Workshop\u0027, \u0027Education\u0027, \u00272022-03-15\u0027, 1003);", + "sql": "SELECT event_genre, COUNT(DISTINCT CASE WHEN visit_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 THEN visitor_id END) as new_visitors FROM events GROUP BY event_genre;", + "sql_explanation": "This query calculates the number of new visitors by program genre in the first quarter of 2022. It uses the CASE statement to identify distinct visitor_id values where the visit_date falls within the specified date range, then groups the results by event_genre and counts the number of distinct visitor_id values for each group." 
+}, { + "id": "1508", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of events and total attendees by year", + "sql_context": "CREATE TABLE events_attendees_2 (event_id INT, attendee_id INT, event_date DATE); INSERT INTO events_attendees_2 (event_id, attendee_id, event_date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 2, \u00272023-01-02\u0027), (3, 3, \u00272022-02-01\u0027), (4, 4, \u00272023-03-01\u0027);", + "sql": "SELECT YEAR(event_date) AS year, COUNT(DISTINCT event_id) AS num_events, COUNT(DISTINCT attendee_id) AS num_attendees FROM events_attendees_2 GROUP BY year;", + "sql_explanation": "We extract the year from the event_date column and group by those values to find the number of events and total attendees by year." 
+}, { + "id": "1644", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many new donors have donated in the last week and in which city do they reside?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonationDate DATE, DonorCity VARCHAR(50), DonorNew BOOLEAN); INSERT INTO Donors (DonorID, DonationDate, DonorCity, DonorNew) VALUES (1, \u00272022-07-05\u0027, \u0027New York\u0027, true), (2, \u00272022-07-02\u0027, \u0027Los Angeles\u0027, false), (3, \u00272022-07-15\u0027, \u0027Dallas\u0027, true);", + "sql": "SELECT COUNT(DISTINCT DonorID), DonorCity FROM Donors WHERE DonationDate \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 7 DAY) AND DonorNew \u003d true GROUP BY DonorCity;", + "sql_explanation": "Count the number of new donors who have donated in the last week and group the results by their city of residence." 
+}, { + "id": "2231", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the distribution of attendees by age group for each event in \u00272021\u0027?", + "sql_context": "CREATE TABLE Attendees (attendee_id INT, event_id INT, age_group VARCHAR(50), attendee_date DATE); INSERT INTO Attendees (attendee_id, event_id, age_group, attendee_date) VALUES (4, 4, \u002718-24\u0027, \u00272021-01-01\u0027), (5, 5, \u002725-34\u0027, \u00272021-02-01\u0027), (6, 6, \u002735-44\u0027, \u00272021-03-01\u0027);", + "sql": "SELECT event_id, age_group, COUNT(*) AS num_attendees FROM Attendees WHERE YEAR(attendee_date) \u003d 2021 GROUP BY event_id, age_group;", + "sql_explanation": "This query calculates the distribution of attendees by age group for each event in the year 2021. It does this by grouping the attendees by event and age group and then counting the number of attendees in each group." 
+}, { + "id": "2892", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of audience members by age group, for events held at the \u0027Art Gallery\u0027 in the past year?", + "sql_context": "CREATE TABLE ArtGallery (event_id INT, event_name VARCHAR(50), event_date DATE, age_group VARCHAR(20));", + "sql": "SELECT age_group, COUNT(*) FROM ArtGallery WHERE event_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY age_group;", + "sql_explanation": "This query filters the records in the \u0027ArtGallery\u0027 table to include only those from the past year, then groups the records by the \u0027age_group\u0027 column and counts the number of records in each group." 
+}, { + "id": "3524", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many attendees were there at the \"Jazz in the Park\" event by age group?", + "sql_context": "CREATE TABLE event_attendance (event VARCHAR(255), age INT, attendees INT); INSERT INTO event_attendance (event, age, attendees) VALUES (\u0027Jazz in the Park\u0027, 18, 200), (\u0027Jazz in the Park\u0027, 19, 250), (\u0027Jazz in the Park\u0027, 20, 150), (\u0027Classical Music Concert\u0027, 18, 300);", + "sql": "SELECT age, SUM(attendees) FROM event_attendance WHERE event \u003d \u0027Jazz in the Park\u0027 GROUP BY age;", + "sql_explanation": "This query retrieves the number of attendees at the \"Jazz in the Park\" event by age group by summing the attendees column in the event_attendance table where the event is \"Jazz in the Park\" and grouping by age." 
+}, { + "id": "4510", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many events took place per city?", + "sql_context": "CREATE TABLE Events (ID INT, EventCity TEXT, EventDate DATE); INSERT INTO Events (ID, EventCity, EventDate) VALUES (1, \u0027Los Angeles\u0027, \u00272022-01-01\u0027), (2, \u0027New York\u0027, \u00272022-02-01\u0027), (3, \u0027Dallas\u0027, \u00272022-03-01\u0027), (4, \u0027Los Angeles\u0027, \u00272022-04-01\u0027);", + "sql": "SELECT EventCity, COUNT(*) as NumberOfEvents FROM Events GROUP BY EventCity;", + "sql_explanation": "This SQL query calculates the number of events that took place in each city in the \u0027Events\u0027 table. It does this by using the COUNT() function with no parameters to count the number of rows, and the GROUP BY clause to group the results by city." 
+}, { + "id": "1170", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Provide the number of threat intelligence reports generated per month for the past year, for the Middle East region.", + "sql_context": "CREATE TABLE threat_intelligence (report_id INT, report_date DATE, region TEXT); INSERT INTO threat_intelligence (report_id, report_date, region) VALUES (1, \u00272022-01-15\u0027, \u0027Middle East\u0027), (2, \u00272022-03-10\u0027, \u0027Middle East\u0027), (3, \u00272021-12-25\u0027, \u0027Middle East\u0027);", + "sql": "SELECT DATE_FORMAT(report_date, \u0027%Y-%m\u0027) as month, COUNT(*) as reports FROM threat_intelligence WHERE region \u003d \u0027Middle East\u0027 AND report_date \u003e\u003d \u00272021-01-01\u0027 GROUP BY month;", + "sql_explanation": "This query provides the number of threat intelligence reports generated per month for the past year, for the Middle East region by extracting the year and month from \u0027report_date\u0027 and grouping by month to count the number of reports in the Middle East region for the past year." 
+}, { + "id": "1223", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many military equipment maintenance requests were there per month in 2022?", + "sql_context": "CREATE TABLE MaintenancePerMonth (RequestID INT, RequestDate DATE); INSERT INTO MaintenancePerMonth (RequestID, RequestDate) VALUES (1, \u00272022-01-05\u0027), (2, \u00272022-02-12\u0027), (3, \u00272022-03-20\u0027), (4, \u00272022-04-25\u0027), (5, \u00272022-05-10\u0027), (6, \u00272022-06-18\u0027), (7, \u00272022-07-05\u0027), (8, \u00272022-08-12\u0027), (9, \u00272022-09-20\u0027), (10, \u00272022-10-30\u0027), (11, \u00272022-11-15\u0027), (12, \u00272022-12-28\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM RequestDate) AS Month, COUNT(*) FROM MaintenancePerMonth WHERE RequestDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY Month ORDER BY Month;", + "sql_explanation": "This SQL query extracts the month from RequestDate, counts the number of records for each month, filters the records for the year 2022, groups the result set by Month, and orders the result set by Month to find how many military equipment maintenance requests were there per month in 2022." 
+}, { + "id": "1337", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many military equipment maintenance requests were submitted per month in 2020?", + "sql_context": "CREATE TABLE maintenance (request_id INT, request_date DATE, equipment_type VARCHAR(255)); INSERT INTO maintenance (request_id, request_date, equipment_type) VALUES (1, \u00272020-02-12\u0027, \u0027tank\u0027), (2, \u00272020-04-15\u0027, \u0027plane\u0027), (3, \u00272019-10-27\u0027, \u0027ship\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM request_date) AS month, COUNT(*) AS num_requests FROM maintenance WHERE request_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027 GROUP BY month;", + "sql_explanation": "The SQL query extracts the month from the request_date column and groups the results by month. It then counts the number of rows in each group to determine the number of requests submitted per month. The WHERE clause restricts the results to requests submitted in 2020." 
+}, { + "id": "1565", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum and minimum maintenance cost of military equipment for each branch in the last 3 years.", + "sql_context": "CREATE TABLE military_equipment (equipment_id INT, branch VARCHAR(50), maintenance_cost FLOAT, date DATE);", + "sql": "SELECT branch, MAX(maintenance_cost), MIN(maintenance_cost) FROM military_equipment WHERE date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR) GROUP BY branch;", + "sql_explanation": "* This query calculates the maximum and minimum maintenance cost for military equipment for each branch in the last 3 years." 
+}, { + "id": "1635", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for cybersecurity incidents, by severity, for the past 6 months?", + "sql_context": "CREATE TABLE cybersecurity_incidents(id INT, severity INT, response_time INT, date DATE);", + "sql": "SELECT severity, AVG(response_time) as avg_response_time FROM cybersecurity_incidents WHERE date \u003e DATE_SUB(NOW(), INTERVAL 6 MONTH) GROUP BY severity;", + "sql_explanation": "This query groups the cybersecurity incidents by severity, filters for the past 6 months, and returns the average response time for each severity level." 
+}, { + "id": "1764", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the number of military equipment maintenance records for each month", + "sql_context": "CREATE TABLE monthly_maintenance (id INT, equipment_type VARCHAR(255), maintenance_date DATE);", + "sql": "SELECT YEAR(maintenance_date), MONTH(maintenance_date), COUNT(*) FROM monthly_maintenance GROUP BY YEAR(maintenance_date), MONTH(maintenance_date);", + "sql_explanation": "This SQL query groups the military equipment maintenance records by the year and month of the maintenance date and counts the number of records for each group, returning the number of maintenance records for each month." 
+}, { + "id": "2609", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the veteran unemployment rate in each region?", + "sql_context": "CREATE TABLE veteran_employment (employment_id INT, region TEXT, veteran_unemployment_rate FLOAT); INSERT INTO veteran_employment (employment_id, region, veteran_unemployment_rate) VALUES (6, \u0027Northeast\u0027, 0.05), (7, \u0027Midwest\u0027, 0.06), (8, \u0027South\u0027, 0.07), (9, \u0027West\u0027, 0.08);", + "sql": "SELECT region, AVG(veteran_unemployment_rate) as avg_veteran_unemployment_rate FROM veteran_employment GROUP BY region;", + "sql_explanation": "This query calculates the veteran unemployment rate in each region by grouping the data by region and calculating the average veteran unemployment rate for each region." 
+}, { + "id": "2804", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 defense contractors by total contract value for the first half of 2021?", + "sql_context": "CREATE TABLE Contracts (id INT, contractor VARCHAR(255), half INT, value INT); INSERT INTO Contracts (id, contractor, half, value) VALUES (1, \u0027Lockheed Martin\u0027, 1, 1000000), (2, \u0027Raytheon\u0027, 1, 800000), (3, \u0027Boeing\u0027, 2, 1200000), (4, \u0027Northrop Grumman\u0027, 1, 1100000), (5, \u0027Lockheed Martin\u0027, 2, 1300000), (6, \u0027Raytheon\u0027, 2, 900000), (7, \u0027Boeing\u0027, 1, 1400000), (8, \u0027Northrop Grumman\u0027, 2, 1000000);", + "sql": "SELECT contractor, SUM(value) FROM Contracts WHERE half \u003d 1 GROUP BY contractor ORDER BY SUM(value) DESC LIMIT 3;", + "sql_explanation": "This SQL query identifies the top 3 defense contractors by total contract value for the first half of 2021 by grouping the \u0027Contracts\u0027 table by \u0027contractor\u0027 and calculating the sum of the \u0027value\u0027 column for each group. The resulting table is then sorted by the sum of the \u0027value\u0027 column in descending order and limited to the top 3 rows." 
+}, { + "id": "2852", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military equipment types maintained by each division?", + "sql_context": "CREATE TABLE division (division_id INT, division_name VARCHAR(50)); INSERT INTO division (division_id, division_name) VALUES (1, \u0027Aviation\u0027), (2, \u0027Ground\u0027), (3, \u0027Naval\u0027); CREATE TABLE equipment (equipment_id INT, equipment_name VARCHAR(50), division_id INT); INSERT INTO equipment (equipment_id, equipment_name, division_id) VALUES (1, \u0027F-16 Fighting Falcon\u0027, 1), (2, \u0027M1 Abrams\u0027, 2), (3, \u0027USS Gerald R. Ford\u0027, 3);", + "sql": "SELECT division_id, COUNT(DISTINCT equipment_name) as total_equipment_types FROM equipment GROUP BY division_id;", + "sql_explanation": "The query calculates the number of distinct equipment types maintained by each division by grouping the equipment table by division_id and counting the number of distinct equipment_names for each group." 
+}, { + "id": "2857", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense contracts issued per quarter, ranked by total count in descending order?", + "sql_context": "CREATE TABLE Contract_Quarters (Contract_ID INT, Quarter VARCHAR(255)); INSERT INTO Contract_Quarters (Contract_ID, Quarter) VALUES (1, \u0027Q1 2018\u0027), (2, \u0027Q2 2019\u0027), (3, \u0027Q3 2020\u0027), (4, \u0027Q4 2021\u0027), (5, \u0027Q1 2018\u0027), (6, \u0027Q2 2019\u0027), (7, \u0027Q3 2020\u0027), (8, \u0027Q4 2021\u0027);", + "sql": "SELECT Quarter, COUNT(*) as Contract_Count FROM Contract_Quarters GROUP BY Quarter ORDER BY Contract_Count DESC;", + "sql_explanation": "The SQL query calculates the total number of defense contracts issued per quarter by using the COUNT function with the * wildcard and grouping by the Quarter column. It then orders the result set in descending order based on the Contract_Count column using the ORDER BY clause." 
+}, { + "id": "2919", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of military equipment maintenance tasks performed in each region for the month of July", + "sql_context": "CREATE TABLE military_equipment_maintenance (maintenance_id INT, region VARCHAR(255), maintenance_date DATE); INSERT INTO military_equipment_maintenance (maintenance_id, region, maintenance_date) VALUES (1, \u0027AFRICOM\u0027, \u00272022-07-01\u0027); INSERT INTO military_equipment_maintenance (maintenance_id, region, maintenance_date) VALUES (2, \u0027EUCOM\u0027, \u00272022-06-01\u0027);", + "sql": "SELECT region, COUNT(*) FROM military_equipment_maintenance WHERE MONTH(maintenance_date) \u003d 7 GROUP BY region;", + "sql_explanation": "This SQL query displays the number of military equipment maintenance tasks performed in each region for the month of July. It uses the WHERE clause to filter the military_equipment_maintenance table for maintenance tasks performed in July and the GROUP BY clause to group the results by region. The COUNT function is used to count the number of maintenance tasks in each group." 
+}, { + "id": "3389", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total count of defense contracts for each contracting agency, excluding contracts with a value of $0.", + "sql_context": "CREATE TABLE defense_contracts (contract_id INT, agency VARCHAR(255), value DECIMAL(10,2));INSERT INTO defense_contracts (contract_id, agency, value) VALUES (1, \u0027DoD\u0027, 1000000.00), (2, \u0027DoD\u0027, 0.00), (3, \u0027VA\u0027, 500000.00);", + "sql": "SELECT agency, COUNT(*) as total_contracts FROM defense_contracts WHERE value \u003e 0 GROUP BY agency;", + "sql_explanation": "The SQL query selects the \u0027agency\u0027 column and calculates the count of rows for each agency from the \u0027defense_contracts\u0027 table, filtering out contracts with a value of $0." 
+}, { + "id": "3593", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense contracts awarded per quarter?", + "sql_context": "CREATE TABLE Contracts (Quarter VARCHAR(6), Count INT); INSERT INTO Contracts (Quarter, Count) VALUES (\u0027Q1-2021\u0027, 350), (\u0027Q2-2021\u0027, 400), (\u0027Q3-2021\u0027, 450), (\u0027Q4-2021\u0027, 500);", + "sql": "SELECT STR_TO_DATE(Quarter, \u0027Q%d-%Y\u0027) AS Quarter, SUM(Count) FROM Contracts GROUP BY Quarter;", + "sql_explanation": "The SQL query uses the STR_TO_DATE() function to convert the quarter values into a date format that can be used with the GROUP BY clause. The SUM() function is then used to add up the number of defense contracts awarded for each quarter." 
+}, { + "id": "4084", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of threat intelligence reports generated per month in 2022?", + "sql_context": "CREATE TABLE Reports (Month VARCHAR(7), Count INT); INSERT INTO Reports (Month, Count) VALUES (\u0027Jan-2022\u0027, 100), (\u0027Feb-2022\u0027, 110), (\u0027Mar-2022\u0027, 120), (\u0027Apr-2022\u0027, 130), (\u0027May-2022\u0027, 140), (\u0027Jun-2022\u0027, 150), (\u0027Jul-2022\u0027, 160), (\u0027Aug-2022\u0027, 170), (\u0027Sep-2022\u0027, 180), (\u0027Oct-2022\u0027, 190), (\u0027Nov-2022\u0027, 200), (\u0027Dec-2022\u0027, 210);", + "sql": "SELECT STR_TO_DATE(Month, \u0027%b-%Y\u0027) AS Month, SUM(Count) FROM Reports GROUP BY Month;", + "sql_explanation": "The SQL query uses the STR_TO_DATE() function to convert the month values into a date format that can be used with the GROUP BY clause. The SUM() function is then used to add up the number of threat intelligence reports generated for each month in 2022." 
+}, { + "id": "4190", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of threat reports for each type in the \u0027ThreatReports\u0027 table", + "sql_context": "CREATE TABLE ThreatReports (id INT, type VARCHAR(255), report_count INT);", + "sql": "SELECT type, SUM(report_count) as total_reports FROM ThreatReports GROUP BY type;", + "sql_explanation": "This SQL query groups the ThreatReports table by the \u0027type\u0027 column and calculates the sum of the \u0027report_count\u0027 column for each group, providing the number of threat reports for each type." +}, { + "id": "4352", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of military personnel by rank", + "sql_context": "CREATE TABLE military_personnel (personnel_id INT, rank TEXT); INSERT INTO military_personnel (personnel_id, rank) VALUES (1, \u0027Private\u0027), (2, \u0027Corporal\u0027), (3, \u0027Sergeant\u0027), (4, \u0027Captain\u0027), (5, \u0027Major\u0027), (6, \u0027Colonel\u0027), (7, \u0027General\u0027);", + "sql": "SELECT rank, COUNT(*) as personnel_count FROM military_personnel GROUP BY rank;", + "sql_explanation": "* Queries the 
military_personnel table, counts the number of personnel for each rank, groups the results by rank, and returns the rank and personnel_count columns." +}, { + "id": "40", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the national security events with their dates and calculate the average impact level for each month.", + "sql_context": "CREATE TABLE national_security_events_dates (id INT, event VARCHAR, impact VARCHAR, event_date DATE); INSERT INTO national_security_events_dates (id, event, impact, event_date) VALUES (1, \u0027Terrorist Attack\u0027, \u0027High\u0027, \u00272022-01-01\u0027), (2, \u0027Cyber Espionage\u0027, \u0027Medium\u0027, \u00272022-02-15\u0027), (3, \u0027Nuclear Missile Test\u0027, \u0027Low\u0027, \u00272022-03-01\u0027);", + "sql": "SELECT DATEPART(YEAR, event_date) as year, DATEPART(MONTH, event_date) as month, AVG(CASE WHEN impact \u003d \u0027High\u0027 THEN 1.0 ELSE 0.0 END) as avg_high, AVG(CASE WHEN impact \u003d \u0027Medium\u0027 THEN 1.0 ELSE 0.0 END) as avg_medium, AVG(CASE WHEN impact \u003d \u0027Low\u0027 THEN 1.0 ELSE 0.0 END) as avg_low FROM national_security_events_dates GROUP BY DATEPART(YEAR, event_date), DATEPART(MONTH, event_date);", + "sql_explanation": "The SQL query calculates the average impact level for each month by using the AVG() function and the DATEPART() function to extract the year and month from the event_date." 
+}, { + "id": "681", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Present the cybersecurity strategies implemented by each government agency, the strategy type, and the year it was adopted, along with the status of its implementation.", + "sql_context": "CREATE TABLE cybersecurity_strategies (agency_name VARCHAR(255), strategy_type VARCHAR(255), adoption_year INT, implementation_status VARCHAR(255));", + "sql": "SELECT agency_name, strategy_type, adoption_year, COUNT(*) as num_implemented FROM cybersecurity_strategies WHERE implementation_status \u003d \u0027Implemented\u0027 GROUP BY agency_name, strategy_type, adoption_year;", + "sql_explanation": "The SQL query filters the cybersecurity strategies that have been implemented, then groups the results by agency_name, strategy_type, and adoption_year to present the number of strategies implemented by each agency for each type and year." 
+}, { + "id": "1486", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cybersecurity incidents were reported in Europe and North America in the years 2019 and 2020?", + "sql_context": "CREATE TABLE cybersecurity_incidents (id INT, incident_type VARCHAR(255), year INT, affected_systems VARCHAR(255), region VARCHAR(255)); INSERT INTO cybersecurity_incidents (id, incident_type, year, affected_systems, region) VALUES (1, \u0027Data Breach\u0027, 2020, \u0027Web Servers\u0027, \u0027Asia\u0027), (2, \u0027Phishing\u0027, 2019, \u0027Email Accounts\u0027, \u0027Asia\u0027), (3, \u0027Malware\u0027, 2020, \u0027Workstations\u0027, \u0027Europe\u0027), (4, \u0027Ransomware\u0027, 2019, \u0027Databases\u0027, \u0027North America\u0027);", + "sql": "SELECT region, COUNT(*) as total_incidents FROM cybersecurity_incidents WHERE year IN (2019, 2020) AND region IN (\u0027Europe\u0027, \u0027North America\u0027) GROUP BY region;", + "sql_explanation": "This query uses the COUNT function to count the number of cybersecurity incidents in Europe and North America in the years 2019 and 2020. The GROUP BY clause is used to group the results by region." 
+}, { + "id": "1591", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of cybersecurity personnel in each country in the European region, with roles of \u0027Security Analyst\u0027 or \u0027Security Engineer\u0027?", + "sql_context": "CREATE TABLE personnel (id INT, country VARCHAR(50), role VARCHAR(50), region VARCHAR(50)); INSERT INTO personnel (id, country, role, region) VALUES (1, \u0027Germany\u0027, \u0027Security Analyst\u0027, \u0027Europe\u0027); INSERT INTO personnel (id, country, role, region) VALUES (2, \u0027France\u0027, \u0027Security Engineer\u0027, \u0027Europe\u0027); INSERT INTO personnel (id, country, role, region) VALUES (3, \u0027Spain\u0027, \u0027Security Manager\u0027, \u0027Europe\u0027);", + "sql": "SELECT region, AVG(CASE WHEN role IN (\u0027Security Analyst\u0027, \u0027Security Engineer\u0027) THEN 1 ELSE 0 END) FROM personnel WHERE region \u003d \u0027Europe\u0027 GROUP BY region;", + "sql_explanation": "This query groups the \u0027personnel\u0027 table by \u0027region\u0027 and calculates the average number of records in each group where the \u0027role\u0027 is \u0027Security Analyst\u0027 or \u0027Security Engineer\u0027." 
+}, { + "id": "1926", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military personnel per country in the South American region?", + "sql_context": "CREATE TABLE MilitaryPersonnel (Id INT, Country VARCHAR(50), Service VARCHAR(50), Quantity INT);INSERT INTO MilitaryPersonnel (Id, Country, Service, Quantity) VALUES (1, \u0027Brazil\u0027, \u0027Army\u0027, 250000), (2, \u0027Argentina\u0027, \u0027Navy\u0027, 50000), (3, \u0027Colombia\u0027, \u0027Air Force\u0027, 40000);", + "sql": "SELECT Country, SUM(Quantity) AS TotalPersonnel FROM MilitaryPersonnel WHERE Country IN (\u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Colombia\u0027) GROUP BY Country;", + "sql_explanation": "The SQL query calculates the total number of military personnel per country in the South American region by summing the Quantity column and grouping by Country." 
+}, { + "id": "2213", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military technology patents filed by Asian countries?", + "sql_context": "CREATE TABLE MilitaryPatents (id INT PRIMARY KEY, country VARCHAR(50), technology VARCHAR(50), date DATE); INSERT INTO MilitaryPatents (id, country, technology, date) VALUES (1, \u0027China\u0027, \u0027Stealth Technology\u0027, \u00272020-01-01\u0027), (2, \u0027Japan\u0027, \u0027Artificial Intelligence\u0027, \u00272019-12-15\u0027), (3, \u0027South Korea\u0027, \u0027Cyber Security\u0027, \u00272020-03-02\u0027);", + "sql": "SELECT country, COUNT(*) as total_patents FROM MilitaryPatents WHERE country IN (\u0027China\u0027, \u0027Japan\u0027, \u0027South Korea\u0027) GROUP BY country;", + "sql_explanation": "This query calculates the total number of military technology patents filed by Asian countries. It filters the records based on country and groups them by country, then calculates the count of records for each group." 
+}, { + "id": "2243", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total military expenditure for each country in the \u0027Americas\u0027 region for the last 5 years?", + "sql_context": "CREATE TABLE if not exists military_expenditure (country VARCHAR(50), region VARCHAR(50), year INT, amount FLOAT);", + "sql": "SELECT country, SUM(amount) as total_amount FROM military_expenditure WHERE region \u003d \u0027Americas\u0027 AND year \u003e\u003d 2017 GROUP BY country;", + "sql_explanation": "This SQL query calculates the total military expenditure (amount) for each country in the \u0027Americas\u0027 region for the last 5 years by grouping the military_expenditure table by country and applying the SUM function. It limits the year to the last 5 years." 
+}, { + "id": "2279", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of intelligence operations conducted in the last 6 months by country.", + "sql_context": "CREATE TABLE intelligence_ops (id INT, country VARCHAR(30), operation_type VARCHAR(30), operation_date DATE); INSERT INTO intelligence_ops (id, country, operation_type, operation_date) VALUES (1, \u0027USA\u0027, \u0027Surveillance\u0027, \u00272021-06-15\u0027); INSERT INTO intelligence_ops (id, country, operation_type, operation_date) VALUES (2, \u0027Russia\u0027, \u0027Cyber Espionage\u0027, \u00272022-02-03\u0027);", + "sql": "SELECT country, COUNT(*) FROM intelligence_ops WHERE operation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY country;", + "sql_explanation": "This query shows the number of intelligence operations conducted in the last 6 months by country by selecting all records with an \u0027operation_date\u0027 within the last 6 months, grouping them by \u0027country\u0027, and then calculating the count of those records for each country." 
+}, { + "id": "2453", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military technologies developed by each country since 2010?", + "sql_context": "CREATE TABLE if not exists military_technologies (country VARCHAR(50), technology_name VARCHAR(50), year INT);", + "sql": "SELECT country, COUNT(technology_name) as total_technologies FROM military_technologies WHERE year \u003e\u003d 2010 GROUP BY country;", + "sql_explanation": "This SQL query selects the country and counts the number of technology names from the military_technologies table where the year is greater than or equal to 2010. It then groups the results by country, providing the total number of military technologies developed by each country since 2010." 
+}, { + "id": "2528", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most common type of cybersecurity incident in each region in the year 2020?", + "sql_context": "CREATE TABLE cybersecurity_incidents (id INT, incident_type VARCHAR(255), year INT, affected_systems VARCHAR(255), region VARCHAR(255)); INSERT INTO cybersecurity_incidents (id, incident_type, year, affected_systems, region) VALUES (1, \u0027Data Breach\u0027, 2020, \u0027Web Servers\u0027, \u0027Asia\u0027), (2, \u0027Phishing\u0027, 2019, \u0027Email Accounts\u0027, \u0027Asia\u0027), (3, \u0027Malware\u0027, 2020, \u0027Workstations\u0027, \u0027Europe\u0027), (4, \u0027Ransomware\u0027, 2019, \u0027Databases\u0027, \u0027North America\u0027), (5, \u0027Data Breach\u0027, 2020, \u0027Web Servers\u0027, \u0027North America\u0027), (6, \u0027Phishing\u0027, 2020, \u0027Email Accounts\u0027, \u0027Europe\u0027);", + "sql": "SELECT region, MAX(incident_type) as most_common_incident FROM cybersecurity_incidents WHERE year \u003d 2020 GROUP BY region;", + "sql_explanation": "This query uses the MAX function to find the most common type of cybersecurity incident in each region in the year 2020. The GROUP BY clause is used to group the results by region." 
+}, { + "id": "2650", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military personnel in the Asia-Pacific region, and the number of personnel for each country?", + "sql_context": "CREATE TABLE military_personnel (id INT, name TEXT, country TEXT, region TEXT, rank TEXT, number INT);INSERT INTO military_personnel (id, name, country, region, rank, number) VALUES (1, \u0027John Doe\u0027, \u0027Country A\u0027, \u0027Asia-Pacific\u0027, \u0027Colonel\u0027, 1000), (2, \u0027Jane Smith\u0027, \u0027Country B\u0027, \u0027Asia-Pacific\u0027, \u0027General\u0027, 2000);", + "sql": "SELECT country, SUM(number) as total_personnel FROM military_personnel WHERE region \u003d \u0027Asia-Pacific\u0027 GROUP BY country;", + "sql_explanation": "This SQL query uses the GROUP BY statement to group the results by country, and then sums the number of military personnel for each country." 
+}, { + "id": "2943", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 2 most common types of military equipment in the \u0027MilitaryEquipment\u0027 table?", + "sql_context": "CREATE TABLE MilitaryEquipment (equipment_type VARCHAR(50), country VARCHAR(50), quantity INT); INSERT INTO MilitaryEquipment (equipment_type, country, quantity) VALUES (\u0027Tanks\u0027, \u0027USA\u0027, 8850); INSERT INTO MilitaryEquipment (equipment_type, country, quantity) VALUES (\u0027Fighter Jets\u0027, \u0027China\u0027, 2312); INSERT INTO MilitaryEquipment (equipment_type, country, quantity) VALUES (\u0027Submarines\u0027, \u0027Russia\u0027, 63);", + "sql": "SELECT equipment_type, COUNT(*) FROM MilitaryEquipment GROUP BY equipment_type ORDER BY COUNT(*) DESC LIMIT 2;", + "sql_explanation": "This query groups the \u0027MilitaryEquipment\u0027 table by \u0027equipment_type\u0027 column and returns the top 2 most common types by counting the number of rows, ordered by the count in descending order." 
+}, { + "id": "3320", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of intelligence personnel by region?", + "sql_context": "CREATE TABLE IntelligencePersonnelByRegion (Region VARCHAR(50), Personnel INT); INSERT INTO IntelligencePersonnelByRegion (Region, Personnel) VALUES (\u0027North America\u0027, 30000), (\u0027Asia\u0027, 25000), (\u0027Europe\u0027, 22000), (\u0027Middle East\u0027, 15000), (\u0027Africa\u0027, 10000);", + "sql": "SELECT Region, SUM(Personnel) as Total_Personnel FROM IntelligencePersonnelByRegion GROUP BY Region;", + "sql_explanation": "This SQL query calculates the total number of intelligence personnel by region by summing the \u0027Personnel\u0027 column grouped by \u0027Region\u0027, displaying the total number of personnel for each region." 
+}, { + "id": "4137", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many military personnel are stationed in each region based on the \u0027military_personnel\u0027 table?", + "sql_context": "CREATE TABLE military_personnel (id INT, personnel_name VARCHAR(255), region VARCHAR(255), rank VARCHAR(255), personnel_date DATE);", + "sql": "SELECT region, COUNT(*) as personnel_count FROM military_personnel GROUP BY region;", + "sql_explanation": "This query groups the \u0027military_personnel\u0027 table by region and calculates the number of military personnel for each region." +}, { + "id": "5402", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total budget allocated for each category.", + "sql_context": "CREATE TABLE budget (category TEXT, amount INTEGER); INSERT INTO budget (category, amount) VALUES (\u0027national security\u0027, 15000), (\u0027intelligence operations\u0027, 10000), (\u0027cybersecurity\u0027, 12000);", + "sql": "SELECT category, SUM(amount) FROM budget GROUP BY category", + "sql_explanation": "This query groups the budget by category and calculates the total budget allocated for each category." 
+}, { + "id": "888", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users signed up for the \u0027Elite\u0027 membership in the last quarter?", + "sql_context": "CREATE TABLE memberships (membership_id INT, membership_type VARCHAR(50), signup_date DATE); INSERT INTO memberships (membership_id, membership_type, signup_date) VALUES (1, \u0027Basic\u0027, \u00272022-01-10\u0027), (2, \u0027Premium\u0027, \u00272022-02-15\u0027), (3, \u0027Elite\u0027, \u00272022-03-20\u0027), (4, \u0027Basic\u0027, \u00272022-04-05\u0027);", + "sql": "SELECT membership_type, COUNT(membership_id) as new_members FROM memberships WHERE membership_type \u003d \u0027Elite\u0027 AND signup_date \u003e\u003d DATEADD(quarter, -1, CURRENT_DATE) GROUP BY membership_type;", + "sql_explanation": "This query calculates the number of users who signed up for the \u0027Elite\u0027 membership in the last quarter. It filters the records based on the membership_type and signup_date, then counts the number of records for \u0027Elite\u0027 using the COUNT function. Finally, it groups them by membership_type." 
+}, { + "id": "928", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the most active users in the last week.", + "sql_context": "CREATE TABLE user_activity (id INT, user_id INT, activity_level INT, activity_date DATE);", + "sql": "SELECT user_id, AVG(activity_level) as avg_activity_level FROM user_activity WHERE activity_date \u003e\u003d (CURRENT_DATE - INTERVAL \u00277 days\u0027) GROUP BY user_id ORDER BY avg_activity_level DESC;", + "sql_explanation": "Get user_id and average activity_level for the last week. Group by user_id and calculate the average activity_level. Order by avg_activity_level and get the top records." 
+}, { + "id": "977", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of users who have joined each month, for the past 12 months.", + "sql_context": "CREATE TABLE Memberships (id INT, user_id INT, start_date DATE); INSERT INTO Memberships (id, user_id, start_date) VALUES (1, 1, \u00272021-06-15\u0027), (2, 2, \u00272021-07-01\u0027), (3, 3, \u00272021-08-10\u0027), (4, 4, \u00272021-09-22\u0027), (5, 5, \u00272021-10-05\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM start_date) AS month, COUNT(DISTINCT user_id) AS users FROM Memberships WHERE start_date \u003e\u003d DATEADD(MONTH, -12, CURRENT_DATE) GROUP BY month ORDER BY month;", + "sql_explanation": "This query displays the number of users who have joined each month for the past 12 months by extracting the month from the start_date column, grouping by the month, filtering for the past 12 months, and then counting the number of distinct user_id records." 
+}, { + "id": "1256", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of users who joined in each month in 2022.", + "sql_context": "CREATE TABLE memberships (id INT, member_state VARCHAR(50), membership_start_date DATE, membership_fee FLOAT); INSERT INTO memberships (id, member_state, membership_start_date, membership_fee) VALUES (1, \u0027New York\u0027, \u00272022-01-05\u0027, 50.0), (2, \u0027California\u0027, \u00272022-01-10\u0027, 75.0);", + "sql": "SELECT DATE_FORMAT(membership_start_date, \u0027%Y-%m\u0027) AS Month, COUNT(*) FROM memberships WHERE membership_start_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY Month;", + "sql_explanation": "The SQL query lists the number of users who joined in each month in 2022 by filtering the memberships table for membership_start_date between \u00272022-01-01\u0027 and \u00272022-12-31\u0027 and grouping the results by the DATE_FORMAT function applied to the membership_start_date column." 
+}, { + "id": "1431", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users signed up for each membership type in the last month?", + "sql_context": "CREATE TABLE memberships (membership_id INT, membership_type VARCHAR(50), start_date DATE); INSERT INTO memberships (membership_id, membership_type, start_date) VALUES (1, \u0027Basic\u0027, \u00272022-05-01\u0027), (2, \u0027Premium\u0027, \u00272022-06-01\u0027), (3, \u0027Elite\u0027, \u00272022-07-01\u0027), (4, \u0027Basic\u0027, \u00272022-08-01\u0027);", + "sql": "SELECT membership_type, COUNT(membership_id) as users_signed_up FROM memberships WHERE start_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY membership_type;", + "sql_explanation": "This query calculates the number of users who signed up for each membership type in the last month. It filters the records based on the start_date and counts the number of records for each membership_type using the COUNT function. Finally, it groups them by membership_type." 
+}, { + "id": "2139", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify members who participated in both Running and Cycling activities.", + "sql_context": "CREATE TABLE member_workout (member_id INT, activity VARCHAR(50)); INSERT INTO member_workout (member_id, activity) VALUES (1, \u0027Running\u0027); INSERT INTO member_workout (member_id, activity) VALUES (1, \u0027Cycling\u0027); INSERT INTO member_workout (member_id, activity) VALUES (2, \u0027Yoga\u0027); INSERT INTO member_workout (member_id, activity) VALUES (3, \u0027Running\u0027);", + "sql": "SELECT member_id FROM member_workout WHERE activity IN (\u0027Running\u0027, \u0027Cycling\u0027) GROUP BY member_id HAVING COUNT(DISTINCT activity) \u003d 2;", + "sql_explanation": "This query identifies members from the member_workout table who participated in both Running and Cycling activities." 
+}, { + "id": "2220", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total time spent on yoga and swimming activities for each member?", + "sql_context": "CREATE TABLE activity_time (member_id INT, activity VARCHAR(20), time_spent INT); INSERT INTO activity_time (member_id, activity, time_spent) VALUES (1, \u0027Running\u0027, 60), (1, \u0027Cycling\u0027, 45), (2, \u0027Cycling\u0027, 90), (2, \u0027Yoga\u0027, 30), (3, \u0027Yoga\u0027, 60), (3, \u0027Swimming\u0027, 45), (4, \u0027Yoga\u0027, 45), (4, \u0027Swimming\u0027, 60), (5, \u0027Swimming\u0027, 75);", + "sql": "SELECT member_id, SUM(time_spent) AS total_time_spent FROM activity_time WHERE activity IN (\u0027Yoga\u0027, \u0027Swimming\u0027) GROUP BY member_id;", + "sql_explanation": "This query calculates the total time spent on yoga and swimming activities for each member. It uses the SUM function to find the total value of the time_spent column, and the WHERE clause with the IN operator to filter the records based on the activity column. It then uses the GROUP BY clause to group the records by the member_id column." 
+}, { + "id": "2462", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum heart rate for each member in the past year?", + "sql_context": "CREATE TABLE member_data(id INT, heart_rate INT, registration_date DATE); INSERT INTO member_data(id, heart_rate, registration_date) VALUES (1,70,\u00272021-01-02\u0027),(2,80,\u00272021-03-14\u0027),(3,65,\u00272021-05-29\u0027),(4,90,\u00272021-07-15\u0027),(5,75,\u00272021-10-01\u0027),(6,85,\u00272021-12-18\u0027);", + "sql": "SELECT id, MIN(heart_rate) FROM member_data WHERE registration_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY id;", + "sql_explanation": "The query filters the member_data table to include only rows from the past year and groups them by id. The minimum heart rate is then calculated for each group." 
+}, { + "id": "2561", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 2 users with the highest total duration of workouts in the \u0027workout_data\u0027 table?", + "sql_context": "CREATE TABLE workout_data (user_id INT, workout_type VARCHAR(20), duration INT); INSERT INTO workout_data (user_id, workout_type, duration) VALUES (1, \u0027Running\u0027, 30), (1, \u0027Cycling\u0027, 60), (2, \u0027Yoga\u0027, 45), (3, \u0027Pilates\u0027, 50), (1, \u0027Running\u0027, 45), (2, \u0027Yoga\u0027, 60), (3, \u0027Pilates\u0027, 75), (1, \u0027Running\u0027, 75), (2, \u0027Yoga\u0027, 90), (3, \u0027Pilates\u0027, 105), (8, \u0027Running\u0027, 120), (8, \u0027Cycling\u0027, 240), (9, \u0027Swimming\u0027, 300);", + "sql": "SELECT user_id, SUM(duration) as total_duration FROM workout_data GROUP BY user_id ORDER BY total_duration DESC LIMIT 2;", + "sql_explanation": "This SQL query groups the \u0027workout_data\u0027 table by \u0027user_id\u0027 and sums the duration of workouts for each user, providing the total duration of workouts for each user. The query then orders the results by total duration in descending order and limits the results to the top 2 users with the highest total duration of workouts." 
+}, { + "id": "2740", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which activities have the highest and lowest calories burned?", + "sql_context": "CREATE TABLE activity_data (member_id INT, activity VARCHAR(20), calories INT); INSERT INTO activity_data (member_id, activity, calories) VALUES (1, \u0027Running\u0027, 300), (2, \u0027Cycling\u0027, 400), (3, \u0027Yoga\u0027, 100), (4, \u0027Swimming\u0027, 250), (5, \u0027Pilates\u0027, 150);", + "sql": "SELECT activity, MAX(calories) AS max_calories, MIN(calories) AS min_calories FROM activity_data GROUP BY activity;", + "sql_explanation": "This query finds the activities with the highest and lowest calories burned. It uses the MAX and MIN functions to find the maximum and minimum values of the calories column, and the GROUP BY clause to group the records by the activity column." 
+}, { + "id": "2861", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of \u0027yoga\u0027 workouts for each member?\u0027", + "sql_context": "CREATE TABLE workouts (id INT, member_id INT, activity_type VARCHAR(50), duration INT); INSERT INTO workouts (id, member_id, activity_type, duration) VALUES (1, 1, \u0027yoga\u0027, 60), (2, 1, \u0027cardio\u0027, 45), (3, 2, \u0027yoga\u0027, 75), (4, 2, \u0027strength\u0027, 60), (5, 3, \u0027yoga\u0027, 90), (6, 3, \u0027cardio\u0027, 45);", + "sql": "SELECT member_id, COUNT(*) AS total_yoga_workouts FROM workouts WHERE activity_type \u003d \u0027yoga\u0027 GROUP BY member_id;", + "sql_explanation": "The SQL query filters the records for the \u0027yoga\u0027 activity type and groups them by member_id. It then calculates the total number of workouts for each member by counting the records in each group." 
+}, { + "id": "3984", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total minutes spent in fitness classes by each member?", + "sql_context": "CREATE TABLE class_minutes(member_id INT, class_type VARCHAR(20), minutes INT); INSERT INTO class_minutes(member_id, class_type, minutes) VALUES (1, \u0027Yoga\u0027, 60); INSERT INTO class_minutes(member_id, class_type, minutes) VALUES (1, \u0027Pilates\u0027, 45); INSERT INTO class_minutes(member_id, class_type, minutes) VALUES (2, \u0027Yoga\u0027, 90); INSERT INTO class_minutes(member_id, class_type, minutes) VALUES (3, \u0027Pilates\u0027, 30);", + "sql": "SELECT member_id, SUM(minutes) as total_minutes FROM class_minutes GROUP BY member_id;", + "sql_explanation": "This query calculates the total minutes spent in fitness classes for each member by grouping the class_minutes table by member_id and applying the SUM() function to the minutes column." 
+}, { + "id": "4476", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Compare the average weight lost by members in \u0027LA\u0027 and \u0027TX\u0027.", + "sql_context": "CREATE TABLE member_weight_loss (member_id INT, location VARCHAR(2), weight_lost FLOAT); INSERT INTO member_weight_loss (member_id, location, weight_lost) VALUES (1, \u0027LA\u0027, 5.0), (2, \u0027TX\u0027, 7.5), (3, \u0027LA\u0027, 3.5), (4, \u0027TX\u0027, 6.0), (5, \u0027LA\u0027, 4.5);", + "sql": "SELECT location, AVG(weight_lost) FROM member_weight_loss GROUP BY location;", + "sql_explanation": "The SQL query compares the average weight lost by members in \u0027LA\u0027 and \u0027TX\u0027 by grouping the rows by the location column and calculating the average weight lost for each group. The GROUP BY keyword is used to divide the rows into groups, and the AVG function is used to calculate the average weight lost for each group." 
+}, { + "id": "4601", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average heart rate for members from each country?", + "sql_context": "CREATE TABLE member_demographics (member_id INT, country VARCHAR(50), heart_rate INT); INSERT INTO member_demographics (member_id, country, heart_rate) VALUES (1, \u0027USA\u0027, 70), (2, \u0027Canada\u0027, 80), (3, \u0027Mexico\u0027, 65), (4, \u0027Brazil\u0027, 75), (5, \u0027Argentina\u0027, 78), (6, \u0027USA\u0027, 75), (7, \u0027Canada\u0027, 70), (8, \u0027Mexico\u0027, 80), (9, \u0027Brazil\u0027, 85), (10, \u0027Argentina\u0027, 72);", + "sql": "SELECT country, AVG(heart_rate) FROM member_demographics GROUP BY country;", + "sql_explanation": "The SQL query calculates the average heart rate for members from each country by using the GROUP BY clause to group records by country and AVG function to calculate the average heart rate." 
+}, { + "id": "5457", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum heart rate recorded during workouts in each city?", + "sql_context": "CREATE TABLE Workouts (WorkoutID INT, MemberID INT, City VARCHAR(50), HeartRate INT); INSERT INTO Workouts (WorkoutID, MemberID, City, HeartRate) VALUES (1,1,\u0027New York\u0027,120),(2,2,\u0027Los Angeles\u0027,130),(3,3,\u0027Chicago\u0027,100);", + "sql": "SELECT City, MAX(HeartRate) FROM Workouts GROUP BY City;", + "sql_explanation": "This query groups the Workouts table by City and calculates the maximum heart rate for each City by finding the maximum HeartRate value for rows with each unique City." 
+}, { + "id": "1608", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of unique research grants awarded to each researcher in the Business division, ordered by the number of grants, pivoted by researcher gender.", + "sql_context": "CREATE TABLE grant (id INT, researcher VARCHAR(50), gender VARCHAR(10), division VARCHAR(30), amount FLOAT, date DATE); INSERT INTO grant (id, researcher, gender, division, amount, date) VALUES (1, \u0027Xavier\u0027, \u0027Male\u0027, \u0027Business\u0027, 50000.00, \u00272020-03-01\u0027), (2, \u0027Yara\u0027, \u0027Female\u0027, \u0027Business\u0027, 50000.00, \u00272019-06-15\u0027);", + "sql": "SELECT gender, researcher, COUNT(DISTINCT id) as num_grants FROM grant WHERE division \u003d \u0027Business\u0027 GROUP BY gender, researcher ORDER BY num_grants DESC;", + "sql_explanation": "This query lists the number of unique research grants awarded to each researcher in the Business division, ordered by the number of grants, pivoted by researcher gender, by grouping by \u0027gender\u0027 and \u0027researcher\u0027 and counting the number of distinct \u0027id\u0027 values for each group in the \u0027grant\u0027 table where \u0027division\u0027 is \u0027Business\u0027, then ordering the groups by the count in descending order." 
+}, { + "id": "2302", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of graduate students enrolled in each Engineering discipline", + "sql_context": "CREATE TABLE graduate_students(student_id INT, name VARCHAR(50), gender VARCHAR(10), discipline VARCHAR(20)); INSERT INTO graduate_students VALUES (1, \u0027Aarav\u0027, \u0027Male\u0027, \u0027Electrical Engineering\u0027); INSERT INTO graduate_students VALUES (2, \u0027Bella\u0027, \u0027Female\u0027, \u0027Mechanical Engineering\u0027); INSERT INTO graduate_students VALUES (3, \u0027Charlie\u0027, \u0027Non-binary\u0027, \u0027Civil Engineering\u0027);", + "sql": "SELECT discipline, COUNT(*) as enrolled_students FROM graduate_students WHERE discipline LIKE \u0027Engineering%\u0027 GROUP BY discipline;", + "sql_explanation": "The query filters for Engineering disciplines and groups by discipline, counting the number of enrolled graduate students." 
+}, { + "id": "2562", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many graduate students have published more than 1 paper in each department?", + "sql_context": "CREATE TABLE department_publications (id INT, department VARCHAR(255), num_publications INT); INSERT INTO department_publications (id, department, num_publications) VALUES (1, \u0027Computer Science\u0027, 1), (2, \u0027Physics\u0027, 2), (3, \u0027Mathematics\u0027, 1), (4, \u0027Computer Science\u0027, 3);", + "sql": "SELECT department, COUNT(*) as num_students FROM department_publications WHERE num_publications \u003e 1 GROUP BY department;", + "sql_explanation": "This query groups the department_publications table by department, then counts the number of students who have published more than 1 paper in each department." 
+}, { + "id": "3003", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the publication rate of graduate students in the Humanities department?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(100), department VARCHAR(50), publication_count INT); INSERT INTO students VALUES (1, \u0027Bob Brown\u0027, \u0027Humanities\u0027, 3);", + "sql": "SELECT department, AVG(publication_count) FROM students WHERE department \u003d \u0027Humanities\u0027 GROUP BY department;", + "sql_explanation": "The SQL query calculates the publication rate of graduate students in the Humanities department. It first selects the Humanities department and calculates the average publication count for that department. Then, it groups the results by department to get the final output." 
+}, { + "id": "3099", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of research grants awarded to each department, sorted by the total amount.", + "sql_context": "CREATE TABLE grant (id INT, department VARCHAR(30), amount FLOAT, date DATE); INSERT INTO grant (id, department, amount, date) VALUES (1, \u0027Physics\u0027, 200000.00, \u00272021-01-01\u0027), (2, \u0027Chemistry\u0027, 150000.00, \u00272020-07-14\u0027);", + "sql": "SELECT department, SUM(amount) as total_amount FROM grant GROUP BY department ORDER BY total_amount DESC;", + "sql_explanation": "This query shows the total number of research grants awarded to each department, sorted by the total amount, by grouping by \u0027department\u0027 and calculating the sum of the \u0027amount\u0027 column for each group, then ordering the groups by the sum in descending order." 
+}, { + "id": "3632", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many graduate students are enrolled in each department?", + "sql_context": "CREATE TABLE graduate_students (student_id INT, name TEXT, gpa DECIMAL(3,2), department TEXT);", + "sql": "SELECT gs.department, COUNT(gs.student_id) FROM graduate_students gs GROUP BY gs.department;", + "sql_explanation": "We perform a group by operation on the graduate_students table, grouping the results by department. Then, we calculate the count of students in each department to determine the enrollment in each department." +}, { + "id": "3739", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students from each country are enrolled in graduate programs?", + "sql_context": "CREATE TABLE graduate_students (student_id INT, name VARCHAR(50), country VARCHAR(50));", + "sql": "SELECT gs.country, COUNT(*) AS num_students FROM graduate_students gs GROUP BY gs.country;", + "sql_explanation": "The SQL query first groups the records in the graduate_students table by the country field. It then calculates the number of records in each group. 
The result is a table showing the number of graduate students from each country." +}, { + "id": "493", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 countries with the highest total donation amounts in the \u0027Health\u0027 sector for the year 2020, ordered by the donation amount in descending order.", + "sql_context": "CREATE TABLE Donors (donor_id INT, donor_name VARCHAR(255), donation_amount INT, sector VARCHAR(255), region VARCHAR(255), donation_date DATE); INSERT INTO Donors (donor_id, donor_name, donation_amount, sector, region, donation_date) VALUES (4, \u0027DonorC\u0027, 150000, \u0027Health\u0027, \u0027Africa\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT region AS country, SUM(donation_amount) AS total_donation FROM Donors WHERE sector \u003d \u0027Health\u0027 AND donation_date \u003e\u003d \u00272020-01-01\u0027 AND donation_date \u003c \u00272021-01-01\u0027 GROUP BY region ORDER BY total_donation DESC LIMIT 2;", + "sql_explanation": "The SQL query filters the Donors table for rows where the sector is \u0027Health\u0027 and the donation_date is in the year 2020. It then groups the filtered results by region and calculates the total donation amount for each group. Finally, it orders the groups by total_donation in descending order and selects the top 2 region and total_donation rows." 
+}, { + "id": "1125", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average expenditure per month on community development projects in Africa, broken down by project category?", + "sql_context": "CREATE TABLE community_development (project_id INT, ngo_id INT, start_date DATE, end_date DATE, category VARCHAR(255), expenditure DECIMAL(10,2)); INSERT INTO community_development VALUES (1, 1, \u00272020-01-01\u0027, \u00272020-12-31\u0027, \u0027Agriculture\u0027, 15000); INSERT INTO community_development VALUES (2, 1, \u00272020-01-01\u0027, \u00272020-12-31\u0027, \u0027Education\u0027, 20000); INSERT INTO community_development VALUES (3, 2, \u00272020-01-01\u0027, \u00272020-12-31\u0027, \u0027Healthcare\u0027, 30000);", + "sql": "SELECT category, AVG(expenditure / (DATEDIFF(end_date, start_date) / 30)) as avg_monthly_expenditure FROM community_development WHERE ngo.region \u003d \u0027Africa\u0027 GROUP BY category;", + "sql_explanation": "This query first calculates the average monthly expenditure for each project by dividing the total expenditure by the number of months between the start and end dates. It then groups the results by the category field and calculates the average monthly expenditure for each category." 
+}, { + "id": "1351", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total amount of donations received by each NGO in 2022?", + "sql_context": "CREATE TABLE ngo_donations (id INT PRIMARY KEY, ngo_name TEXT, donation_amount FLOAT, donation_date DATE); INSERT INTO ngo_donations (id, ngo_name, donation_amount, donation_date) VALUES (1, \u0027Medicins Sans Frontieres\u0027, 500, \u00272022-01-01\u0027); CREATE TABLE ngo_info (id INT PRIMARY KEY, ngo_name TEXT, ngo_address TEXT); INSERT INTO ngo_info (id, ngo_name, ngo_address) VALUES (1, \u0027Medicins Sans Frontieres\u0027, \u0027Belgium, Brussels\u0027);", + "sql": "SELECT ngo_name, SUM(donation_amount) as total_donations FROM ngo_donations WHERE donation_date \u003e\u003d \u00272022-01-01\u0027 AND donation_date \u003c \u00272023-01-01\u0027 GROUP BY ngo_name;", + "sql_explanation": "The query groups the ngo_donations table by ngo_name and filters the records for the year 2022. It then calculates the sum of donation_amount for each group, returning the total amount of donations received by each NGO in 2022." 
+}, { + "id": "1465", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total amount of donations by each organization for the \u0027Food Security\u0027 program in 2021?", + "sql_context": "CREATE TABLE organizations (id INT, name VARCHAR(255)); INSERT INTO organizations (id, name) VALUES (1, \u0027WFP\u0027), (2, \u0027UNICEF\u0027), (3, \u0027CARE\u0027); CREATE TABLE donations (id INT, organization_id INT, program VARCHAR(255), amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, organization_id, program, amount, donation_date) VALUES (1, 1, \u0027Food Security\u0027, 5000, \u00272021-01-01\u0027), (2, 1, \u0027Health\u0027, 7000, \u00272021-02-01\u0027), (3, 2, \u0027Food Security\u0027, 3000, \u00272021-03-01\u0027), (4, 2, \u0027Health\u0027, 6000, \u00272021-04-01\u0027), (5, 3, \u0027Food Security\u0027, 4000, \u00272021-05-01\u0027);", + "sql": "SELECT organization_id, SUM(amount) as total_donations FROM donations WHERE program \u003d \u0027Food Security\u0027 AND YEAR(donation_date) \u003d 2021 GROUP BY organization_id;", + "sql_explanation": "The SQL query groups the donations by organization_id and program, filters for the \u0027Food Security\u0027 program and the year 2021, and calculates the sum of the amount column for each group." 
+}, { + "id": "1744", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community development projects in each country?", + "sql_context": "CREATE TABLE projects (id INT, country VARCHAR(50), project_type VARCHAR(50), start_date DATE); INSERT INTO projects (id, country, project_type, start_date) VALUES (1, \u0027USA\u0027, \u0027Community Development\u0027, \u00272021-01-01\u0027), (2, \u0027Canada\u0027, \u0027Community Development\u0027, \u00272021-02-15\u0027), (3, \u0027Mexico\u0027, \u0027Community Development\u0027, \u00272021-03-01\u0027); INSERT INTO projects (id, country, project_type, start_date) VALUES (4, \u0027Brazil\u0027, \u0027Infrastructure\u0027, \u00272021-04-01\u0027), (5, \u0027Colombia\u0027, \u0027Education\u0027, \u00272021-05-01\u0027);", + "sql": "SELECT country, project_type, COUNT(*) as total_projects FROM projects WHERE project_type \u003d \u0027Community Development\u0027 GROUP BY country, project_type;", + "sql_explanation": "This SQL query calculates the total number of community development projects in each country. It does this by filtering the projects table for records with a project type of \u0027Community Development\u0027, then grouping the results by country and project type and calculating the count of records for each group." 
+}, { + "id": "1965", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of refugees who have received food assistance in the past 6 months from the \u0027Food for Life\u0027 program, grouped by their region?", + "sql_context": "CREATE TABLE refugees(id INT, region TEXT, assistance TEXT, date DATE); INSERT INTO refugees(id, region, assistance, date) VALUES (1, \u0027Africa\u0027, \u0027Food for Life\u0027, \u00272022-01-01\u0027), (2, \u0027Asia\u0027, \u0027Health Care\u0027, \u00272022-02-01\u0027), (3, \u0027Africa\u0027, \u0027Food for Life\u0027, \u00272022-06-01\u0027);", + "sql": "SELECT region, COUNT(*) FROM refugees WHERE assistance \u003d \u0027Food for Life\u0027 AND date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH) GROUP BY region;", + "sql_explanation": "This SQL query counts the number of refugees who have received food assistance from the \u0027Food for Life\u0027 program in the past 6 months, grouped by their region. It filters the data where assistance is \u0027Food for Life\u0027 and date is within the past 6 months using the DATE_SUB function. It then groups the data by region using the GROUP BY clause." 
+}, { + "id": "1992", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of aid given to Syrian refugees by the United Nations since 2018?", + "sql_context": "CREATE TABLE un_aid (id INT, recipient VARCHAR(50), aid_type VARCHAR(50), amount FLOAT, date DATE); INSERT INTO un_aid (id, recipient, aid_type, amount, date) VALUES (1, \u0027Syrian refugees\u0027, \u0027cash assistance\u0027, 500000, \u00272018-01-01\u0027);", + "sql": "SELECT recipient, SUM(amount) as total_un_aid FROM un_aid WHERE recipient \u003d \u0027Syrian refugees\u0027 AND date \u003e\u003d \u00272018-01-01\u0027 GROUP BY recipient;", + "sql_explanation": "This query calculates the total amount of aid given to Syrian refugees by the United Nations since 2018." 
+}, { + "id": "2271", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total amount of donations received by each organization in 2020?", + "sql_context": "CREATE TABLE Donations (org_name TEXT, donation_amount INTEGER, donation_date DATE); INSERT INTO Donations (org_name, donation_amount, donation_date) VALUES (\u0027Organization A\u0027, 5000, \u00272020-01-01\u0027); INSERT INTO Donations (org_name, donation_amount, donation_date) VALUES (\u0027Organization B\u0027, 7000, \u00272020-02-15\u0027);", + "sql": "SELECT org_name, SUM(donation_amount) FROM Donations WHERE donation_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027 GROUP BY org_name;", + "sql_explanation": "The SQL query selects the organization name and sum of donation amounts for each organization from the Donations table, filtering for records between January 1, 2020 and December 31, 2020. It then groups the result by organization name to provide a total donation amount for each organization in 2020." 
+}, { + "id": "2544", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of food assistance provided by UN agencies to refugees in Uganda and Kenya, grouped by agency?", + "sql_context": "CREATE TABLE food_assistance (id INT, agency VARCHAR(255), country VARCHAR(255), amount DECIMAL(10, 2)); INSERT INTO food_assistance (id, agency, country, amount) VALUES (\u00271\u0027, \u0027WFP\u0027, \u0027Uganda\u0027, \u0027700000\u0027), (\u00272\u0027, \u0027UNHCR\u0027, \u0027Uganda\u0027, \u0027800000\u0027), (\u00273\u0027, \u0027FAO\u0027, \u0027Uganda\u0027, \u0027600000\u0027), (\u00274\u0027, \u0027WFP\u0027, \u0027Kenya\u0027, \u0027900000\u0027), (\u00275\u0027, \u0027UNHCR\u0027, \u0027Kenya\u0027, \u0027500000\u0027), (\u00276\u0027, \u0027FAO\u0027, \u0027Kenya\u0027, \u0027400000\u0027);", + "sql": "SELECT agency, SUM(amount) as total_assistance FROM food_assistance WHERE country IN (\u0027Uganda\u0027, \u0027Kenya\u0027) GROUP BY agency;", + "sql_explanation": "This SQL query filters the food_assistance table for entries from Uganda and Kenya, then groups the results by agency and calculates the total amount of food assistance for each agency." 
+}, { + "id": "3241", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many people were displaced due to natural disasters in Indonesia and Philippines?", + "sql_context": "CREATE TABLE displaced_persons (id INT, country VARCHAR(20), person_id INT, displacement_date DATE);", + "sql": "SELECT country, COUNT(DISTINCT person_id) as displaced_people FROM displaced_persons GROUP BY country;", + "sql_explanation": "This SQL query calculates the number of people who were displaced due to natural disasters in Indonesia and Philippines. It groups the displaced_persons table by country and then calculates the count of distinct person_id\u0027s for each group." 
+}, { + "id": "3624", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of funding received by a single organization in the year 2021?", + "sql_context": "CREATE TABLE funding (id INT, organization VARCHAR(255), year INT, amount DECIMAL(10,2));", + "sql": "SELECT MAX(amount) FROM funding WHERE year \u003d 2021 GROUP BY organization HAVING COUNT(*) \u003d 1;", + "sql_explanation": "This SQL query calculates the maximum amount of funding received by a single organization in the year 2021. It does this by using the MAX() function on the \u0027amount\u0027 column, filtering the \u0027funding\u0027 table to only include rows where the \u0027year\u0027 column is equal to 2021, and grouping the results by the \u0027organization\u0027 column. The HAVING clause is then used to only include groups where the count of rows is equal to 1, ensuring that the maximum amount of funding is only calculated for a single organization." 
+}, { + "id": "3734", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and age of the oldest donor by gender?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, age INT, gender TEXT, contribution FLOAT, location TEXT); INSERT INTO donors (id, name, age, gender, contribution, location) VALUES (1, \u0027Alice Johnson\u0027, 45, \u0027Female\u0027, 500.00, \u0027San Francisco\u0027); INSERT INTO donors (id, name, age, gender, contribution, location) VALUES (2, \u0027Bob Brown\u0027, 50, \u0027Male\u0027, 1000.00, \u0027Chicago\u0027);", + "sql": "SELECT gender, MAX(age) as max_age, MIN(name) as oldest_donor FROM donors GROUP BY gender;", + "sql_explanation": "The SQL query uses the MAX() and MIN() functions to find the age and name of the oldest donor by gender." 
+}, { + "id": "3800", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of funds spent on refugee support in each region?", + "sql_context": "CREATE TABLE funds (id INT, category TEXT, region TEXT, amount DECIMAL(10,2)); INSERT INTO funds (id, category, region, amount) VALUES (1, \u0027Refugee Support\u0027, \u0027Middle East\u0027, 250000.00), (2, \u0027Disaster Response\u0027, \u0027Asia\u0027, 300000.00), (3, \u0027Community Development\u0027, \u0027Africa\u0027, 150000.00);", + "sql": "SELECT region, MAX(amount) FROM funds WHERE category \u003d \u0027Refugee Support\u0027 GROUP BY region;", + "sql_explanation": "This query finds the maximum amount of funds spent on refugee support in each region. It uses the MAX function to find the highest value in the amount column for rows with \u0027Refugee Support\u0027 in the category column and GROUP BY clause to group by region." 
+}, { + "id": "3868", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of donations received by organizations in India and Pakistan?", + "sql_context": "CREATE TABLE donations (id INT, country VARCHAR(20), organization_id INT, donation_amount DECIMAL(10, 2), donation_date DATE);", + "sql": "SELECT country, SUM(donation_amount) as total_donations FROM donations GROUP BY country;", + "sql_explanation": "This SQL query calculates the total amount of donations received by organizations in India and Pakistan. It groups the donations table by country and then calculates the sum of the donation_amount column for each group." 
+}, { + "id": "4029", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the average weight of items donated for each disaster in the \u0027disaster_donations\u0027 table.", + "sql_context": "CREATE TABLE disaster_donations (id INT, disaster VARCHAR(50), item VARCHAR(50), weight INT); INSERT INTO disaster_donations (id, disaster, item, weight) VALUES (1, \u0027Flood\u0027, \u0027Water\u0027, 50), (2, \u0027Flood\u0027, \u0027Food\u0027, 100), (3, \u0027Earthquake\u0027, \u0027Tents\u0027, 200), (4, \u0027Earthquake\u0027, \u0027Medicine\u0027, 150);", + "sql": "SELECT disaster, AVG(weight) as avg_weight FROM disaster_donations GROUP BY disaster;", + "sql_explanation": "This query calculates the average weight of items donated for each disaster in the \u0027disaster_donations\u0027 table." 
+}, { + "id": "4047", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of advocacy campaigns and their statuses in \u0027advocacy\u0027 schema?", + "sql_context": "CREATE TABLE campaigns (campaign_id INT, campaign_name VARCHAR(255), status VARCHAR(255)); INSERT INTO campaigns (campaign_id, campaign_name, status) VALUES (1, \u0027Campaign A\u0027, \u0027Active\u0027);", + "sql": "SELECT COUNT(campaign_id) as total_campaigns, status FROM campaigns GROUP BY status;", + "sql_explanation": "This query counts the number of advocacy campaigns and their statuses by grouping by the \u0027status\u0027 column and counting the \u0027campaign_id\u0027 column." 
+}, { + "id": "4114", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many primary and secondary schools are there in Afghanistan, ordered by school type?", + "sql_context": "CREATE TABLE Afghanistan (id INT, name TEXT, type TEXT, location TEXT); INSERT INTO Afghanistan (id, name, type, location) VALUES (1, \u0027School A\u0027, \u0027Primary\u0027, \u0027Kabul\u0027); INSERT INTO Afghanistan (id, name, type, location) VALUES (2, \u0027School B\u0027, \u0027Secondary\u0027, \u0027Kandahar\u0027); INSERT INTO Afghanistan (id, name, type, location) VALUES (3, \u0027School C\u0027, \u0027Primary\u0027, \u0027Herat\u0027);", + "sql": "SELECT type, COUNT(*) AS school_count FROM Afghanistan GROUP BY type ORDER BY type;", + "sql_explanation": "This query calculates the number of primary and secondary schools in Afghanistan by grouping the rows in the Afghanistan table by the type column and then counting the number of rows in each group. It orders the result by school type." 
+}, { + "id": "4333", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average amount of donations made by individual donors from the USA?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, country TEXT, donation_amount DECIMAL); INSERT INTO donors (id, name, country, donation_amount) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027, 50.00), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027, 100.00), (3, \u0027Maria Garcia\u0027, \u0027USA\u0027, 25.00);", + "sql": "SELECT AVG(donation_amount) FROM donors WHERE country \u003d \u0027USA\u0027 GROUP BY country;", + "sql_explanation": "This query calculates the average donation amount for donors from the USA by grouping donors based on their country and then calculating the average donation amount." 
+}, { + "id": "4538", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many community development projects were completed in 2021, and which districts were they in?", + "sql_context": "CREATE TABLE projects (id INT, district TEXT, year INT, type TEXT); INSERT INTO projects (id, district, year, type) VALUES (1, \u0027DistrictA\u0027, 2021, \u0027Health\u0027), (2, \u0027DistrictB\u0027, 2020, \u0027Education\u0027), (3, \u0027DistrictC\u0027, 2021, \u0027WaterSanitation\u0027);", + "sql": "SELECT district, COUNT(*) FROM projects WHERE year \u003d 2021 GROUP BY district;", + "sql_explanation": "This query filters the \u0027projects\u0027 table based on the year 2021, and then groups the districts with the number of completed projects in that year." 
+}, { + "id": "4745", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of donations received by each organization in HQ12?", + "sql_context": "CREATE TABLE org_donations (org_id INT, hq VARCHAR(5), amount DECIMAL(10,2)); INSERT INTO org_donations (org_id, hq, amount) VALUES (1, \u0027HQ12\u0027, 5000.00), (2, \u0027HQ12\u0027, 3000.00), (3, \u0027HQ13\u0027, 7000.00);", + "sql": "SELECT hq, SUM(amount) FROM org_donations WHERE hq \u003d \u0027HQ12\u0027 GROUP BY hq;", + "sql_explanation": "This query filters the org_donations table to only include rows where the hq is HQ12, then groups the results by hq and calculates the sum of the amount column for each group." 
+}, { + "id": "5079", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of medical visits per person in Afghanistan and Pakistan?", + "sql_context": "CREATE TABLE medical_visits (id INT, country VARCHAR(255), person_id INT, visits INT); INSERT INTO medical_visits (id, country, person_id, visits) VALUES (1, \u0027Afghanistan\u0027, 1, 3), (2, \u0027Afghanistan\u0027, 2, 4), (3, \u0027Pakistan\u0027, 3, 5), (4, \u0027Pakistan\u0027, 4, 6);", + "sql": "SELECT country, AVG(visits) FROM medical_visits GROUP BY country;", + "sql_explanation": "This query groups rows by country and then calculates the average visits column for each group." 
+}, { + "id": "1215", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total CO2 emission reduction in metric tons by project?", + "sql_context": "CREATE TABLE co2_emission_reduction (id INT, project VARCHAR(255), reduction INT, reduction_unit VARCHAR(10));", + "sql": "SELECT project, SUM(reduction) as total_co2_reduction FROM co2_emission_reduction WHERE reduction_unit \u003d \u0027metric tons\u0027 GROUP BY project HAVING total_co2_reduction \u003e 1000;", + "sql_explanation": "This query calculates the total CO2 emission reduction in metric tons by project, using the SUM function to add up the reduction column. The HAVING clause filters the results to only include projects with more than 1000 metric tons of CO2 reduction." 
+}, { + "id": "1282", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total waste generation by material type in 2023 for the top 3 contributors", + "sql_context": "CREATE TABLE waste_generation (year INT, location VARCHAR(255), material VARCHAR(255), weight_tons INT); INSERT INTO waste_generation (year, location, material, weight_tons) VALUES (2023, \u0027Denver\u0027, \u0027Plastic\u0027, 11000), (2023, \u0027Denver\u0027, \u0027Paper\u0027, 13000), (2023, \u0027Denver\u0027, \u0027Glass\u0027, 8000), (2023, \u0027Portland\u0027, \u0027Plastic\u0027, 12000), (2023, \u0027Portland\u0027, \u0027Paper\u0027, 15000), (2023, \u0027Portland\u0027, \u0027Glass\u0027, 9000), (2023, \u0027Austin\u0027, \u0027Plastic\u0027, 13000), (2023, \u0027Austin\u0027, \u0027Paper\u0027, 16000), (2023, \u0027Austin\u0027, \u0027Glass\u0027, 10000);", + "sql": "SELECT location, material, SUM(weight_tons) as total_weight FROM waste_generation WHERE year \u003d 2023 GROUP BY location, material ORDER BY SUM(weight_tons) DESC LIMIT 3;", + "sql_explanation": "This SQL query calculates the total waste generation by material type in 2023 for the top 3 contributors. It does so by summing the weight_tons for each location and material type in the waste_generation table where the year is 2023, grouping by location and material type, and then ordering the result by the sum of weight_tons in descending order and limiting the result to the top 3 records." 
+}, { + "id": "1680", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation in the Asian region for the year 2020, separated by country?", + "sql_context": "CREATE TABLE WasteGeneration (country VARCHAR(50), year INT, waste_quantity INT); INSERT INTO WasteGeneration (country, year, waste_quantity) VALUES (\u0027India\u0027, 2020, 100000), (\u0027China\u0027, 2020, 120000), (\u0027Japan\u0027, 2020, 80000);", + "sql": "SELECT country, SUM(waste_quantity) FROM WasteGeneration WHERE year \u003d 2020 AND country IN (\u0027Asia/India\u0027, \u0027Asia/China\u0027, \u0027Asia/Japan\u0027) GROUP BY country;", + "sql_explanation": "This query filters the data for the year 2020 and the Asian countries, then sums the waste quantity for each country separately." 
+}, { + "id": "2340", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the total waste generation for \u0027South America\u0027 in 2018 and 2019 from the \u0027waste_generation\u0027 table", + "sql_context": "CREATE TABLE waste_generation (id INT, country VARCHAR(50), year INT, total_waste_gen FLOAT);", + "sql": "SELECT year, SUM(total_waste_gen) FROM waste_generation WHERE year IN (2018, 2019) AND country \u003d \u0027South America\u0027 GROUP BY year;", + "sql_explanation": "This query retrieves the total waste generation for \u0027South America\u0027 in 2018 and 2019 by summing up the \u0027total_waste_gen\u0027 column values where the \u0027year\u0027 column values are 2018 and 2019, and the \u0027country\u0027 column value is \u0027South America\u0027. The results are grouped by the \u0027year\u0027 column." 
+}, { + "id": "2897", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation in North America for each year?", + "sql_context": "CREATE TABLE waste_generation (country VARCHAR(50), year INT, total_waste INT); INSERT INTO waste_generation (country, year, total_waste) VALUES (\u0027USA\u0027, 2015, 250000), (\u0027Canada\u0027, 2015, 150000), (\u0027USA\u0027, 2016, 260000);", + "sql": "SELECT year, SUM(total_waste) FROM waste_generation WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027Mexico\u0027) GROUP BY year;", + "sql_explanation": "This SQL query calculates the total waste generation in North America for each year. It does this by summing up the total waste for each year in the waste_generation table where the country is one of the North American countries. The result is grouped by year, so we get a separate sum for each year." 
+}, { + "id": "2923", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation trend in metric tons per year for the Caribbean region from 2017 to 2021?", + "sql_context": "CREATE TABLE waste_generation_trend_caribbean (region VARCHAR(50), year INT, waste_amount FLOAT); INSERT INTO waste_generation_trend_caribbean (region, year, waste_amount) VALUES (\u0027Caribbean\u0027, 2017, 120000.0), (\u0027Caribbean\u0027, 2018, 130000.0), (\u0027Caribbean\u0027, 2019, 140000.0), (\u0027Caribbean\u0027, 2020, 150000.0), (\u0027Caribbean\u0027, 2021, 160000.0);", + "sql": "SELECT year, SUM(waste_amount) FROM waste_generation_trend_caribbean WHERE region \u003d \u0027Caribbean\u0027 GROUP BY year;", + "sql_explanation": "The SQL query retrieves the total waste generation trend in metric tons per year for the Caribbean region from 2017 to 2021 by summing the waste_amount for each year and grouping by year." 
+}, { + "id": "3001", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total capacity (in cubic meters) of landfills for each state that have a total capacity greater than 100000 cubic meters.", + "sql_context": "CREATE TABLE LandfillData (LandfillID INT, State VARCHAR(255), Capacity DECIMAL(10,2)); INSERT INTO LandfillData (LandfillID, State, Capacity) VALUES (1, \u0027California\u0027, 120000), (2, \u0027New York\u0027, 80000), (3, \u0027Texas\u0027, 150000);", + "sql": "SELECT State, SUM(Capacity) AS TotalCapacity FROM LandfillData GROUP BY State HAVING SUM(Capacity) \u003e 100000;", + "sql_explanation": "This query calculates the total capacity (in cubic meters) of landfills for each state. It sums the Capacity column for rows with each unique State. It then groups the results by the State and filters out rows with a TotalCapacity less than or equal to 100000 cubic meters using the HAVING clause." 
+}, { + "id": "3333", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view for recycling centers in each state.", + "sql_context": "CREATE TABLE recycling_centers (state VARCHAR(20), num_centers INT);", + "sql": "CREATE VIEW state_recycling_centers AS SELECT state, COUNT(*) FROM recycling_centers GROUP BY state;", + "sql_explanation": "This query creates a view named state_recycling_centers that lists the number of recycling centers in each state, grouped by state." +}, { + "id": "3577", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation in grams for each country in 2020?", + "sql_context": "CREATE TABLE WasteGeneration (country VARCHAR(255), year INT, waste_generation FLOAT); INSERT INTO WasteGeneration (country, year, waste_generation) VALUES (\u0027USA\u0027, 2020, 5000), (\u0027Canada\u0027, 2020, 4000), (\u0027Mexico\u0027, 2020, 3000);", + "sql": "SELECT country, SUM(waste_generation) FROM WasteGeneration WHERE year \u003d 2020 GROUP BY country;", + "sql_explanation": "This query calculates the total waste generation in grams for each country in the WasteGeneration table for the year 2020. 
It uses the SUM() aggregation function to add up the waste_generation values for each country and the GROUP BY clause to group the results by country." +}, { + "id": "3860", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste generation in gram by each city in the year 2020?", + "sql_context": "CREATE TABLE city_waste_generation (city VARCHAR(255), waste_gram INT, year INT); INSERT INTO city_waste_generation (city, waste_gram, year) VALUES (\u0027CityA\u0027, 1200, 2020), (\u0027CityB\u0027, 1500, 2020), (\u0027CityC\u0027, 1800, 2020);", + "sql": "SELECT city, SUM(waste_gram) FROM city_waste_generation WHERE year \u003d 2020 GROUP BY city;", + "sql_explanation": "This query calculates the total waste generation in gram for each city in the year 2020. It filters the data to only include records from 2020 and then groups the data by city, summing the waste_gram column for each group." 
+}, { + "id": "4365", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total waste generation in kg for each city in the year 2020?", + "sql_context": "CREATE TABLE waste_generation(city VARCHAR(255), year INT, amount FLOAT); INSERT INTO waste_generation(city, year, amount) VALUES(\u0027CityA\u0027, 2020, 123.45), (\u0027CityB\u0027, 2020, 678.90);", + "sql": "SELECT city, SUM(amount) FROM waste_generation WHERE year \u003d 2020 GROUP BY city;", + "sql_explanation": "The SQL query calculates the total waste generation in kg for each city in the year 2020 by summing the \u0027amount\u0027 column for each city where the \u0027year\u0027 is 2020, then grouping the results by city." 
+}, { + "id": "734", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the dispensary with the lowest total sales revenue in the second quarter of 2021.", + "sql_context": "CREATE TABLE DispensarySales (dispensary_id INT, sale_revenue DECIMAL(10,2), sale_date DATE);", + "sql": "SELECT dispensary_id, SUM(sale_revenue) as total_revenue FROM DispensarySales WHERE sale_date \u003e\u003d \u00272021-04-01\u0027 AND sale_date \u003c\u003d \u00272021-06-30\u0027 GROUP BY dispensary_id ORDER BY total_revenue ASC LIMIT 1;", + "sql_explanation": "The SQL query identifies the dispensary with the lowest total sales revenue in Q2 2021 by grouping dispensary_id and summing sale_revenue for sales between April 1 and June 30, 2021, then ordering the results in ascending order by the sum of sale_revenue and limiting the results to the top row." 
+}, { + "id": "950", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 hybrid strains with the highest total retail price in Oregon.", + "sql_context": "CREATE TABLE DispensarySales(id INT, dispensary VARCHAR(255), state VARCHAR(255), strain VARCHAR(255), retail_price DECIMAL(10,2));", + "sql": "SELECT strain, SUM(retail_price) as total_retail_price FROM DispensarySales WHERE state \u003d \u0027Oregon\u0027 AND strain LIKE \u0027%hybrid%\u0027 GROUP BY strain ORDER BY total_retail_price DESC LIMIT 3;", + "sql_explanation": "Group sales by strain and filter for Oregon and hybrid strains. Calculate the total retail price and order by this value in descending order. Use the LIMIT clause to display the top 3 hybrid strains." 
+}, { + "id": "1053", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales and quantities sold for each product category in dispensary A?", + "sql_context": "CREATE TABLE DispensarySales (DispensaryName VARCHAR(255), ProductCategory VARCHAR(255), QuantitySold INT, TotalSales DECIMAL(10,2)); INSERT INTO DispensarySales (DispensaryName, ProductCategory, QuantitySold, TotalSales) VALUES (\u0027Dispensary A\u0027, \u0027Flower\u0027, 150, 2500.00), (\u0027Dispensary A\u0027, \u0027Concentrates\u0027, 80, 1600.00);", + "sql": "SELECT ProductCategory, SUM(QuantitySold) AS TotalQuantitySold, SUM(TotalSales) AS TotalSales FROM DispensarySales WHERE DispensaryName \u003d \u0027Dispensary A\u0027 GROUP BY ProductCategory;", + "sql_explanation": "This SQL query calculates the total sales and quantities sold for each product category in Dispensary A by summing the QuantitySold and TotalSales columns, grouped by ProductCategory." 
+}, { + "id": "1387", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many compliance violations occurred in each month of 2021?", + "sql_context": "CREATE TABLE compliance_violations (id INT, dispensary_id INT, violation_date DATE, description TEXT); INSERT INTO compliance_violations (id, dispensary_id, violation_date, description) VALUES (1, 1, \u00272021-02-15\u0027, \u0027Inadequate labeling\u0027), (2, 2, \u00272021-03-02\u0027, \u0027Improper storage\u0027), (3, 3, \u00272021-06-28\u0027, \u0027Expired products\u0027), (4, 4, \u00272021-07-14\u0027, \u0027Lack of inventory controls\u0027), (5, 1, \u00272021-08-12\u0027, \u0027Inadequate labeling\u0027), (6, 2, \u00272021-12-30\u0027, \u0027Improper storage\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM violation_date) AS month, COUNT(*) FROM compliance_violations WHERE violation_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY month;", + "sql_explanation": "This query first extracts the month from the violation_date column. Then, it groups those rows by month and calculates the number of rows for each group." 
+}, { + "id": "1985", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of days between the first and last sale for each strain type in Oregon dispensaries.", + "sql_context": "CREATE TABLE DispensarySales(id INT, dispensary VARCHAR(255), state VARCHAR(255), strain_type VARCHAR(255), sale_date DATE);", + "sql": "SELECT strain_type, MAX(sale_date) - MIN(sale_date) as days_between_sales FROM DispensarySales WHERE state \u003d \u0027Oregon\u0027 GROUP BY strain_type;", + "sql_explanation": "Group sales by strain type and filter for Oregon dispensaries. Find the maximum and minimum sale dates and subtract the minimum sale date from the maximum sale date to find the number of days between first and last sale for each strain type." 
+}, { + "id": "3447", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each state in the first quarter of 2022?", + "sql_context": "CREATE TABLE sales (id INT, state VARCHAR(20), revenue DECIMAL(10,2), month INT, year INT);", + "sql": "SELECT state, SUM(revenue) FROM sales WHERE month BETWEEN 1 AND 3 AND year \u003d 2022 GROUP BY state;", + "sql_explanation": "This query calculates the total revenue for each state in the first quarter of 2022 by summing the revenue column in the sales table where the month is between 1 and 3 and the year is 2022, and then grouping the result by state." 
+}, { + "id": "4739", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average THC content for each strain?", + "sql_context": "CREATE TABLE Strains (id INT, strain TEXT, thc_content REAL); INSERT INTO Strains (id, strain, thc_content) VALUES (1, \u0027Strain A\u0027, 20.5), (2, \u0027Strain B\u0027, 18.3), (3, \u0027Strain C\u0027, 22.7);", + "sql": "SELECT strain, AVG(thc_content) AS avg_thc FROM Strains GROUP BY strain;", + "sql_explanation": "This query selects the strain and calculates the average THC content from the Strains table. It groups the results by strain to find the average THC content for each strain." 
+}, { + "id": "5357", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average yield for strains in each category?", + "sql_context": "CREATE TABLE strains (id INT, name TEXT, category TEXT, yield FLOAT); INSERT INTO strains (id, name, category, yield) VALUES (1, \u0027Purple Kush\u0027, \u0027Indica\u0027, 0.5), (2, \u0027Northern Lights\u0027, \u0027Indica\u0027, 0.6), (3, \u0027Granddaddy Purple\u0027, \u0027Indica\u0027, 0.7), (4, \u0027Sour Diesel\u0027, \u0027Sativa\u0027, 0.6), (5, \u0027Blue Dream\u0027, \u0027Hybrid\u0027, 0.7), (6, \u0027Green Crack\u0027, \u0027Sativa\u0027, 0.8), (7, \u0027OG Kush\u0027, \u0027Hybrid\u0027, 0.9);", + "sql": "SELECT category, AVG(yield) FROM strains GROUP BY category;", + "sql_explanation": "This query first groups the strains table by the category column. Then, for each group, it calculates the average yield." 
+}, { + "id": "731", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public works projects were completed in each quarter of the last 2 years?", + "sql_context": "CREATE TABLE projects_by_quarter (id INT, project_name VARCHAR(255), completion_quarter INT, completion_year INT); INSERT INTO projects_by_quarter (id, project_name, completion_quarter, completion_year) VALUES (1, \u0027Highway Expansion\u0027, 3, 2021), (2, \u0027Water Treatment Plant Upgrade\u0027, 4, 2021);", + "sql": "SELECT completion_quarter, completion_year, COUNT(*) as num_projects FROM projects_by_quarter WHERE completion_year \u003e\u003d YEAR(DATEADD(year, -2, GETDATE())) GROUP BY completion_quarter, completion_year;", + "sql_explanation": "This SQL query selects the number of public works projects completed in each quarter of the last 2 years. It uses the WHERE clause to filter the records based on the completion year and the GETDATE() function to get the current date. The YEAR function is used to extract the year from the current date and subtract 2 years from it. The GROUP BY clause is used to group the records by completion quarter and completion year. The COUNT function is used to count the number of projects in each group." 
+}, { + "id": "1286", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of bridges, their respective maintenance schedules, and the total maintenance cost in each province of Canada, along with their respective bridge types (e.g., suspension, beam, arch).", + "sql_context": "CREATE TABLE BridgesCanada (BridgeID INT, Name VARCHAR(255), Province VARCHAR(255), MaintenanceSchedule VARCHAR(255), MaintenanceCost FLOAT, Type VARCHAR(255)); INSERT INTO BridgesCanada VALUES (1, \u0027Bridge A\u0027, \u0027Quebec\u0027, \u0027Quarterly\u0027, 5000, \u0027Suspension\u0027); INSERT INTO BridgesCanada VALUES (2, \u0027Bridge B\u0027, \u0027Ontario\u0027, \u0027Semi-Annually\u0027, 7500, \u0027Beam\u0027); INSERT INTO BridgesCanada VALUES (3, \u0027Bridge C\u0027, \u0027British Columbia\u0027, \u0027Annually\u0027, 3000, \u0027Arch\u0027);", + "sql": "SELECT Province, Type, COUNT(*) as BridgeCount, MaintenanceSchedule, SUM(MaintenanceCost) as TotalCost FROM BridgesCanada GROUP BY Province, Type, MaintenanceSchedule;", + "sql_explanation": "This SQL query groups the BridgesCanada table by Province, Type, and MaintenanceSchedule, selecting the number of bridges, maintenance schedule, and total maintenance cost for each Province, Type, and MaintenanceSchedule combination." 
+}, { + "id": "1892", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total construction cost for infrastructure projects in California from 2018 to 2020, broken down by project type?", + "sql_context": "CREATE TABLE InfrastructureCostsCA (State TEXT, Year INTEGER, ProjectType TEXT, ConstructionCost REAL); INSERT INTO InfrastructureCostsCA (State, Year, ProjectType, ConstructionCost) VALUES (\u0027California\u0027, 2018, \u0027Bridge\u0027, 1800000.0), (\u0027California\u0027, 2018, \u0027Highway\u0027, 2400000.0), (\u0027California\u0027, 2018, \u0027Tunnel\u0027, 3300000.0), (\u0027California\u0027, 2019, \u0027Bridge\u0027, 1900000.0), (\u0027California\u0027, 2019, \u0027Highway\u0027, 2500000.0), (\u0027California\u0027, 2019, \u0027Tunnel\u0027, 3400000.0), (\u0027California\u0027, 2020, \u0027Bridge\u0027, 2000000.0), (\u0027California\u0027, 2020, \u0027Highway\u0027, 2600000.0), (\u0027California\u0027, 2020, \u0027Tunnel\u0027, 3500000.0);", + "sql": "SELECT Year, ProjectType, SUM(ConstructionCost) as TotalCost FROM InfrastructureCostsCA WHERE State \u003d \u0027California\u0027 GROUP BY Year, ProjectType;", + "sql_explanation": "The SQL query groups the records by Year and ProjectType and calculates the total construction cost for infrastructure projects in California from 2018 to 2020, broken down by project type." 
+}, { + "id": "2019", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total construction cost for bridge projects in Florida, Georgia, and South Carolina from 2015 to 2020?", + "sql_context": "CREATE TABLE BridgeCosts (State TEXT, Year INTEGER, ConstructionCost REAL); INSERT INTO BridgeCosts (State, Year, ConstructionCost) VALUES (\u0027Florida\u0027, 2015, 1800000.0), (\u0027Florida\u0027, 2016, 1900000.0), (\u0027Florida\u0027, 2017, 2000000.0), (\u0027Florida\u0027, 2018, 2100000.0), (\u0027Florida\u0027, 2019, 2200000.0), (\u0027Florida\u0027, 2020, 2300000.0), (\u0027Georgia\u0027, 2015, 1850000.0), (\u0027Georgia\u0027, 2016, 1950000.0), (\u0027Georgia\u0027, 2017, 2050000.0), (\u0027Georgia\u0027, 2018, 2150000.0), (\u0027Georgia\u0027, 2019, 2250000.0), (\u0027Georgia\u0027, 2020, 2350000.0), (\u0027South Carolina\u0027, 2015, 1900000.0), (\u0027South Carolina\u0027, 2016, 2000000.0), (\u0027South Carolina\u0027, 2017, 2100000.0), (\u0027South Carolina\u0027, 2018, 2200000.0), (\u0027South Carolina\u0027, 2019, 2300000.0), (\u0027South Carolina\u0027, 2020, 2400000.0);", + "sql": "SELECT State, SUM(ConstructionCost) as TotalCost FROM BridgeCosts WHERE State IN (\u0027Florida\u0027, \u0027Georgia\u0027, \u0027South Carolina\u0027) GROUP BY State;", + "sql_explanation": "The SQL query groups the records by State and calculates the total construction cost for bridge projects in Florida, Georgia, and South Carolina from 2015 to 2020." 
+}, { + "id": "2047", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of public works projects completed per state in the last year?", + "sql_context": "CREATE TABLE projects_by_state (id INT, project_name VARCHAR(255), state VARCHAR(255), completion_year INT); INSERT INTO projects_by_state (id, project_name, state, completion_year) VALUES (1, \u0027Highway Expansion\u0027, \u0027California\u0027, 2021), (2, \u0027Water Treatment Plant Upgrade\u0027, \u0027Texas\u0027, 2021), (3, \u0027Road Repair\u0027, \u0027New York\u0027, 2021);", + "sql": "SELECT state, COUNT(*) as num_projects FROM projects_by_state WHERE completion_year \u003d YEAR(DATEADD(year, -1, GETDATE())) GROUP BY state;", + "sql_explanation": "This SQL query calculates the number of public works projects completed per state in the last year. It uses the WHERE clause to filter the records based on the completion year and the GETDATE() function to get the current date. The YEAR function is used to extract the year from the current date and subtract 1 year from it. The GROUP BY clause is used to group the records by state and the COUNT function is used to count the number of projects in each group." 
+}, { + "id": "2363", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum resilience score for infrastructure projects in the Southern region of the US for the year 2020?", + "sql_context": "CREATE TABLE Infrastructure_Projects (Project_ID INT, Project_Name VARCHAR(255), Resilience_Score FLOAT, Year INT, Location VARCHAR(255));", + "sql": "SELECT Year, MAX(Resilience_Score) FROM Infrastructure_Projects WHERE Location LIKE \u0027%Southern%\u0027 AND Year \u003d 2020 GROUP BY Year;", + "sql_explanation": "This SQL query calculates the maximum resilience score for infrastructure projects in the Southern region of the US for the year 2020. It does this by finding the maximum value in the Resilience_Score column and grouping the results by Year." 
+}, { + "id": "3145", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many infrastructure projects were completed per year?", + "sql_context": "CREATE TABLE InfrastructureProjects (Id INT, Name VARCHAR(255), Location VARCHAR(255), CompletionYear INT); INSERT INTO InfrastructureProjects (Id, Name, Location, CompletionYear) VALUES (1, \u0027Dam\u0027, \u0027City A\u0027, 2020), (2, \u0027Bridge\u0027, \u0027City B\u0027, 2019), (3, \u0027Road\u0027, \u0027City C\u0027, 2020), (4, \u0027Tunnel\u0027, \u0027City D\u0027, 2018), (5, \u0027Highway\u0027, \u0027City E\u0027, 2017);", + "sql": "SELECT CompletionYear, COUNT(*) as NumberOfProjects FROM InfrastructureProjects GROUP BY CompletionYear;", + "sql_explanation": "This SQL query calculates the number of infrastructure projects completed per year by grouping the data by the CompletionYear column and counting the number of rows in each group." 
+}, { + "id": "3644", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average resilience score for each city in Japan, grouped by metric?", + "sql_context": "CREATE TABLE Resilience (Id INT, City VARCHAR(50), Metric VARCHAR(50), Value FLOAT, Year INT); INSERT INTO Resilience (Id, City, Metric, Value, Year) VALUES (1, \u0027Tokyo\u0027, \u0027Flood Resistance\u0027, 70, 2010); INSERT INTO Resilience (Id, City, Metric, Value, Year) VALUES (2, \u0027Osaka\u0027, \u0027Earthquake Resistance\u0027, 80, 2015);", + "sql": "SELECT City, AVG(Value) as Average_Resilience, Metric FROM Resilience GROUP BY City, Metric;", + "sql_explanation": "This query groups the data by City and Metric, then calculates the average resilience value for each group." 
+}, { + "id": "3711", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many public works projects were completed in 2020 for each region?", + "sql_context": "CREATE TABLE PublicWorks (id INT, region VARCHAR(20), year INT, completed INT); INSERT INTO PublicWorks (id, region, year, completed) VALUES (1, \u0027Northeast\u0027, 2020, 1), (2, \u0027Southwest\u0027, 2019, 1), (3, \u0027Northeast\u0027, 2020, 1);", + "sql": "SELECT region, COUNT(*) as num_projects FROM PublicWorks WHERE year \u003d 2020 GROUP BY region;", + "sql_explanation": "This query counts the number of public works projects completed in 2020 for each region by filtering the \u0027PublicWorks\u0027 table for projects from that year and grouping the results by \u0027region\u0027. It then counts the number of rows in each group." 
+}, { + "id": "4377", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of roads in each condition category in the Road_Condition_Assessments table", + "sql_context": "CREATE TABLE Road_Condition_Assessments (assessment_id INT, road_id INT, road_name VARCHAR(50), condition VARCHAR(50), assessment_date DATE);", + "sql": "SELECT condition, COUNT(*) FROM Road_Condition_Assessments GROUP BY condition;", + "sql_explanation": "This SQL query counts the number of roads in each condition category in the Road_Condition_Assessments table by using the COUNT() function and the GROUP BY clause on the condition column." 
+}, { + "id": "4957", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of resilience projects for each state?", + "sql_context": "CREATE TABLE Infrastructure_Projects (id INT, name VARCHAR(100), state VARCHAR(50), cost FLOAT); INSERT INTO Infrastructure_Projects (id, name, state, cost) VALUES (1, \u0027Seawall Upgrade\u0027, \u0027California\u0027, 5000000); INSERT INTO Infrastructure_Projects (id, name, state, cost) VALUES (2, \u0027Floodgate Construction\u0027, \u0027Texas\u0027, 12000000);", + "sql": "SELECT state, SUM(cost) FROM Infrastructure_Projects GROUP BY state;", + "sql_explanation": "This query calculates the total cost of resilience projects for each state by grouping the \u0027Infrastructure_Projects\u0027 table by the \u0027state\u0027 column and calculating the sum of the \u0027cost\u0027 column for each group." 
+}, { + "id": "5002", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average project cost for each category?", + "sql_context": "CREATE TABLE Projects (category VARCHAR(20), project_cost INT); INSERT INTO Projects (category, project_cost) VALUES (\u0027Bridge\u0027, 5000000), (\u0027Road\u0027, 3000000), (\u0027Water Treatment\u0027, 6500000), (\u0027Dams Safety\u0027, 7500000), (\u0027Transit System\u0027, 9000000);", + "sql": "SELECT category, AVG(project_cost) FROM Projects GROUP BY category;", + "sql_explanation": "This SQL query calculates the average project cost for each category by averaging the project_cost values grouped by the category." 
+}, { + "id": "5155", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of projects in each category?", + "sql_context": "CREATE TABLE Infrastructure (id INT, category VARCHAR(20)); INSERT INTO Infrastructure (id, category) VALUES (1, \u0027Transportation\u0027), (2, \u0027WaterSupply\u0027), (3, \u0027Transportation\u0027), (4, \u0027WaterSupply\u0027), (5, \u0027SewerSystems\u0027);", + "sql": "SELECT category, COUNT(*) FROM Infrastructure GROUP BY category;", + "sql_explanation": "This query counts the number of projects in each category by selecting all records, grouping them by category, and computing the count of their records for each category." 
+}, { + "id": "5165", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of bridges constructed in each country?", + "sql_context": "CREATE TABLE CountryBridges (BridgeID INT, Country VARCHAR(20), Cost FLOAT); INSERT INTO CountryBridges (BridgeID, Country, Cost) VALUES (1, \u0027United States\u0027, 5000000);", + "sql": "SELECT Country, SUM(Cost) FROM CountryBridges GROUP BY Country;", + "sql_explanation": "This query calculates the total cost of bridges constructed in each country. It selects the Country column and uses the SUM function to find the total cost. It then groups the results by Country." 
+}, { + "id": "1120", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which community development initiatives were implemented in India\u0027s rural areas between 2015 and 2017, and what was their combined budget?", + "sql_context": "CREATE TABLE community_initiatives (initiative VARCHAR(50), country VARCHAR(50), start_year INT, end_year INT, budget FLOAT); INSERT INTO community_initiatives (initiative, country, start_year, end_year, budget) VALUES (\u0027Rural Employment Scheme\u0027, \u0027India\u0027, 2015, 2017, 50000000), (\u0027Rural Housing Scheme\u0027, \u0027India\u0027, 2015, 2017, 75000000);", + "sql": "SELECT initiative, SUM(budget) FROM community_initiatives WHERE country \u003d \u0027India\u0027 AND start_year BETWEEN 2015 AND 2017 AND end_year BETWEEN 2015 AND 2017 GROUP BY initiative;", + "sql_explanation": "The SQL query filters the community development initiatives implemented in India\u0027s rural areas between 2015 and 2017 and then calculates their combined budget using the SUM() function." 
+}, { + "id": "1295", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the maximum value of the agricultural innovation metrics for the last quarter, by metric name?", + "sql_context": "CREATE TABLE agricultural_innovation_metrics (id INT PRIMARY KEY, metric_name VARCHAR(50), value DECIMAL(10, 2), measurement_date DATE);", + "sql": "SELECT metric_name, MAX(value) as max_value FROM agricultural_innovation_metrics WHERE measurement_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 3 MONTH) GROUP BY metric_name;", + "sql_explanation": "This SQL query calculates the maximum value of the agricultural innovation metrics for the last quarter, grouped by metric name. It does this by filtering the records based on the measurement_date column, grouping the records by the metric_name column, and calculating the maximum value of the value column." 
+}, { + "id": "1310", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of farmers who have received training in sustainable farming practices, by country, in the past year, broken down by age group?", + "sql_context": "CREATE TABLE farmers (id INT, name VARCHAR(50), country VARCHAR(50), age INT, training_sustainable BOOLEAN, training_date DATE);", + "sql": "SELECT country, age, COUNT(*) as total_trained FROM farmers WHERE training_sustainable \u003d TRUE AND date(training_date) \u003e\u003d date(\u0027now\u0027,\u0027-1 year\u0027) GROUP BY country, age;", + "sql_explanation": "This query calculates the number of farmers who have received training in sustainable farming practices, by country, in the past year, broken down by age group. It does this by grouping the farmers table by country and age and calculating the total number of trained farmers. The date filter is applied to the training_date column." 
+}, { + "id": "1421", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 donors for \u0027African rural development\u0027 based on total donated amount?", + "sql_context": "CREATE TABLE donors (id INT, name TEXT, region TEXT, donated_amount FLOAT); INSERT INTO donors (id, name, region, donated_amount) VALUES (1, \u0027Donor 1\u0027, \u0027Africa\u0027, 1000000), (2, \u0027Donor 2\u0027, \u0027Asia\u0027, 750000), (3, \u0027Donor 3\u0027, \u0027Africa\u0027, 1250000), (4, \u0027Donor 4\u0027, \u0027Europe\u0027, 500000), (5, \u0027Donor 5\u0027, \u0027Africa\u0027, 1500000);", + "sql": "SELECT donors.name, SUM(donors.donated_amount) FROM donors WHERE donors.region \u003d \u0027Africa\u0027 GROUP BY donors.name ORDER BY SUM(donors.donated_amount) DESC LIMIT 5;", + "sql_explanation": "This query lists the top 5 donors for \u0027African rural development\u0027 based on total donated amount. It does this by selecting all rows from the \u0027donors\u0027 table where the region is \u0027Africa\u0027, grouping those rows by donor name, calculating the total donated amount for each group, and then ordering those groups by total donated amount in descending order and selecting the top 5." 
+}, { + "id": "1898", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of rural infrastructure projects and their completion rate for each country, categorized by project type?", + "sql_context": "CREATE TABLE project (project_id INT, project_start_date DATE, project_end_date DATE, project_type VARCHAR(50), country_code CHAR(3), completed BOOLEAN); INSERT INTO project (project_id, project_start_date, project_end_date, project_type, country_code, completed) VALUES (1, \u00272020-01-01\u0027, \u00272021-12-31\u0027, \u0027Road Construction\u0027, \u0027AFG\u0027, true), (2, \u00272019-06-15\u0027, \u00272021-05-30\u0027, \u0027Water Supply\u0027, \u0027AGO\u0027, false);", + "sql": "SELECT country_code, project_type, COUNT(*) AS num_projects, SUM(completed) AS num_completed FROM project GROUP BY country_code, project_type;", + "sql_explanation": "The query groups the projects by country code and project type and calculates the total number of projects (COUNT(*)) and the number of completed projects (SUM(completed)) for each group." 
+}, { + "id": "1997", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique agricultural innovation projects implemented in each country, sorted by the number of projects in descending order.", + "sql_context": "CREATE TABLE agricultural_innovation (id INT, project_name VARCHAR(50), project_type VARCHAR(50), country VARCHAR(50), cost DECIMAL(10,2)); INSERT INTO agricultural_innovation VALUES (1, \u0027Solar Irrigation System\u0027, \u0027Agricultural Innovation\u0027, \u0027Kenya\u0027, 25000.00), (2, \u0027Modern Greenhouse\u0027, \u0027Agricultural Innovation\u0027, \u0027Tanzania\u0027, 30000.00), (3, \u0027Precision Agriculture Tools\u0027, \u0027Agricultural Innovation\u0027, \u0027Kenya\u0027, 28000.00), (4, \u0027Vertical Farming\u0027, \u0027Agricultural Innovation\u0027, \u0027Uganda\u0027, 22000.00), (5, \u0027Drip Irrigation\u0027, \u0027Agricultural Innovation\u0027, \u0027Rwanda\u0027, 18000.00);", + "sql": "SELECT country, COUNT(DISTINCT project_name) AS projects_count FROM agricultural_innovation GROUP BY country ORDER BY projects_count DESC;", + "sql_explanation": "This query creates a table \u0027agricultural_innovation\u0027 with relevant data. It then uses the GROUP BY and COUNT(DISTINCT) functions to count the number of unique agricultural innovation projects in each country. Finally, it sorts the results by the number of projects in descending order." 
+}, { + "id": "2101", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which agricultural innovation projects in Bolivia had the highest cost in 2019?", + "sql_context": "CREATE TABLE agricultural_innovation_bolivia (id INT, country VARCHAR(255), project VARCHAR(255), cost FLOAT, year INT); INSERT INTO agricultural_innovation_bolivia (id, country, project, cost, year) VALUES (1, \u0027Bolivia\u0027, \u0027New Seed Variety\u0027, 2500000, 2019), (2, \u0027Bolivia\u0027, \u0027Drip Irrigation\u0027, 3000000, 2019), (3, \u0027Bolivia\u0027, \u0027Precision Farming\u0027, 2000000, 2019);", + "sql": "SELECT project, MAX(cost) as max_cost FROM agricultural_innovation_bolivia WHERE country \u003d \u0027Bolivia\u0027 AND year \u003d 2019 GROUP BY project;", + "sql_explanation": "This query identifies the agricultural innovation projects with the highest cost in Bolivia in 2019 by grouping projects by country and year, calculating the maximum cost for each project, and returning the \u0027project\u0027 and \u0027max_cost\u0027 columns. The query does not include an ORDER BY clause, so the results will not be sorted." 
+}, { + "id": "2424", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which rural infrastructure projects were not completed within their target timeline?", + "sql_context": "CREATE TABLE infrastructure_projects (project_id INT, project_type VARCHAR(255), target_completion_year INT, actual_completion_year INT); INSERT INTO infrastructure_projects (project_id, project_type, target_completion_year, actual_completion_year) VALUES (1, \u0027Irrigation System\u0027, 2019, 2021), (2, \u0027Rural Road\u0027, 2020, 2019), (3, \u0027Electricity Grid\u0027, 2021, 2022), (4, \u0027Community Center\u0027, 2021, 2020);", + "sql": "SELECT project_type FROM infrastructure_projects WHERE target_completion_year \u003e actual_completion_year GROUP BY project_type;", + "sql_explanation": "Get the rural infrastructure projects that were not completed within their target timeline by grouping the infrastructure_projects table by project_type and filtering the results where the target_completion_year is greater than the actual_completion_year." 
+}, { + "id": "2753", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average yield for \u0027Rice\u0027 and \u0027Wheat\u0027 crops?", + "sql_context": "CREATE TABLE AgriculturalProductivity (id INT, farmer_id INT, crop_name VARCHAR(50), yield INT, year INT); INSERT INTO AgriculturalProductivity (id, farmer_id, crop_name, yield, year) VALUES (1, 1, \u0027Corn\u0027, 80, 2020); INSERT INTO AgriculturalProductivity (id, farmer_id, crop_name, yield, year) VALUES (2, 2, \u0027Soybeans\u0027, 60, 2021); INSERT INTO AgriculturalProductivity (id, farmer_id, crop_name, yield, year) VALUES (3, 3, \u0027Rice\u0027, 90, 2020); INSERT INTO AgriculturalProductivity (id, farmer_id, crop_name, yield, year) VALUES (4, 4, \u0027Wheat\u0027, 75, 2021);", + "sql": "SELECT crop_name, AVG(yield) FROM AgriculturalProductivity WHERE crop_name IN (\u0027Rice\u0027, \u0027Wheat\u0027) GROUP BY crop_name;", + "sql_explanation": "This SQL query calculates the average yield for \u0027Rice\u0027 and \u0027Wheat\u0027 crops by filtering the \u0027AgriculturalProductivity\u0027 table using a WHERE clause with a condition on the \u0027crop_name\u0027 column, then grouping the table by the \u0027crop_name\u0027 column and averaging the \u0027yield\u0027 column." 
+}, { + "id": "2806", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many rural infrastructure projects have been completed in each state of Australia?", + "sql_context": "CREATE TABLE rural_infrastructure (id INT, state VARCHAR(50), project_type VARCHAR(50), status VARCHAR(50)); INSERT INTO rural_infrastructure (id, state, project_type, status) VALUES (1, \u0027New South Wales\u0027, \u0027Roads\u0027, \u0027Completed\u0027), (2, \u0027Victoria\u0027, \u0027Irrigation\u0027, \u0027In Progress\u0027), (3, \u0027Queensland\u0027, \u0027Rural Electrification\u0027, \u0027Completed\u0027), (4, \u0027Western Australia\u0027, \u0027Community Center\u0027, \u0027Completed\u0027);", + "sql": "SELECT state, COUNT(*) as completed_projects FROM rural_infrastructure WHERE status \u003d \u0027Completed\u0027 GROUP BY state;", + "sql_explanation": "The SQL query filters the rural_infrastructure table to only include rows with a status of \u0027Completed\u0027. It then groups the results by state and calculates the number of completed projects for each state." 
+}, { + "id": "3203", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total budget for agricultural innovation projects in district 16 and district 17.", + "sql_context": "CREATE TABLE agri_projects (project_id INT, district_id INT, budget FLOAT, project_category VARCHAR(50)); INSERT INTO agri_projects (project_id, district_id, budget, project_category) VALUES (1, 16, 120000, \u0027Crop Research\u0027), (2, 16, 80000, \u0027Livestock Research\u0027), (3, 17, 180000, \u0027Farm Machinery\u0027), (4, 17, 90000, \u0027Fertilizer Trials\u0027);", + "sql": "SELECT district_id, SUM(budget) FROM agri_projects GROUP BY district_id HAVING district_id IN (16, 17);", + "sql_explanation": "List the total budget for agricultural innovation projects in district 16 and district 17 by grouping the records based on the district ID and computing the sum of the budgets, then filtering the results based on the district ID." 
+}, { + "id": "3725", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average crop yield per hectare for each region, in the \u0027rural_development\u0027 schema, for crops with a yield greater than 2000?", + "sql_context": "CREATE TABLE crop_yields(region VARCHAR(50), crop VARCHAR(50), yield INT); INSERT INTO crop_yields VALUES (\u0027Africa\u0027, \u0027Corn\u0027, 2500), (\u0027Asia\u0027, \u0027Rice\u0027, 3000), (\u0027South America\u0027, \u0027Soybeans\u0027, 1500);", + "sql": "SELECT region, AVG(yield) as avg_yield FROM crop_yields WHERE yield \u003e 2000 GROUP BY region;", + "sql_explanation": "The SQL query calculates the average crop yield per hectare for each region with a yield greater than 2000 using the AVG function and the GROUP BY clause. The WHERE clause filters the results based on the yield." 
+}, { + "id": "1174", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which spacecraft have had the most maintenance issues and what was the average duration of each maintenance issue?", + "sql_context": "CREATE TABLE MaintenanceIssues (id INT, spacecraft VARCHAR(255), issue_date DATE, resolution_date DATE); INSERT INTO MaintenanceIssues (id, spacecraft, issue_date, resolution_date) VALUES (1, \u0027ISS\u0027, \u00272022-01-01\u0027, \u00272022-01-05\u0027); INSERT INTO MaintenanceIssues (id, spacecraft, issue_date, resolution_date) VALUES (2, \u0027ISS\u0027, \u00272022-01-03\u0027, \u00272022-01-06\u0027);", + "sql": "SELECT spacecraft, COUNT(*) as issue_count, AVG(DATEDIFF(resolution_date, issue_date)) as avg_duration FROM MaintenanceIssues GROUP BY spacecraft ORDER BY issue_count DESC;", + "sql_explanation": "The SQL query identifies which spacecraft have had the most maintenance issues and the average duration of each maintenance issue by using the COUNT() function to count the number of maintenance issues for each spacecraft and the AVG() function to calculate the average duration of each maintenance issue. The GROUP BY clause is used to group the results by spacecraft and the ORDER BY clause is used to sort the results by the number of maintenance issues in descending order." 
+}, { + "id": "1572", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the names of all spacecraft that have been launched more than once and the number of times they have been launched, in descending order of the number of times launched.", + "sql_context": "CREATE TABLE Space_Missions(id INT, mission_name VARCHAR(50), launch_date DATE, spacecraft_name VARCHAR(50));", + "sql": "SELECT spacecraft_name, COUNT(*) as Number_of_Launches FROM Space_Missions GROUP BY spacecraft_name HAVING COUNT(*) \u003e 1 ORDER BY Number_of_Launches DESC;", + "sql_explanation": "The query uses the COUNT(*) function to get the number of times each spacecraft has been launched and the GROUP BY clause to group the results by spacecraft name. The HAVING clause filters the results to only include spacecraft that have been launched more than once. The ORDER BY clause sorts the results in descending order based on the number of times launched." 
+}, { + "id": "2448", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest and latest launch date for each country\u0027s space missions?", + "sql_context": "CREATE TABLE space_missions (id INT, country VARCHAR(255), launch_date DATE);", + "sql": "SELECT country, MIN(launch_date) as earliest_launch, MAX(launch_date) as latest_launch FROM space_missions GROUP BY country;", + "sql_explanation": "For each country, find the earliest and latest launch dates of their space missions. Minimum and maximum values are determined by sorting all launch_date values for a given country in ascending and descending order, respectively, and selecting the first and last values." 
+}, { + "id": "2566", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "find the maximum mass of spacecraft manufactured by each country", + "sql_context": "CREATE TABLE Spacecraft_Manufacturing(name VARCHAR(50), mass FLOAT, country VARCHAR(50));", + "sql": "CREATE VIEW Spacecraft_Max_Mass AS SELECT country, MAX(mass) AS max_mass FROM Spacecraft_Manufacturing GROUP BY country;", + "sql_explanation": "The SQL query calculates the maximum mass of spacecraft manufactured by each country by creating a view that groups the Spacecraft_Manufacturing table by country and calculates the maximum mass using the MAX() function." 
+}, { + "id": "2871", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total mission durations for each space mission?", + "sql_context": "CREATE TABLE Space_Missions (Mission_Name VARCHAR(50), Astronaut_ID INT, Mission_Duration INT); INSERT INTO Space_Missions (Mission_Name, Astronaut_ID, Mission_Duration) VALUES (\u0027Artemis I\u0027, 1, 25); INSERT INTO Space_Missions (Mission_Name, Astronaut_ID, Mission_Duration) VALUES (\u0027Artemis II\u0027, 2, 300); INSERT INTO Space_Missions (Mission_Name, Astronaut_ID, Mission_Duration) VALUES (\u0027Artemis III\u0027, 3, 365);", + "sql": "SELECT Mission_Name, SUM(Mission_Duration) as Total_Mission_Duration FROM Space_Missions GROUP BY Mission_Name;", + "sql_explanation": "This SQL query groups the data by the Mission_Name column and calculates the total mission duration for each space mission." 
+}, { + "id": "2978", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total mission durations for each space agency?", + "sql_context": "CREATE TABLE Space_Agencies (Agency_Name VARCHAR(50), Mission_Name VARCHAR(50), Mission_Duration INT); INSERT INTO Space_Agencies (Agency_Name, Mission_Name, Mission_Duration) VALUES (\u0027NASA\u0027, \u0027Apollo 11\u0027, 195); INSERT INTO Space_Agencies (Agency_Name, Mission_Name, Mission_Duration) VALUES (\u0027ESA\u0027, \u0027Rosetta\u0027, 2560); INSERT INTO Space_Agencies (Agency_Name, Mission_Name, Mission_Duration) VALUES (\u0027Roscosmos\u0027, \u0027Mars 96\u0027, 275);", + "sql": "SELECT Agency_Name, SUM(Mission_Duration) as Total_Mission_Duration FROM Space_Agencies GROUP BY Agency_Name;", + "sql_explanation": "This SQL query groups the data by the Agency_Name column and calculates the total mission duration for each space agency." 
+}, { + "id": "2979", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total flight time for astronauts from NASA?", + "sql_context": "CREATE TABLE Astronauts (id INT, name VARCHAR(255), gender VARCHAR(255), agency VARCHAR(255), mission VARCHAR(255), role VARCHAR(255), flight_hours DECIMAL(5,2)); INSERT INTO Astronauts (id, name, gender, agency, mission, role, flight_hours) VALUES (1, \u0027Neil Armstrong\u0027, \u0027Male\u0027, \u0027NASA\u0027, \u0027Apollo 11\u0027, \u0027Commander\u0027, 21.5); INSERT INTO Astronauts (id, name, gender, agency, mission, role, flight_hours) VALUES (2, \u0027Buzz Aldrin\u0027, \u0027Male\u0027, \u0027NASA\u0027, \u0027Apollo 11\u0027, \u0027Lunar Module Pilot\u0027, 19.0);", + "sql": "SELECT agency, SUM(flight_hours) as total_flight_hours FROM Astronauts WHERE agency \u003d \u0027NASA\u0027 GROUP BY agency;", + "sql_explanation": "This query filters astronauts by the NASA agency and calculates the total flight hours using an aggregate function." 
+}, { + "id": "3036", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of spacecraft manufacturing costs by year?", + "sql_context": "CREATE TABLE SpacecraftManufacturing (id INT, year INT, cost FLOAT);", + "sql": "SELECT year, AVG(cost) as avg_cost, STDDEV(cost) as stddev_cost FROM SpacecraftManufacturing GROUP BY year;", + "sql_explanation": "This query calculates the average and standard deviation of spacecraft manufacturing costs by year by summing up the \u0027cost\u0027 column where the \u0027year\u0027 column is equal to the current year and grouping the results by the \u0027year\u0027 column. This provides a distribution of spacecraft manufacturing costs by year." 
+}, { + "id": "3218", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many successful missions were there for each space agency?", + "sql_context": "CREATE TABLE missions (mission_id INT, name VARCHAR(50), space_agency VARCHAR(50), mission_status VARCHAR(10));", + "sql": "SELECT space_agency, COUNT(*) FROM missions WHERE mission_status \u003d \u0027successful\u0027 GROUP BY space_agency;", + "sql_explanation": "* This query filters the missions table to only show successful missions using the mission_status column and then groups the results by space_agency. The COUNT(*) function counts the number of successful missions for each space agency." 
+}, { + "id": "3245", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average diameter in light years for irregular galaxies?", + "sql_context": "CREATE TABLE Galaxies (id INT, name VARCHAR(255), type VARCHAR(255), right_ascension VARCHAR(255), declination VARCHAR(255), diameter_ly DECIMAL(10,2), distance_Mpc DECIMAL(10,2)); INSERT INTO Galaxies (id, name, type, right_ascension, declination, diameter_ly, distance_Mpc) VALUES (5, \u0027Large Magellanic Cloud\u0027, \u0027Irregular\u0027, \u00275h 23m 34.5s\u0027, \u0027-69° 45′ 47â€ŗ\u0027, 14000, 0.05); INSERT INTO Galaxies (id, name, type, right_ascension, declination, diameter_ly, distance_Mpc) VALUES (6, \u0027Small Magellanic Cloud\u0027, \u0027Irregular\u0027, \u00270h 52m 45.0s\u0027, \u0027-72° 49′ 43â€ŗ\u0027, 7000, 0.06);", + "sql": "SELECT type, AVG(diameter_ly) as avg_diameter_ly FROM Galaxies WHERE type \u003d \u0027Irregular\u0027 GROUP BY type;", + "sql_explanation": "This query filters galaxies by the type Irregular and calculates the average diameter in light years using an aggregate function." 
+}, { + "id": "3766", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of spacecrafts launched by country?", + "sql_context": "CREATE TABLE spacecrafts (id INT, name VARCHAR(50), launch_country VARCHAR(50), weight FLOAT); INSERT INTO spacecrafts VALUES (1, \u0027Voyager 1\u0027, \u0027USA\u0027, 795.5), (2, \u0027Voyager 2\u0027, \u0027USA\u0027, 782.5), (3, \u0027Galileo\u0027, \u0027USA\u0027, 2325.0), (4, \u0027Cassini\u0027, \u0027France\u0027, 2125.0), (5, \u0027Rosetta\u0027, \u0027Europe\u0027, 3000.0);", + "sql": "SELECT launch_country, AVG(weight) as avg_weight FROM spacecrafts GROUP BY launch_country;", + "sql_explanation": "Calculate the average weight of spacecrafts for each launch_country by using the AVG function and grouping by launch_country." 
+}, { + "id": "4307", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of spacecrafts produced by each company?", + "sql_context": "CREATE TABLE SpacecraftManufacturing (id INT, company VARCHAR(255), spacecraft VARCHAR(255));", + "sql": "SELECT company, COUNT(spacecraft) FROM SpacecraftManufacturing GROUP BY company;", + "sql_explanation": "This SQL query lists the total number of spacecrafts produced by each company by performing a group by on the \u0027SpacecraftManufacturing\u0027 table using the \u0027company\u0027 column. The query then uses the COUNT() function to count the number of rows in the \u0027SpacecraftManufacturing\u0027 table for each group." 
+}, { + "id": "4842", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mass of spacecrafts manufactured by each company?", + "sql_context": "CREATE TABLE spacecrafts (manufacturer VARCHAR(255), mass FLOAT); INSERT INTO spacecrafts (manufacturer, mass) VALUES (\u0027SpaceCorp\u0027, 10000); INSERT INTO spacecrafts (manufacturer, mass) VALUES (\u0027AstroCorp\u0027, 18000); INSERT INTO spacecrafts (manufacturer, mass) VALUES (\u0027Galactic Inc\u0027, 15000);", + "sql": "SELECT manufacturer, AVG(mass) FROM spacecrafts GROUP BY manufacturer;", + "sql_explanation": "This query calculates the average mass of spacecrafts manufactured by each company by selecting the \u0027manufacturer\u0027 and the average (AVG) of the \u0027mass\u0027 column, grouped by \u0027manufacturer\u0027." 
+}, { + "id": "5631", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of astronauts\u0027 ages?", + "sql_context": "CREATE TABLE Astronauts (AstronautID INT, Age INT, Gender VARCHAR(10), Name VARCHAR(50), Nationality VARCHAR(50));", + "sql": "SELECT Age, COUNT(*) FROM Astronauts GROUP BY Age;", + "sql_explanation": "This query groups the \u0027Astronauts\u0027 table by \u0027Age\u0027 and counts the number of occurrences for each age." +}, { + "id": "316", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the monthly average closing balance per customer for the last 6 months, ordered by the most recent month?", + "sql_context": "CREATE TABLE customer_account (customer_id INT, account_number INT, balance DECIMAL(10, 2), closing_date DATE); INSERT INTO customer_account (customer_id, account_number, balance, closing_date) VALUES (1, 1001, 15000, \u00272021-08-31\u0027), (1, 1002, 20000, \u00272021-08-31\u0027), (2, 1003, 30000, \u00272021-08-31\u0027);", + "sql": "SELECT customer_id, AVG(balance) as avg_balance, EXTRACT(MONTH FROM closing_date) as month FROM customer_account WHERE 
closing_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND CURRENT_DATE GROUP BY customer_id, month ORDER BY month DESC;", + "sql_explanation": "The SQL query calculates the average closing balance per customer for the past 6 months. It uses the customer_account table, which includes the customer ID, account number, balance, and closing date. The query groups the data by customer_id and month, calculates the average balance for each group, and orders the results by the most recent month." +}, { + "id": "434", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of fraudulent transactions detected per day in the last week?", + "sql_context": "CREATE TABLE transactions (id INT, transaction_date DATE, is_fraud BOOLEAN); INSERT INTO transactions (id, transaction_date, is_fraud) VALUES (1, \u00272022-03-01\u0027, FALSE); INSERT INTO transactions (id, transaction_date, is_fraud) VALUES (2, \u00272022-03-03\u0027, TRUE);", + "sql": "SELECT DATE(t.transaction_date) as transaction_date, COUNT(*) as num_fraudulent_transactions FROM transactions t WHERE t.transaction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY) AND t.is_fraud \u003d TRUE GROUP BY transaction_date;", + "sql_explanation": "The SQL query filters transactions in the last 7 days and that are flagged as fraudulent, groups by transaction date, and calculates the number of fraudulent transactions." 
+}, { + "id": "585", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total transaction value for each month of the year 2022?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, transaction_date DATE, transaction_category VARCHAR(255), transaction_value DECIMAL(10,2)); INSERT INTO transactions (transaction_id, transaction_date, transaction_category, transaction_value) VALUES (1, \u00272022-01-02\u0027, \u0027Food\u0027, 75.00), (2, \u00272022-02-05\u0027, \u0027Electronics\u0027, 350.00), (3, \u00272022-03-10\u0027, \u0027Clothing\u0027, 200.00);", + "sql": "SELECT YEAR(transaction_date) as year, MONTH(transaction_date) as month, SUM(transaction_value) as total_value FROM transactions WHERE transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY year, month;", + "sql_explanation": "The SQL query calculates the total transaction value for each month of the year 2022 by using the YEAR() and MONTH() functions to extract the year and month, the SUM() function to calculate the total transaction value, and the GROUP BY clause to group the results by year and month. The WHERE clause filters the transactions by date range." 
+}, { + "id": "1098", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total transaction value per category for the first quarter of 2021?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, transaction_date DATE, transaction_category VARCHAR(255), transaction_value DECIMAL(10,2)); INSERT INTO transactions (transaction_id, transaction_date, transaction_category, transaction_value) VALUES (1, \u00272021-01-02\u0027, \u0027Food\u0027, 50.00), (2, \u00272021-01-05\u0027, \u0027Electronics\u0027, 300.00), (3, \u00272021-02-10\u0027, \u0027Clothing\u0027, 150.00);", + "sql": "SELECT transaction_category, SUM(transaction_value) as total_value FROM transactions WHERE transaction_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-03-31\u0027 GROUP BY transaction_category;", + "sql_explanation": "The SQL query calculates the total transaction value for each category in the first quarter of 2021 by using the SUM() function and the GROUP BY clause. The WHERE clause filters the transactions by date range." 
+}, { + "id": "1353", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily average transaction amount for each payment method in Q1 2022?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_amount DECIMAL(10,2), transaction_date DATE, payment_method VARCHAR(50));", + "sql": "SELECT AVG(transaction_amount), payment_method FROM transactions WHERE transactions.transaction_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY payment_method;", + "sql_explanation": "This query calculates the daily average transaction amount for each payment method in Q1 2022 by using the GROUP BY clause to group the transactions by payment method, and then calculating the average transaction amount for each group." 
+}, { + "id": "1889", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total number of customers in each country who have made at least one transaction in the past year.", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), country VARCHAR(50), last_transaction_date DATE);", + "sql": "SELECT country, COUNT(*) as num_customers FROM customers c WHERE last_transaction_date \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 year\u0027) GROUP BY country;", + "sql_explanation": "This query determines the total number of customers in each country who have made at least one transaction in the past year. It uses the GROUP BY clause to group the results by country and the COUNT() function to count the number of customers in each group. The query filters the customers to include only those who have made a transaction in the past year, as determined by the last_transaction_date column. It does not use any set operations, as it only requires a simple count of customers for each country." 
+}, { + "id": "2403", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total assets value for each client as of 2022-01-01?", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT, assets_value FLOAT); INSERT INTO clients (client_id, name, assets_value) VALUES (1, \u0027John Doe\u0027, 150000.00), (2, \u0027Jane Smith\u0027, 220000.00);", + "sql": "SELECT name, SUM(assets_value) as total_assets_value FROM clients WHERE DATE(assets_value_date) \u003d \u00272022-01-01\u0027 GROUP BY name;", + "sql_explanation": "This SQL query calculates the total assets value for each client on a specific date (2022-01-01) by using the SUM() function on the assets_value column and grouping the results by the name of the clients." 
+}, { + "id": "2416", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in bonds for customers who have invested more than $5000 in bonds?", + "sql_context": "CREATE TABLE Investments (InvestmentID INT, CustomerID INT, InvestmentType VARCHAR(20), Amount DECIMAL(10,2)); INSERT INTO Investments (InvestmentID, CustomerID, InvestmentType, Amount) VALUES (1, 1, \u0027Stocks\u0027, 5000.00); INSERT INTO Investments (InvestmentID, CustomerID, InvestmentType, Amount) VALUES (2, 1, \u0027Bonds\u0027, 7000.00); INSERT INTO Investments (InvestmentID, CustomerID, InvestmentType, Amount) VALUES (3, 2, \u0027Stocks\u0027, 3000.00); INSERT INTO Investments (InvestmentID, CustomerID, InvestmentType, Amount) VALUES (4, 2, \u0027Real Estate\u0027, 9000.00); INSERT INTO Investments (InvestmentID, CustomerID, InvestmentType, Amount) VALUES (5, 3, \u0027Bonds\u0027, 6000.00);", + "sql": "SELECT CustomerID, SUM(Amount) FROM Investments WHERE InvestmentType \u003d \u0027Bonds\u0027 GROUP BY CustomerID HAVING SUM(Amount) \u003e 5000;", + "sql_explanation": "Calculate the total investment in bonds for customers with more than $5000 invested in bonds." 
+}, { + "id": "2663", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the transaction dates and the total transaction amount for transactions made by customers residing in India.", + "sql_context": "CREATE TABLE transactions_4 (id INT, customer_id INT, amount DECIMAL(10,2), tx_date DATE, country VARCHAR(255)); INSERT INTO transactions_4 (id, customer_id, amount, tx_date, country) VALUES (1, 1, 100.00, \u00272022-01-01\u0027, \u0027India\u0027), (2, 2, 50.00, \u00272022-01-01\u0027, \u0027USA\u0027), (3, 3, 200.00, \u00272022-01-02\u0027, \u0027Canada\u0027), (4, 1, 300.00, \u00272022-01-03\u0027, \u0027India\u0027), (5, 4, 1000.00, \u00272022-01-04\u0027, \u0027USA\u0027);", + "sql": "SELECT tx_date, SUM(amount) as total_transaction_amount FROM transactions_4 WHERE country \u003d \u0027India\u0027 GROUP BY tx_date;", + "sql_explanation": "The SQL query filters the transactions_4 table for transactions made by customers residing in India and groups by transaction date. It then calculates the sum of the transaction amount for each transaction date, returning the transaction dates and total transaction amount for transactions made by customers residing in India." 
+}, { + "id": "2927", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total sales for each product, excluding sales made by a specific salesperson.", + "sql_context": "CREATE TABLE sales_data (salesperson VARCHAR(20), product VARCHAR(20), sales_amount DECIMAL(10,2)); INSERT INTO sales_data VALUES (\u0027John\u0027, \u0027Laptop\u0027, 1200.00), (\u0027John\u0027, \u0027Phone\u0027, 500.00), (\u0027Jane\u0027, \u0027Phone\u0027, 300.00), (\u0027Jane\u0027, \u0027Tablet\u0027, 800.00);", + "sql": "SELECT product, SUM(sales_amount) AS total_sales FROM sales_data WHERE salesperson !\u003d \u0027John\u0027 GROUP BY product;", + "sql_explanation": "The SQL query calculates the total sales for each product, excluding sales made by the salesperson \"John\". The SUM() function is used to calculate the total sales amount, and the results are grouped by product using the GROUP BY clause." 
+}, { + "id": "3266", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total assets under management (AUM) for each investment strategy, including a 3% management fee?", + "sql_context": "CREATE TABLE Investment_Strategies (strategy_id INT, strategy_name VARCHAR(30), AUM DECIMAL(12,2)); INSERT INTO Investment_Strategies (strategy_id, strategy_name, AUM) VALUES (1, \u0027Equity\u0027, 5000000.00), (2, \u0027Fixed Income\u0027, 3000000.00), (3, \u0027Alternatives\u0027, 2000000.00);", + "sql": "SELECT strategy_name, SUM(AUM * 1.03) AS total_AUM FROM Investment_Strategies GROUP BY strategy_name;", + "sql_explanation": "Group the Investment Strategies table by strategy_name, multiply the AUM by 1.03 to include a 3% management fee, and calculate the total AUM for each investment strategy." 
+}, { + "id": "3425", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the minimum transaction date for each customer in the \"Customers\" table.", + "sql_context": "CREATE TABLE Customers (CustomerID INT, TransactionDate DATE, TransactionAmount DECIMAL(10,2));", + "sql": "SELECT CustomerID, MIN(TransactionDate) as MinTransactionDate FROM Customers GROUP BY CustomerID;", + "sql_explanation": "This query calculates the minimum transaction date for each customer in the Customers table. It groups the transactions by CustomerID and then calculates the minimum transaction date for each group." 
+}, { + "id": "5212", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average credit score for customers in each state?", + "sql_context": "CREATE TABLE customers (id INT, state VARCHAR(50), credit_score INT); INSERT INTO customers (id, state, credit_score) VALUES (1, \u0027California\u0027, 700), (2, \u0027New York\u0027, 750), (3, \u0027Texas\u0027, 650);", + "sql": "SELECT state, AVG(credit_score) FROM customers GROUP BY state;", + "sql_explanation": "This query calculates the average credit score for customers in each state by averaging the credit_score column and grouping by the state column." 
+}, { + "id": "5449", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers are there from each country?", + "sql_context": "CREATE TABLE customers (id INT, name TEXT, age INT, country TEXT, assets FLOAT); INSERT INTO customers (id, name, age, country, assets) VALUES (1, \u0027John Doe\u0027, 45, \u0027USA\u0027, 250000.00); INSERT INTO customers (id, name, age, country, assets) VALUES (2, \u0027Jane Smith\u0027, 34, \u0027Canada\u0027, 320000.00); INSERT INTO customers (id, name, age, country, assets) VALUES (3, \u0027Alice Johnson\u0027, 29, \u0027USA\u0027, 750000.00);", + "sql": "SELECT country, COUNT(*) FROM customers GROUP BY country;", + "sql_explanation": "This query groups customers by their country and counts the number of customers in each country using the GROUP BY clause." 
+}, { + "id": "1080", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy efficiency (in kWh/m2) of buildings in the \u0027asia_pacific\u0027 region, partitioned by country and ordered by energy efficiency in ascending order?", + "sql_context": "CREATE TABLE buildings (id INT, country VARCHAR(50), region VARCHAR(50), energy_efficiency FLOAT); INSERT INTO buildings (id, country, region, energy_efficiency) VALUES (1, \u0027China\u0027, \u0027asia_pacific\u0027, 2.34), (2, \u0027Japan\u0027, \u0027asia_pacific\u0027, 1.23), (3, \u0027India\u0027, \u0027asia_pacific\u0027, 3.45);", + "sql": "SELECT region, country, AVG(energy_efficiency) as avg_energy_efficiency FROM buildings WHERE region \u003d \u0027asia_pacific\u0027 GROUP BY country, region ORDER BY avg_energy_efficiency ASC;", + "sql_explanation": "This query calculates the average energy efficiency of buildings in the \u0027asia_pacific\u0027 region, partitioned by country and ordered by energy efficiency in ascending order. It uses the GROUP BY clause to group the results by country and region, the AVG function to calculate the average energy efficiency value for each group, and the ORDER BY clause with the ASC modifier to sort the results in ascending order." 
+}, { + "id": "1432", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 countries with the most clean energy policies implemented between 2015 and 2020?", + "sql_context": "CREATE TABLE clean_energy_policies (id INT, policy_name TEXT, country TEXT, start_year INT, end_year INT);", + "sql": "SELECT country, COUNT(*) as policy_count FROM clean_energy_policies WHERE start_year BETWEEN 2015 AND 2020 GROUP BY country ORDER BY policy_count DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 countries with the most clean energy policies implemented between 2015 and 2020 by grouping the rows in the clean_energy_policies table by country, counting the number of rows for each country, and then ordering the results by policy count in descending order and returning the top 3 rows." 
+}, { + "id": "2148", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of wind energy projects in the region of Andalusia, Spain, grouped by project location?", + "sql_context": "CREATE TABLE spain_projects (id INT, project_name VARCHAR(100), region VARCHAR(50), project_location VARCHAR(50), project_type VARCHAR(50)); INSERT INTO spain_projects (id, project_name, region, project_location, project_type) VALUES (1, \u0027Wind Project A\u0027, \u0027Andalusia\u0027, \u0027Southern Andalusia\u0027, \u0027Wind\u0027), (2, \u0027Wind Project B\u0027, \u0027Andalusia\u0027, \u0027Northern Andalusia\u0027, \u0027Wind\u0027), (3, \u0027Solar Project A\u0027, \u0027Andalusia\u0027, \u0027Southern Andalusia\u0027, \u0027Solar\u0027);", + "sql": "SELECT project_location, COUNT(*) FROM spain_projects WHERE region \u003d \u0027Andalusia\u0027 AND project_type \u003d \u0027Wind\u0027 GROUP BY project_location;", + "sql_explanation": "The SQL query filters the spain_projects table to only include rows where the region is \u0027Andalusia\u0027 and the project_type is \u0027Wind\u0027. It then groups the results by project_location and calculates the count of rows for each group." 
+}, { + "id": "2263", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum installed capacity of hydroelectric power plants in each of the top 3 countries with the most hydroelectric power plants?", + "sql_context": "CREATE TABLE hydroelectric_power_plants (id INT, name VARCHAR(100), country VARCHAR(50), capacity FLOAT, completion_date DATE); INSERT INTO hydroelectric_power_plants (id, name, country, capacity, completion_date) VALUES (1, \u0027Hydroelectric Power Plant A\u0027, \u0027China\u0027, 2000, \u00272010-01-01\u0027); INSERT INTO hydroelectric_power_plants (id, name, country, capacity, completion_date) VALUES (2, \u0027Hydroelectric Power Plant B\u0027, \u0027Brazil\u0027, 2500, \u00272011-01-01\u0027);", + "sql": "SELECT country, MAX(capacity) AS max_capacity FROM hydroelectric_power_plants GROUP BY country ORDER BY max_capacity DESC LIMIT 3;", + "sql_explanation": "This SQL query calculates the maximum installed capacity of hydroelectric power plants in each of the top 3 countries with the most hydroelectric power plants. It groups the data by country and calculates the maximum capacity for each group using the MAX() function, then orders the results by maximum capacity in descending order and limits the results to the top 3 countries." 
+}, { + "id": "2542", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy_efficiency of the solar_panels installed in the US, ordered by the id in descending order?", + "sql_context": "CREATE TABLE solar_panels (id INT, country VARCHAR(50), energy_efficiency FLOAT);", + "sql": "SELECT AVG(energy_efficiency) AS avg_efficiency FROM solar_panels WHERE country \u003d \u0027US\u0027 GROUP BY country ORDER BY id DESC;", + "sql_explanation": "The SQL query calculates the average energy efficiency of solar panels installed in the United States by using the AVG function and orders the result by id in descending order." 
+}, { + "id": "2710", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of electric vehicle charging stations in the state of California, grouped by station type?", + "sql_context": "CREATE TABLE ev_charging_stations (id INT, station_name VARCHAR(255), state VARCHAR(255), station_type VARCHAR(255), num_stalls INT);", + "sql": "SELECT station_type, COUNT(station_name) FROM ev_charging_stations WHERE state \u003d \u0027California\u0027 GROUP BY station_type;", + "sql_explanation": "The SQL query calculates the total number of electric vehicle charging stations in California, grouped by station type. It uses the COUNT() function and groups the results by station_type." 
+}, { + "id": "3156", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total carbon emissions reduction achieved by clean energy policies in \u0027CleanPolicy\u0027 table, in each country?", + "sql_context": "CREATE TABLE CleanPolicy (policy_id INT, country VARCHAR(50), emissions_reduction INT);", + "sql": "SELECT country, SUM(emissions_reduction) as total_emissions_reduction FROM CleanPolicy GROUP BY country;", + "sql_explanation": "The SQL query calculates the total carbon emissions reduction achieved by clean energy policies in \u0027CleanPolicy\u0027 table, in each country. It uses the SUM function to find the total emissions reduction and GROUP BY clause to group the data based on country." +}, { + "id": "3890", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of electric vehicle charging stations installed in each city in the ev_charging_stations table.", + "sql_context": "CREATE TABLE ev_charging_stations (city VARCHAR(50), station_type VARCHAR(50), num_stations INT);", + "sql": "SELECT city, COUNT(*) as num_charging_stations FROM ev_charging_stations GROUP BY city;", + "sql_explanation": "1. 
The SELECT statement retrieves the city column and the COUNT of all records. 2. The GROUP BY clause groups the selected columns by city. 3. The result is the number of electric vehicle charging stations installed in each city in the ev_charging_stations table." +}, { + "id": "4013", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many carbon offsets have been purchased in each country?", + "sql_context": "CREATE TABLE offset_purchases (country VARCHAR(50), offsets INT); INSERT INTO offset_purchases (country, offsets) VALUES (\u0027IN\u0027, 500), (\u0027BR\u0027, 300), (\u0027ZA\u0027, 250);", + "sql": "SELECT country, SUM(offsets) as total_offsets FROM offset_purchases GROUP BY country;", + "sql_explanation": "This query groups the carbon offset purchases by country and calculates the sum of offsets for each group (country)." 
+}, { + "id": "5080", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many clean energy policies have been implemented in each state?", + "sql_context": "CREATE TABLE clean_energy_policies (id INT, state VARCHAR(255), policy_name VARCHAR(255), policy_type VARCHAR(255), year INT);", + "sql": "SELECT state, COUNT(*) FROM clean_energy_policies GROUP BY state;", + "sql_explanation": "The SQL query calculates the count of records for each state in the clean_energy_policies table and returns the state and count columns." +}, { + "id": "965", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of broadband to mobile customers in each city?", + "sql_context": "CREATE TABLE city_customers (city VARCHAR(50), customer_type VARCHAR(20), customer_id INT); INSERT INTO city_customers (city, customer_type, customer_id) VALUES (\u0027Delhi\u0027, \u0027broadband\u0027, 1), (\u0027Delhi\u0027, \u0027broadband\u0027, 2), (\u0027Delhi\u0027, \u0027mobile\u0027, 3), (\u0027Mumbai\u0027, \u0027broadband\u0027, 4), (\u0027Mumbai\u0027, \u0027mobile\u0027, 5), (\u0027Bangalore\u0027, \u0027broadband\u0027, 6), 
(\u0027Bangalore\u0027, \u0027mobile\u0027, 7);", + "sql": "SELECT city, COUNT(*) FILTER (WHERE customer_type \u003d \u0027broadband\u0027) * 1.0 / COUNT(*) FILTER (WHERE customer_type \u003d \u0027mobile\u0027) as broadband_mobile_ratio FROM city_customers GROUP BY city;", + "sql_explanation": "This SQL query calculates the ratio of broadband to mobile customers in each city. The COUNT() function with the FILTER clause counts the number of broadband and mobile customers separately, and the outer query calculates the ratio of those counts." +}, { + "id": "1132", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of network infrastructure investments for a specific country in the last year?", + "sql_context": "CREATE TABLE network_investments (investment_id INT, investment_date DATE, country VARCHAR(50), investment_amount INT);", + "sql": "SELECT country, SUM(investment_amount) FROM network_investments WHERE country \u003d \u0027CountryName\u0027 AND investment_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country;", + "sql_explanation": "The SQL query calculates the total number of network infrastructure investments for a specific country in the last year by grouping the data based on the country and calculating the sum of investment_amount for each group where the investment_date is within the last year. This provides the total number of network infrastructure investments for a specific country in the last year." 
+}, { + "id": "1164", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the peak usage time for each day of the week?", + "sql_context": "CREATE TABLE usage_timestamps (usage_time TIMESTAMP, data_usage FLOAT); INSERT INTO usage_timestamps (usage_time, data_usage) VALUES (\u00272022-01-01 09:00:00\u0027, 5000), (\u00272022-01-01 10:00:00\u0027, 6000), (\u00272022-01-02 11:00:00\u0027, 7000);", + "sql": "SELECT DATE_FORMAT(usage_time, \u0027%W\u0027) AS day_of_week, HOUR(usage_time) AS hour_of_day, MAX(data_usage) AS peak_usage FROM usage_timestamps GROUP BY day_of_week, hour_of_day;", + "sql_explanation": "The SQL query extracts the day of the week and hour of the day from the usage_time field and groups the usage_timestamps table by day of the week and hour of day, then calculates the peak usage for each hour of the day." 
+}, { + "id": "1208", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mobile devices were sold in each state of the United States, for the month of July, 2022?", + "sql_context": "CREATE TABLE sales (sale_id INT, sale_date DATE, quantity INT, state VARCHAR(50));", + "sql": "SELECT EXTRACT(MONTH FROM sale_date) AS month, state, SUM(quantity) AS total_sales FROM sales WHERE YEAR(sale_date) \u003d 2022 AND MONTH(sale_date) \u003d 7 GROUP BY month, state;", + "sql_explanation": "This SQL query calculates the number of mobile devices sold in each state of the United States, for the month of July, 2022. It extracts the month from the sale_date column, and groups the results by month and state. It then calculates the sum of quantities for each group, representing the total number of mobile devices sold." 
+}, { + "id": "1494", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customer complaints were there in each state last month?", + "sql_context": "CREATE TABLE customer_complaints (complaint_id INT, complaint_date DATE, complaint_type VARCHAR(255), state VARCHAR(255));", + "sql": "SELECT state, COUNT(complaint_id) as total_complaints FROM customer_complaints WHERE complaint_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY state;", + "sql_explanation": "This query selects the state column and calculates the number of customer complaints (based on complaint_id) for each state. It filters the records to only include those from the past month by checking the complaint_date. Finally, it groups the results by state." 
+}, { + "id": "1780", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total revenue generated in Q1 of 2022 for each region offering the \u0027Mobile\u0027 service.", + "sql_context": "CREATE TABLE Subscribers (subscriber_id INT, service VARCHAR(20), region VARCHAR(20), revenue FLOAT, payment_date DATE); INSERT INTO Subscribers (subscriber_id, service, region, revenue, payment_date) VALUES (1, \u0027Broadband\u0027, \u0027Metro\u0027, 50.00, \u00272022-01-01\u0027), (2, \u0027Mobile\u0027, \u0027Urban\u0027, 35.00, \u00272022-01-15\u0027), (3, \u0027Mobile\u0027, \u0027Rural\u0027, 20.00, \u00272022-01-31\u0027), (4, \u0027Mobile\u0027, \u0027Metro\u0027, 40.00, \u00272022-01-05\u0027);", + "sql": "SELECT region, SUM(revenue) FROM Subscribers WHERE service \u003d \u0027Mobile\u0027 AND QUARTER(payment_date) \u003d 1 AND YEAR(payment_date) \u003d 2022 GROUP BY region;", + "sql_explanation": "This query lists the total revenue for each region offering the \u0027Mobile\u0027 service in Q1 2022 by summing the \u0027revenue\u0027 column where the \u0027service\u0027 is \u0027Mobile\u0027, the quarter and year match Q1 2022, and grouping by \u0027region\u0027." 
+}, { + "id": "1874", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average revenue per user for each product category?", + "sql_context": "CREATE TABLE user_revenue (product_category VARCHAR(20), user_id INT, revenue INT); INSERT INTO user_revenue (product_category, user_id, revenue) VALUES (\u0027Electronics\u0027, 1, 50), (\u0027Electronics\u0027, 2, 100), (\u0027Fashion\u0027, 3, 20), (\u0027Fashion\u0027, 4, 70), (\u0027Home Appliances\u0027, 5, 150), (\u0027Home Appliances\u0027, 6, 250);", + "sql": "SELECT product_category, AVG(revenue / NULLIF(COUNT(DISTINCT user_id), 0)) AS avg_revenue_per_user FROM user_revenue GROUP BY product_category;", + "sql_explanation": "The SQL query calculates the average revenue per user for each product category by dividing the revenue column by the count of distinct user_id column and then applying the AVG function on the result. It uses the NULLIF function to handle the case when there are no records for a particular product category." 
+}, { + "id": "2127", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of mobile and broadband subscribers in each country, grouped by region and type, with a cross join.", + "sql_context": "CREATE TABLE subscribers(id INT, subscription_type VARCHAR(10), region VARCHAR(10), country VARCHAR(10)); INSERT INTO subscribers VALUES (1, \u0027mobile\u0027, \u0027South\u0027, \u0027USA\u0027); INSERT INTO subscribers VALUES (2, \u0027broadband\u0027, \u0027South\u0027, \u0027Mexico\u0027); INSERT INTO subscribers VALUES (3, \u0027mobile\u0027, \u0027East\u0027, \u0027China\u0027);", + "sql": "SELECT region, country, subscription_type, COUNT(*) as total_subscribers FROM subscribers GROUP BY region, country, subscription_type;", + "sql_explanation": "This query groups the subscribers by region, country, and subscription type, then counts the number of subscribers in each group using the COUNT function. The result is a table with the number of mobile and broadband subscribers in each country, grouped by region and type." 
+}, { + "id": "2167", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of mobile subscribers who have used roaming services in each continent?", + "sql_context": "CREATE TABLE mobile_subscribers_roaming (subscriber_id INT, roaming_country VARCHAR(255)); INSERT INTO mobile_subscribers_roaming (subscriber_id, roaming_country) VALUES (1, \u0027Canada\u0027), (2, \u0027Mexico\u0027), (3, \u0027Brazil\u0027), (4, \u0027Argentina\u0027), (5, \u0027United States\u0027), (6, \u0027France\u0027);", + "sql": "SELECT CONTINENT(roaming_country), COUNT(DISTINCT subscriber_id) FROM mobile_subscribers_roaming GROUP BY CONTINENT(roaming_country);", + "sql_explanation": "This query calculates the number of distinct mobile subscribers who have used roaming services in each continent using the COUNT function with the DISTINCT keyword and the GROUP BY clause on the CONTINENT(roaming_country) field." 
+}, { + "id": "2225", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of subscribers who have been in compliance with regulatory requirements for each quarter?", + "sql_context": "CREATE TABLE regulatory_compliance (compliance_date DATE, subscriber_id INT); INSERT INTO regulatory_compliance (compliance_date, subscriber_id) VALUES (\u00272022-01-01\u0027, 1), (\u00272022-02-01\u0027, 2);", + "sql": "SELECT DATE_FORMAT(compliance_date, \u0027%Y-%q\u0027) AS quarter, COUNT(DISTINCT subscriber_id) FROM regulatory_compliance GROUP BY quarter;", + "sql_explanation": "This query extracts the quarter from the compliance_date field using the DATE_FORMAT function, and then calculates the number of distinct subscribers for each quarter using the COUNT function with the DISTINCT keyword and the GROUP BY clause on the quarter field." 
+}, { + "id": "2418", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of customer complaints received in each state in the last month?", + "sql_context": "CREATE TABLE customer_complaints (id INT, state VARCHAR(50), complaint_date DATE, complaint_type VARCHAR(50));", + "sql": "SELECT state, COUNT(*) FROM customer_complaints WHERE complaint_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY state;", + "sql_explanation": "This query calculates the number of customer complaints received in each state in the last month by selecting the state and the count of complaints for each complaint in the customer_complaints table where the complaint_date is in the last month, grouping the results by state." 
+}, { + "id": "2876", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of mobile subscribers who have used roaming services in each country?", + "sql_context": "CREATE TABLE mobile_subscribers_roaming (subscriber_id INT, roaming_country VARCHAR(255)); INSERT INTO mobile_subscribers_roaming (subscriber_id, roaming_country) VALUES (1, \u0027Canada\u0027), (2, \u0027Mexico\u0027);", + "sql": "SELECT roaming_country, COUNT(DISTINCT subscriber_id) FROM mobile_subscribers_roaming GROUP BY roaming_country;", + "sql_explanation": "This query calculates the number of distinct mobile subscribers who have used roaming services in each country using the COUNT function with the DISTINCT keyword and the GROUP BY clause on the roaming_country field." 
+}, { + "id": "2877", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in each network type?", + "sql_context": "CREATE TABLE customer_usage (subscriber_id INT, name VARCHAR(255), region VARCHAR(255), mobile_number VARCHAR(20), broadband_speed DECIMAL(10, 2), minutes_used INT, data_used DECIMAL(10, 2));", + "sql": "SELECT network_type, SUM(investment_amount) AS total_investment FROM network_investments GROUP BY network_type;", + "sql_explanation": "This SQL query calculates the total investment in each network type. It uses the GROUP BY clause to group the records by the network_type column. The SUM function is used to calculate the total investment for each group by summing the investment_amount for each record in the group." 
+}, { + "id": "3072", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum mobile data usage in India for users in the age range of 18-30?", + "sql_context": "CREATE TABLE user_data (user_id INT, age INT, state VARCHAR(2), data_usage FLOAT); INSERT INTO user_data (user_id, age, state, data_usage) VALUES (1, 25, \u0027MH\u0027, 3.5), (2, 22, \u0027TN\u0027, 4.2), (3, 35, \u0027UP\u0027, 3.8);", + "sql": "SELECT state, MIN(data_usage) as min_data_usage FROM user_data WHERE age BETWEEN 18 AND 30 GROUP BY state;", + "sql_explanation": "This SQL query finds the minimum mobile data usage for users in the age range of 18-30 by using the MIN function and the WHERE clause to filter for users in the correct age range. The GROUP BY clause is used to group the results by state." 
+}, { + "id": "3207", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of 5G base stations in the \"urban\" region, grouped by state.", + "sql_context": "CREATE TABLE infrastructure (id INT, technology VARCHAR(10), region VARCHAR(10), state VARCHAR(10)); INSERT INTO infrastructure (id, technology, region, state) VALUES (1, \u00275G\u0027, \u0027urban\u0027, \u0027NY\u0027), (2, \u00275G\u0027, \u0027urban\u0027, \u0027CA\u0027), (3, \u00274G\u0027, \u0027urban\u0027, \u0027TX\u0027);", + "sql": "SELECT state, COUNT(*) FROM infrastructure WHERE technology \u003d \u00275G\u0027 AND region \u003d \u0027urban\u0027 GROUP BY state;", + "sql_explanation": "Filter 5G base stations in the urban region, group them by state, and count the total number of base stations in each state." 
+}, { + "id": "3659", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average data usage by mobile customers in California, grouped by the city?", + "sql_context": "CREATE TABLE mobile_customers (customer_id INT, data_usage FLOAT, city VARCHAR(20), state VARCHAR(20)); INSERT INTO mobile_customers (customer_id, data_usage, city, state) VALUES (1, 3.5, \u0027San Francisco\u0027, \u0027California\u0027), (2, 6.2, \u0027Los Angeles\u0027, \u0027California\u0027);", + "sql": "SELECT city, AVG(data_usage) FROM mobile_customers WHERE state \u003d \u0027California\u0027 GROUP BY city;", + "sql_explanation": "This SQL query calculates the average data usage by mobile customers in California, grouped by the city, by selecting the city and average data usage columns, filtering for the state of California, and grouping by the city." 
+}, { + "id": "3938", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of network infrastructure investments in each country?", + "sql_context": "CREATE TABLE network_investments (investment_id int, investment_amount float, country varchar(20)); INSERT INTO network_investments (investment_id, investment_amount, country) VALUES (1, 1000000, \u0027USA\u0027), (2, 2000000, \u0027Canada\u0027), (3, 1500000, \u0027Mexico\u0027); CREATE TABLE network_upgrades (upgrade_id int, upgrade_date date, investment_id int); INSERT INTO network_upgrades (upgrade_id, upgrade_date, investment_id) VALUES (1, \u00272021-01-01\u0027, 1), (2, \u00272021-02-01\u0027, 2), (3, \u00272021-03-01\u0027, 3);", + "sql": "SELECT country, COUNT(*) as num_investments FROM network_investments GROUP BY country;", + "sql_explanation": "The SQL query calculates the total number of network infrastructure investments in each country by using the GROUP BY clause to group the data by country and the COUNT function to count the number of investments in each group (country). The query does not require a join with any other table, as all the necessary information is contained in the network_investments table." 
+}, { + "id": "4644", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average data usage for each mobile plan in the \u0027subscriber_data\u0027 table, grouped by plan type?", + "sql_context": "CREATE TABLE subscriber_data (subscriber_id INT, plan_type VARCHAR(20), data_usage FLOAT); INSERT INTO subscriber_data VALUES (1, \u0027Basic\u0027, 2.5), (2, \u0027Premium\u0027, 4.7), (3, \u0027Basic\u0027, 3.2);", + "sql": "SELECT plan_type, AVG(data_usage) FROM subscriber_data GROUP BY plan_type;", + "sql_explanation": "The SQL query calculates the average data usage for each mobile plan by grouping the \u0027subscriber_data\u0027 table by \u0027plan_type\u0027 and applying the AVG function." 
+}, { + "id": "5232", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many mobile subscribers are there in each state?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, state VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, state) VALUES (1, \u0027NY\u0027), (2, \u0027NY\u0027), (3, \u0027NJ\u0027), (4, \u0027CA\u0027), (5, \u0027CA\u0027);", + "sql": "SELECT state, COUNT(*) FROM mobile_subscribers GROUP BY state;", + "sql_explanation": "This query uses the COUNT aggregate function along with the GROUP BY clause to group the results by the state column and count the number of rows in each group." 
+}, { + "id": "682", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which defense contractors have been involved in contracts with a value greater than $75M, along with their respective headquarters and the total value of contracts they have participated in, ordered by the total contract value in descending order?", + "sql_context": "CREATE TABLE Contractors (contractor_id INT, contractor_name VARCHAR(50), contractor_HQ VARCHAR(50), contract_value FLOAT); INSERT INTO Contractors (contractor_id, contractor_name, contractor_HQ, contract_value) VALUES (5, \u0027MegaDefense\u0027, \u0027USA\u0027, 80000000), (6, \u0027SecureCorp\u0027, \u0027UK\u0027, 100000000), (7, \u0027ShieldTech\u0027, \u0027Israel\u0027, 78000000), (8, \u0027WarriorSystems\u0027, \u0027India\u0027, 95000000);", + "sql": "SELECT contractor_name, contractor_HQ, SUM(contract_value) AS TotalContractValue FROM Contractors WHERE contract_value \u003e 75000000 GROUP BY contractor_name, contractor_HQ ORDER BY TotalContractValue DESC;", + "sql_explanation": "This query lists defense contractors involved in contracts with a value greater than $75M, along with their headquarters and the total value of contracts they have participated in, ordered by the total contract value in descending order." 
+}, { + "id": "827", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total military equipment sale value to India, grouped by the year and quarter of the sale date?", + "sql_context": "CREATE TABLE Military_Equipment_Sales (sale_date DATE, equipment_name VARCHAR(50), customer_country VARCHAR(50), sale_value INT); INSERT INTO Military_Equipment_Sales (sale_date, equipment_name, customer_country, sale_value) VALUES (\u00272019-01-01\u0027, \u0027Tank A\u0027, \u0027India\u0027, 3000000); INSERT INTO Military_Equipment_Sales (sale_date, equipment_name, customer_country, sale_value) VALUES (\u00272021-04-01\u0027, \u0027Helicopter B\u0027, \u0027India\u0027, 8000000);", + "sql": "SELECT EXTRACT(YEAR FROM sale_date) AS year, EXTRACT(QUARTER FROM sale_date) AS quarter, SUM(sale_value) FROM Military_Equipment_Sales WHERE customer_country \u003d \u0027India\u0027 GROUP BY year, quarter;", + "sql_explanation": "This query calculates the total military equipment sale value to India for each year and quarter by extracting the year and quarter from the sale_date, grouping by year and quarter, and summing the sale_value." 
+}, { + "id": "1297", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum contract negotiation duration for military equipment sales to Mexico?", + "sql_context": "CREATE TABLE Contract_Negotiations (contract_id INT, equipment_name VARCHAR(50), customer_country VARCHAR(50), negotiation_date DATE, negotiation_duration INT); INSERT INTO Contract_Negotiations (contract_id, equipment_name, customer_country, negotiation_date, negotiation_duration) VALUES (1, \u0027Tank A\u0027, \u0027Mexico\u0027, \u00272020-01-01\u0027, 30); INSERT INTO Contract_Negotiations (contract_id, equipment_name, customer_country, negotiation_date, negotiation_duration) VALUES (2, \u0027Helicopter B\u0027, \u0027Mexico\u0027, \u00272021-01-01\u0027, 50);", + "sql": "SELECT customer_country, MIN(negotiation_duration) AS min_negotiation_duration FROM Contract_Negotiations WHERE customer_country \u003d \u0027Mexico\u0027 GROUP BY customer_country;", + "sql_explanation": "This query calculates the minimum contract negotiation duration for military equipment sales to Mexico. It groups the data by customer_country and then calculates the minimum negotiation_duration within each group." 
+}, { + "id": "2092", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense projects in each country and their average duration?", + "sql_context": "CREATE TABLE defense_projects(id INT, project_name VARCHAR(50), start_date DATE, end_date DATE, status VARCHAR(20), country VARCHAR(20));", + "sql": "SELECT country, AVG(DATEDIFF(end_date, start_date)) AS avg_duration, COUNT(*) AS total_projects FROM defense_projects GROUP BY country;", + "sql_explanation": "This SQL query retrieves the total number of defense projects in each country and their average duration by selecting the country, average duration (calculated using the DATEDIFF function), and count of projects from the defense_projects table, grouped by country." 
+}, { + "id": "2105", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many defense projects were initiated in 2019, grouped by their geopolitical region?", + "sql_context": "CREATE TABLE Defense_Projects(id INT, project_name VARCHAR(255), start_year INT, end_year INT, geopolitical_region VARCHAR(255));", + "sql": "SELECT geopolitical_region, COUNT(*) as Number_Of_Projects FROM Defense_Projects WHERE start_year \u003d 2019 GROUP BY geopolitical_region;", + "sql_explanation": "This query calculates the number of defense projects initiated in 2019 by grouping the data by the geopolitical region. It counts the number of projects in each geopolitical region." 
+}, { + "id": "2120", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of military equipment sales by month in 2020 and display the result in a YYYY-MM format.", + "sql_context": "CREATE TABLE MonthlySales (sale_id INT, equipment_type VARCHAR(50), sale_value FLOAT, sale_date DATE); INSERT INTO MonthlySales (sale_id, equipment_type, sale_value, sale_date) VALUES (1, \u0027Tanks\u0027, 15000000, \u00272020-01-15\u0027), (2, \u0027Fighter Jets\u0027, 50000000, \u00272020-03-20\u0027), (3, \u0027Armored Vehicles\u0027, 7000000, \u00272020-01-05\u0027), (4, \u0027Helicopters\u0027, 12000000, \u00272020-08-01\u0027);", + "sql": "SELECT DATE_FORMAT(sale_date, \u0027%Y-%m\u0027) AS Month, COUNT(*) AS SalesCount FROM MonthlySales WHERE YEAR(sale_date) \u003d 2020 GROUP BY Month;", + "sql_explanation": "This query counts the number of military equipment sales by month in 2020, displaying the result in the YYYY-MM format." 
+}, { + "id": "2141", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the defense project timelines that had a start date in Q1 2022, grouped by project status.", + "sql_context": "CREATE TABLE defense_projects (id INT, project_name VARCHAR, project_status VARCHAR, start_date DATE, end_date DATE); INSERT INTO defense_projects (id, project_name, project_status, start_date, end_date) VALUES (1, \u0027Project G\u0027, \u0027In Progress\u0027, \u00272022-01-05\u0027, \u00272023-06-30\u0027); INSERT INTO defense_projects (id, project_name, project_status, start_date, end_date) VALUES (2, \u0027Project H\u0027, \u0027Completed\u0027, \u00272022-03-18\u0027, \u00272022-05-05\u0027); INSERT INTO defense_projects (id, project_name, project_status, start_date, end_date) VALUES (3, \u0027Project I\u0027, \u0027On Hold\u0027, \u00272022-02-20\u0027, \u00272022-12-22\u0027);", + "sql": "SELECT project_status, COUNT(*) FROM defense_projects WHERE start_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY project_status;", + "sql_explanation": "This query lists the defense project timelines that had a start date in Q1 2022, grouped by project status by selecting the project_status and using the COUNT function to count the number of records for each project_status. The WHERE clause filters for start_dates within the specified range." 
+}, { + "id": "3428", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of military equipment sales to \u0027Brazil\u0027 for each equipment type from the \u0027sales\u0027 table", + "sql_context": "CREATE TABLE sales (id INT, equipment_type VARCHAR(255), sale_date DATE, quantity INT, country VARCHAR(255)); INSERT INTO sales (id, equipment_type, sale_date, quantity, country) VALUES (1, \u0027tank\u0027, \u00272019-07-15\u0027, 5, \u0027US\u0027); INSERT INTO sales (id, equipment_type, sale_date, quantity, country) VALUES (2, \u0027fighter_jet\u0027, \u00272020-11-27\u0027, 12, \u0027UK\u0027); INSERT INTO sales (id, equipment_type, sale_date, quantity, country) VALUES (4, \u0027radar\u0027, \u00272021-02-03\u0027, 2, \u0027Brazil\u0027);", + "sql": "SELECT equipment_type, SUM(quantity) FROM sales WHERE country \u003d \u0027Brazil\u0027 GROUP BY equipment_type;", + "sql_explanation": "This query shows the number of military equipment sales to \u0027Brazil\u0027 for each equipment type by grouping the \u0027equipment_type\u0027 and summing the \u0027quantity\u0027 column where the \u0027country\u0027 is \u0027Brazil\u0027." 
+}, { + "id": "3606", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of military equipment sold by each seller and the total quantity sold, ordered by the total quantity sold in descending order?", + "sql_context": "CREATE TABLE military_sales (id INT PRIMARY KEY, seller VARCHAR(255), buyer VARCHAR(255), equipment_type VARCHAR(255), quantity INT);", + "sql": "SELECT seller, SUM(quantity) FROM military_sales GROUP BY seller ORDER BY SUM(quantity) DESC;", + "sql_explanation": "The query groups the military sales by the seller and calculates the total quantity sold, and orders them by the total quantity sold in descending order." 
+}, { + "id": "5116", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average contract value for each salesperson?", + "sql_context": "CREATE TABLE salesperson (id INT, name VARCHAR(255), contract_value INT); INSERT INTO salesperson (id, name, contract_value) VALUES (1, \u0027John Doe\u0027, 500000), (2, \u0027Jane Smith\u0027, 600000), (3, \u0027Bob Johnson\u0027, 400000), (4, \u0027Alice Williams\u0027, 700000), (5, \u0027Charlie Brown\u0027, 800000);", + "sql": "SELECT name, AVG(contract_value) FROM salesperson GROUP BY name;", + "sql_explanation": "The SQL query calculates the average contract value for each salesperson by using the AVG function to find the average contract value and the GROUP BY clause to group the results by salesperson name." 
+}, { + "id": "518", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of disability complaints by type and resolution status?", + "sql_context": "CREATE TABLE Disability_Complaints (id INT, complaint_id INT, complaint_type VARCHAR(50), resolution_status VARCHAR(50)); INSERT INTO Disability_Complaints (id, complaint_id, complaint_type, resolution_status) VALUES (1, 3001, \u0027Accessibility\u0027, \u0027Resolved\u0027), (2, 3002, \u0027Discrimination\u0027, \u0027Unresolved\u0027);", + "sql": "SELECT Disability_Complaints.complaint_type, Disability_Complaints.resolution_status, COUNT(*) as total FROM Disability_Complaints GROUP BY Disability_Complaints.complaint_type, Disability_Complaints.resolution_status;", + "sql_explanation": "This SQL query counts the total number of disability complaints by type and resolution status. It uses the GROUP BY clause to group the results by complaint type and resolution status. Then, the COUNT function is used to count the number of complaints for each group." 
+}, { + "id": "851", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students with visual impairments have received accommodations in the last year, broken down by country?", + "sql_context": "CREATE TABLE student_accommodations (student_id INT, disability_type VARCHAR(255), country VARCHAR(255), date DATE); INSERT INTO student_accommodations (student_id, disability_type, country, date) VALUES (1, \u0027Visual Impairment\u0027, \u0027USA\u0027, \u00272021-03-22\u0027); INSERT INTO student_accommodations (student_id, disability_type, country, date) VALUES (2, \u0027Visual Impairment\u0027, \u0027Canada\u0027, \u00272021-04-01\u0027);", + "sql": "SELECT country, COUNT(*) as num_students FROM student_accommodations WHERE disability_type \u003d \u0027Visual Impairment\u0027 AND date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW() GROUP BY country;", + "sql_explanation": "This query selects the country and COUNT(*) as num_students from the student_accommodations table where the disability_type is \u0027Visual Impairment\u0027 and the date is within the last year. It groups the results by country and calculates the number of students with visual impairments who have received accommodations during this time period, broken down by country." 
+}, { + "id": "1177", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of students with visual impairments who received accommodations in each state in 2022.", + "sql_context": "CREATE TABLE Accommodations (student_id INT, accommodation_type VARCHAR(50), state VARCHAR(50), request_date DATE, visual_impairment BOOLEAN);", + "sql": "SELECT state, COUNT(DISTINCT student_id) FROM Accommodations WHERE request_date \u003e\u003d \u00272022-01-01\u0027 AND request_date \u003c \u00272023-01-01\u0027 AND visual_impairment \u003d TRUE GROUP BY state;", + "sql_explanation": "This query creates a table named \u0027Accommodations\u0027 with 5 columns: student_id, accommodation_type, state, request_date, and visual_impairment. Then it inserts a sample record for a student with visual impairment who received an accommodation in a state in 2022 in the \u0027Accommodations\u0027 table. The query then counts the total number of students with visual impairments who received accommodations in each state in 2022 by performing a group by operation on the \u0027Accommodations\u0027 table based on the \u0027state\u0027 column and filtering the data based on the given conditions." 
+}, { + "id": "1190", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of disability accommodations provided per individual with a disability in each region for the past year?", + "sql_context": "CREATE TABLE Disability_Accommodations (id INT, individual_id INT, region VARCHAR(50), accommodation_count INT, accommodation_date DATE);", + "sql": "SELECT region, AVG(accommodation_count) as avg_accommodation_count FROM Disability_Accommodations WHERE accommodation_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY region;", + "sql_explanation": "The SQL query calculates the average number of disability accommodations provided per individual with a disability in each region for the past year by filtering the Disability_Accommodations table using the accommodation_date. It then groups the results by region and calculates the average number of accommodations provided." 
+}, { + "id": "1306", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of students with hearing impairments who utilized sign language interpreters in 2020?", + "sql_context": "CREATE TABLE hearing_impairments (student_id INT, student_name TEXT, accommodation_type TEXT, accommodation_year INT); INSERT INTO hearing_impairments (student_id, student_name, accommodation_type, accommodation_year) VALUES (1, \u0027Hannah\u0027, \u0027Sign Language Interpreter\u0027, 2020), (2, \u0027Jacob\u0027, \u0027Assistive Listening Device\u0027, 2020), (3, \u0027Katie\u0027, \u0027Sign Language Interpreter\u0027, 2020), (4, \u0027Liam\u0027, \u0027Note Taker\u0027, 2020);", + "sql": "SELECT accommodation_year, COUNT(*) AS interpreters_used FROM hearing_impairments WHERE accommodation_type \u003d \u0027Sign Language Interpreter\u0027 GROUP BY accommodation_year;", + "sql_explanation": "This query calculates the number of students with hearing impairments who utilized sign language interpreters in 2020. It does so by using the WHERE clause to filter the hearing_impairments table to only include students who used sign language interpreters. Then, it uses the GROUP BY clause to group the results by accommodation_year, and the COUNT function to count the number of sign language interpreter accommodations." 
+}, { + "id": "1328", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of individuals with hearing impairments to those with visual impairments in each location?", + "sql_context": "CREATE TABLE Individuals (id INT, impairment TEXT, location TEXT); INSERT INTO Individuals (id, impairment, location) VALUES (1, \u0027Visual\u0027, \u0027Texas\u0027), (2, \u0027Hearing\u0027, \u0027Texas\u0027), (3, \u0027Visual\u0027, \u0027California\u0027), (4, \u0027Hearing\u0027, \u0027California\u0027);", + "sql": "SELECT location, COUNT(CASE WHEN impairment \u003d \u0027Hearing\u0027 THEN 1 END) / COUNT(CASE WHEN impairment \u003d \u0027Visual\u0027 THEN 1 END) AS ratio FROM Individuals GROUP BY location;", + "sql_explanation": "This query calculates the ratio of individuals with hearing impairments to those with visual impairments in each location by grouping the \u0027Individuals\u0027 table by the \u0027location\u0027 column and then calculating the ratio of individuals with hearing impairments to those with visual impairments using conditional aggregation." 
+}, { + "id": "1468", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the breakdown of accommodations provided for students with autism spectrum disorder in SchoolD?", + "sql_context": "CREATE TABLE Accommodations (Student VARCHAR(255), School VARCHAR(255), Accommodation VARCHAR(255)); INSERT INTO Accommodations (Student, School, Accommodation) VALUES (\u0027Student1\u0027, \u0027SchoolD\u0027, \u0027Visual Schedule\u0027), (\u0027Student2\u0027, \u0027SchoolD\u0027, \u0027Social Story\u0027), (\u0027Student3\u0027, \u0027SchoolD\u0027, \u0027Sensory Toolbox\u0027);", + "sql": "SELECT Accommodation, COUNT(*) as CountOfAccommodations FROM Accommodations WHERE School \u003d \u0027SchoolD\u0027 AND Accommodation LIKE \u0027%Autism%\u0027 GROUP BY Accommodation;", + "sql_explanation": "This query calculates the breakdown of accommodations provided for students with autism spectrum disorder in SchoolD. It filters accommodations based on the school \u0027SchoolD\u0027 and the keyword \u0027Autism\u0027, groups the results by accommodation, and calculates the number of accommodations for each accommodation using the COUNT function." 
+}, { + "id": "1633", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum, minimum, and average budget for support programs by disability type?", + "sql_context": "CREATE TABLE support_programs (program_id INT, program_name VARCHAR(50), budget INT, disability_type VARCHAR(50)); INSERT INTO support_programs (program_id, program_name, budget, disability_type) VALUES (1, \u0027Accessible Technology\u0027, 75000, \u0027Visual\u0027);", + "sql": "SELECT disability_type, MAX(budget) as max_budget, MIN(budget) as min_budget, AVG(budget) as avg_budget FROM support_programs GROUP BY disability_type;", + "sql_explanation": "This query groups the records in the support_programs table by the disability_type column and calculates the maximum, minimum, and average budget for support programs in each disability type, returning the maximum, minimum, and average budget for support programs by disability type." 
+}, { + "id": "1942", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of accommodations provided for students with learning disabilities in each school?", + "sql_context": "CREATE TABLE Accommodations (SchoolName VARCHAR(255), Student VARCHAR(255), Accommodation VARCHAR(255)); INSERT INTO Accommodations (SchoolName, Student, Accommodation) VALUES (\u0027SchoolA\u0027, \u0027Student1\u0027, \u0027Extra Time\u0027), (\u0027SchoolA\u0027, \u0027Student2\u0027, \u0027Reader\u0027), (\u0027SchoolB\u0027, \u0027Student3\u0027, \u0027Extra Time\u0027);", + "sql": "SELECT SchoolName, COUNT(*) as TotalAccommodations FROM Accommodations WHERE Accommodation LIKE \u0027%Learning Disability%\u0027 GROUP BY SchoolName;", + "sql_explanation": "This query calculates the total number of accommodations provided for students with learning disabilities in each school. It filters accommodations based on the keyword \u0027Learning Disability\u0027 and groups the results by school name, then counts the number of accommodations for each school using the COUNT function." 
+}, { + "id": "2295", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of accommodations provided in Australia by accommodation type?", + "sql_context": "CREATE TABLE accommodations (id INT, country VARCHAR(255), region VARCHAR(255), accommodation_type VARCHAR(255), count INT); INSERT INTO accommodations (id, country, region, accommodation_type, count) VALUES (1, \u0027Australia\u0027, \u0027Northern\u0027, \u0027Braille Materials\u0027, 150); INSERT INTO accommodations (id, country, region, accommodation_type, count) VALUES (2, \u0027Australia\u0027, \u0027Southern\u0027, \u0027Accessible Furniture\u0027, 250);", + "sql": "SELECT accommodation_type, SUM(count) as total_count FROM accommodations WHERE country \u003d \u0027Australia\u0027 GROUP BY accommodation_type;", + "sql_explanation": "This SQL query calculates the total number of accommodations provided in Australia by accommodation type. It filters the accommodations table to only include rows where the country is \u0027Australia\u0027, then groups the results by accommodation_type and calculates the sum of the count column for each group." 
+}, { + "id": "2399", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students with disabilities are enrolled in each university department?", + "sql_context": "CREATE TABLE Student_Enrollment (Student_ID INT, Student_Name VARCHAR(50), Department VARCHAR(50), Disability_Status VARCHAR(10));", + "sql": "SELECT Department, COUNT(*) as Enrolled_Students FROM Student_Enrollment WHERE Disability_Status \u003d \u0027Yes\u0027 GROUP BY Department;", + "sql_explanation": "This SQL query counts the number of students with disabilities enrolled in each university department. It uses the COUNT function to count the number of students and the GROUP BY clause to group the results by department." 
+}, { + "id": "3037", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of disability advocacy events held in each city and the total cost for each city.", + "sql_context": "CREATE TABLE disability_advocacy_events (event_id INT, city VARCHAR(255), date DATE, cost INT); INSERT INTO disability_advocacy_events (event_id, city, date, cost) VALUES (1, \u0027New York\u0027, \u00272021-03-22\u0027, 5000); INSERT INTO disability_advocacy_events (event_id, city, date, cost) VALUES (2, \u0027Los Angeles\u0027, \u00272021-04-01\u0027, 7000);", + "sql": "SELECT city, COUNT(*) as num_events, SUM(cost) as total_cost FROM disability_advocacy_events GROUP BY city;", + "sql_explanation": "This query selects the city, COUNT(*) as num_events, and SUM(cost) as total_cost from the disability_advocacy_events table and groups the results by city. This gives the number of disability advocacy events held in each city and the total cost for each city." 
+}, { + "id": "3765", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of support programs offered by each department, excluding any duplicate program names.", + "sql_context": "CREATE TABLE support_programs (program_name VARCHAR(50), department VARCHAR(50)); INSERT INTO support_programs VALUES (\u0027Tutoring\u0027, \u0027Science\u0027), (\u0027Writing Center\u0027, \u0027English\u0027), (\u0027Tutoring\u0027, \u0027Science\u0027), (\u0027Accessibility Services\u0027, \u0027General\u0027);", + "sql": "SELECT department, COUNT(DISTINCT program_name) FROM support_programs GROUP BY department;", + "sql_explanation": "The query uses the COUNT(DISTINCT) function to find the number of unique support programs offered by each department, excluding any duplicate program names." 
+}, { + "id": "3883", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum cost of accommodations in the AssistiveTechnology table for each accommodation type?", + "sql_context": "CREATE TABLE AssistiveTechnology (studentID INT, accommodationType VARCHAR(50), cost DECIMAL(5,2));", + "sql": "SELECT accommodationType, MAX(cost) FROM AssistiveTechnology GROUP BY accommodationType;", + "sql_explanation": "This query calculates the maximum cost value in the AssistiveTechnology table for each accommodationType by summing up the cost column values for each accommodationType in the table and selecting the maximum value." 
+}, { + "id": "4228", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students with disabilities are enrolled in each program in the Midwest?", + "sql_context": "CREATE TABLE Students (ID INT, Name VARCHAR(50), Disability VARCHAR(50), Program VARCHAR(50), Region VARCHAR(50)); INSERT INTO Students (ID, Name, Disability, Program, Region) VALUES (1, \u0027Jane Doe\u0027, \u0027Visual Impairment\u0027, \u0027Braille Literacy\u0027, \u0027Midwest\u0027), (2, \u0027John Doe\u0027, \u0027Hearing Loss\u0027, \u0027Sign Language\u0027, \u0027Midwest\u0027);", + "sql": "SELECT Program, COUNT(*) FROM Students WHERE Region \u003d \u0027Midwest\u0027 GROUP BY Program;", + "sql_explanation": "This SQL query counts the number of students with disabilities enrolled in each program in the Midwest by selecting the Program column and using the COUNT function to count the number of rows in the Students table for each unique value in the Program column, filtering the rows with the WHERE clause to only include records where the Region is \u0027Midwest\u0027. The results are grouped by the Program column using the GROUP BY clause." 
+}, { + "id": "5105", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average budget for support programs in each region", + "sql_context": "CREATE TABLE support_programs (program_id INT, program_name VARCHAR(30), budget DECIMAL(10,2), region VARCHAR(20)); INSERT INTO support_programs (program_id, program_name, budget, region) VALUES (1, \u0027Mobility Support\u0027, 25000, \u0027North\u0027), (2, \u0027Assistive Technology\u0027, 30000, \u0027South\u0027), (3, \u0027Note Taking\u0027, 15000, \u0027East\u0027), (4, \u0027Diversity Training\u0027, 40000, \u0027West\u0027);", + "sql": "SELECT region, AVG(budget) FROM support_programs GROUP BY region;", + "sql_explanation": "This query groups the support_programs table by the region column and calculates the average budget in each group." 
+}, { + "id": "5167", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of inclusion efforts in each region", + "sql_context": "CREATE TABLE inclusion_efforts (effort_id INT, effort_name VARCHAR(30), region VARCHAR(20)); INSERT INTO inclusion_efforts (effort_id, effort_name, region) VALUES (1, \u0027Accessible Buildings\u0027, \u0027North\u0027), (2, \u0027Diversity Training\u0027, \u0027South\u0027), (3, \u0027Inclusive Hiring\u0027, \u0027East\u0027), (4, \u0027Community Outreach\u0027, \u0027West\u0027);", + "sql": "SELECT region, COUNT(*) FROM inclusion_efforts GROUP BY region;", + "sql_explanation": "This query groups the inclusion_efforts table by the region column and counts the number of rows in each group." 
+}, { + "id": "1470", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total budget allocated for public transportation in 2019 and 2020, and which year had a lower allocation?", + "sql_context": "CREATE TABLE BudgetAllocations (Year INT, Service TEXT, Amount INT); INSERT INTO BudgetAllocations (Year, Service, Amount) VALUES (2019, \u0027PublicTransportation\u0027, 14000000), (2020, \u0027PublicTransportation\u0027, 15000000);", + "sql": "SELECT Year, SUM(Amount) FROM BudgetAllocations WHERE Service \u003d \u0027PublicTransportation\u0027 GROUP BY Year HAVING Year IN (2019, 2020) ORDER BY SUM(Amount) LIMIT 1;", + "sql_explanation": "This query calculates the total budget allocated for public transportation in 2019 and 2020, and identifies the year with the lower allocation by grouping by Year and Service, filtering for the specified years, and ordering by the sum of the Amount column in ascending order, then returning the first row." 
+}, { + "id": "1500", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of public events organized by the department of community services in the city of Toronto.", + "sql_context": "CREATE SCHEMA gov_data;CREATE TABLE gov_data.public_events (city VARCHAR(20), department VARCHAR(20), events INT); INSERT INTO gov_data.public_events (city, department, events) VALUES (\u0027Toronto\u0027, \u0027Community Services\u0027, 25), (\u0027Toronto\u0027, \u0027Parks\u0027, 15), (\u0027Toronto\u0027, \u0027Public Libraries\u0027, 10);", + "sql": "SELECT department, SUM(events) as total_events FROM gov_data.public_events WHERE city \u003d \u0027Toronto\u0027 AND department \u003d \u0027Community Services\u0027 GROUP BY department;", + "sql_explanation": "This query identifies the number of public events organized by the department of community services in the city of Toronto by grouping the public_events table by department and summing up the events values for the Community Services department in Toronto." 
+}, { + "id": "1526", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocation for education and health services in the year 2020, per region?", + "sql_context": "CREATE TABLE BudgetAllocations (Year INT, Service VARCHAR(255), Region VARCHAR(255), Allocation FLOAT); INSERT INTO BudgetAllocations (Year, Service, Region, Allocation) VALUES (2020, \u0027Education\u0027, \u0027North\u0027, 5000000), (2020, \u0027Health\u0027, \u0027North\u0027, 7000000), (2020, \u0027Education\u0027, \u0027South\u0027, 6000000), (2020, \u0027Health\u0027, \u0027South\u0027, 8000000);", + "sql": "SELECT AVG(Allocation) AS AvgAllocation, Region FROM BudgetAllocations WHERE Year \u003d 2020 AND (Service \u003d \u0027Education\u0027 OR Service \u003d \u0027Health\u0027) GROUP BY Region;", + "sql_explanation": "This query calculates the average budget allocation for Education and Health services in 2020, grouped by region. It filters the BudgetAllocations table for the specified year and services, calculates the average allocation, and groups the results by region." 
+}, { + "id": "1968", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for public service requests in urban and rural areas of Japan, for the last quarter?", + "sql_context": "CREATE TABLE ResponseTimes (Area VARCHAR(50), ResponseTime DECIMAL(3,2), RequestDate DATE); INSERT INTO ResponseTimes (Area, ResponseTime, RequestDate) VALUES (\u0027Urban\u0027, 2.50, \u00272022-04-01\u0027), (\u0027Urban\u0027, 2.75, \u00272022-04-02\u0027), (\u0027Rural\u0027, 3.25, \u00272022-04-01\u0027), (\u0027Rural\u0027, 3.00, \u00272022-04-02\u0027);", + "sql": "SELECT Area, AVG(ResponseTime) as AvgResponseTime FROM ResponseTimes WHERE RequestDate \u003e\u003d DATEADD(quarter, -1, CURRENT_DATE) GROUP BY Area;", + "sql_explanation": "This SQL query calculates the average response time for public service requests in urban and rural areas of Japan for the last quarter. It averages the \u0027ResponseTime\u0027 column grouped by the \u0027Area\u0027 column, filtering the rows where the \u0027RequestDate\u0027 is within the last quarter from the current date." 
+}, { + "id": "2084", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for public transportation in each city?", + "sql_context": "CREATE TABLE City_Transport_Budget (City VARCHAR(255), Department VARCHAR(255), Budget INT); INSERT INTO City_Transport_Budget (City, Department, Budget) VALUES (\u0027New York\u0027, \u0027Public Transportation\u0027, 200000000); INSERT INTO City_Transport_Budget (City, Department, Budget) VALUES (\u0027Los Angeles\u0027, \u0027Public Transportation\u0027, 100000000);", + "sql": "SELECT City, SUM(Budget) AS Total_Transport_Budget FROM City_Transport_Budget WHERE Department \u003d \u0027Public Transportation\u0027 GROUP BY City;", + "sql_explanation": "This SQL query calculates the total budget for public transportation in each city by summing up the budget for the Public Transportation department in each city." 
+}, { + "id": "2459", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated to each department in 2023?", + "sql_context": "CREATE TABLE department (id INT, name TEXT, budget INT, created_at DATETIME); INSERT INTO department (id, name, budget, created_at) VALUES (1, \u0027education\u0027, 500000, \u00272021-01-01\u0027), (2, \u0027healthcare\u0027, 1000000, \u00272022-01-01\u0027);", + "sql": "SELECT name, AVG(budget) as avg_budget FROM department WHERE created_at BETWEEN \u00272023-01-01\u0027 AND \u00272023-12-31\u0027 GROUP BY name;", + "sql_explanation": "Calculate the average budget allocated to each department in 2023 by filtering records between \u00272023-01-01\u0027 and \u00272023-12-31\u0027, grouping departments by name, and finding the average budget for each group." 
+}, { + "id": "2699", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time to citizen complaints per day, with the fastest response time first?", + "sql_context": "CREATE TABLE Daily_Response(Day DATE, Response_Time INT); INSERT INTO Daily_Response VALUES (\u00272022-01-01\u0027, 2), (\u00272022-01-01\u0027, 5), (\u00272022-01-02\u0027, 3), (\u00272022-01-03\u0027, 4), (\u00272022-01-03\u0027, 6);", + "sql": "SELECT Day, AVG(Response_Time) as Avg_Response_Time FROM Daily_Response GROUP BY Day ORDER BY Avg_Response_Time ASC;", + "sql_explanation": "The SQL query calculates the average response time to citizen complaints per day. It groups the data by day and then calculates the average response time for each group. Finally, it orders the results by the average response time in ascending order." 
+}, { + "id": "2902", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of public hospitals in cities with a population greater than 1 million?", + "sql_context": "CREATE TABLE City (id INT, name VARCHAR(50), population INT, num_hospitals INT); INSERT INTO City (id, name, population, num_hospitals) VALUES (1, \u0027Mumbai\u0027, 20411274, 50); INSERT INTO City (id, name, population, num_hospitals) VALUES (2, \u0027SÃŖo Paulo\u0027, 21846507, 75); INSERT INTO City (id, name, population, num_hospitals) VALUES (3, \u0027Seoul\u0027, 9733509, 35);", + "sql": "SELECT name, SUM(num_hospitals) as \u0027Total Public Hospitals\u0027 FROM City WHERE population \u003e 1000000 GROUP BY name;", + "sql_explanation": "Sum the total number of public hospitals for cities with a population greater than 1 million." 
+}, { + "id": "3171", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of park vendors in CityC?", + "sql_context": "CREATE TABLE Vendor(City VARCHAR(20), Department VARCHAR(20), Vendor_Count INT); INSERT INTO Vendor VALUES(\u0027CityA\u0027, \u0027Parks\u0027, 3), (\u0027CityA\u0027, \u0027Education\u0027, 5), (\u0027CityB\u0027, \u0027Parks\u0027, 2), (\u0027CityB\u0027, \u0027Education\u0027, 4), (\u0027CityC\u0027, \u0027Parks\u0027, 1);", + "sql": "SELECT City, AVG(Vendor_Count) FROM Vendor WHERE Department \u003d \u0027Parks\u0027 AND City \u003d \u0027CityC\u0027 GROUP BY City;", + "sql_explanation": "Calculate the average number of park vendors in CityC." 
+}, { + "id": "3404", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average citizen satisfaction score for public services in each district of City X in 2022?", + "sql_context": "CREATE TABLE Satisfaction (Year INT, District VARCHAR(255), Score FLOAT); INSERT INTO Satisfaction (Year, District, Score) VALUES (2022, \u0027District A\u0027, 4.2); INSERT INTO Satisfaction (Year, District, Score) VALUES (2022, \u0027District B\u0027, 4.5); INSERT INTO Satisfaction (Year, District, Score) VALUES (2022, \u0027District C\u0027, 4.3);", + "sql": "SELECT District, AVG(Score) as AverageScore FROM Satisfaction WHERE Year \u003d 2022 GROUP BY District;", + "sql_explanation": "The SQL query calculates the average citizen satisfaction score for public services in each district of City X in 2022. It averages the scores for each district and groups the result by district." 
+}, { + "id": "3626", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for transportation in each city?", + "sql_context": "CREATE TABLE City_Budget(City VARCHAR(20), Department VARCHAR(20), Budget INT); INSERT INTO City_Budget(City, Department, Budget) VALUES(\u0027Toronto\u0027, \u0027Parks\u0027, 25000000); INSERT INTO City_Budget(City, Department, Budget) VALUES(\u0027Toronto\u0027, \u0027Transportation\u0027, 50000000); INSERT INTO City_Budget(City, Department, Budget) VALUES(\u0027Montreal\u0027, \u0027Parks\u0027, 18000000); INSERT INTO City_Budget(City, Department, Budget) VALUES(\u0027Montreal\u0027, \u0027Transportation\u0027, 42000000);", + "sql": "SELECT City, SUM(Budget) FROM City_Budget WHERE Department \u003d \u0027Transportation\u0027 GROUP BY City;", + "sql_explanation": "This query calculates the total budget for transportation in each city. It filters the City_Budget table for the \u0027Transportation\u0027 department, groups the results by city, and calculates the sum of budgets for each group." 
+}, { + "id": "3753", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of citizens\u0027 feedback forms submitted to each city council in the \u0027Rural\u0027 region?", + "sql_context": "CREATE SCHEMA Government;CREATE TABLE Government.Region (name VARCHAR(255), budget INT);CREATE TABLE Government.City (name VARCHAR(255), region VARCHAR(255), feedbacks INT);", + "sql": "SELECT region, MIN(feedbacks) FROM Government.City WHERE region \u003d \u0027Rural\u0027 GROUP BY region;", + "sql_explanation": "We select the minimum number of citizens\u0027 feedback forms submitted to each city council in the \u0027Rural\u0027 region." 
+}, { + "id": "3820", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget allocated per service category in the Education department?", + "sql_context": "CREATE TABLE EducationBudget (Department VARCHAR(25), Category VARCHAR(25), Budget INT); INSERT INTO EducationBudget (Department, Category, Budget) VALUES (\u0027Education\u0027, \u0027Elementary\u0027, 5000000), (\u0027Education\u0027, \u0027Secondary\u0027, 7000000), (\u0027Education\u0027, \u0027Higher Education\u0027, 8000000);", + "sql": "SELECT AVG(Budget) FROM EducationBudget WHERE Department \u003d \u0027Education\u0027 GROUP BY Category;", + "sql_explanation": "The SQL query calculates the average budget for each service category in the Education department. It groups the data by category and then calculates the average budget for each group." 
+}, { + "id": "4835", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total number of hospitals in each city in the state of New York.", + "sql_context": "CREATE TABLE Hospitals (City VARCHAR(255), State VARCHAR(255), Type VARCHAR(255)); INSERT INTO Hospitals (City, State, Type) VALUES (\u0027New York\u0027, \u0027NY\u0027, \u0027Hospital\u0027), (\u0027Buffalo\u0027, \u0027NY\u0027, \u0027Hospital\u0027), (\u0027Rochester\u0027, \u0027NY\u0027, \u0027Hospital\u0027);", + "sql": "SELECT City, COUNT(*) FROM Hospitals WHERE State \u003d \u0027NY\u0027 GROUP BY City;", + "sql_explanation": "The SQL query displays the total number of hospitals in each city in the state of New York from the Hospitals table. It filters the records where State is equal to \u0027NY\u0027, then groups the records by city and calculates the count." 
+}, { + "id": "4900", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the policy impact of unemployment rates in rural areas compared to urban areas from 2018 to 2021?", + "sql_context": "CREATE TABLE Unemployment(Year INT, Location VARCHAR(10), Rate DECIMAL(4, 2)); INSERT INTO Unemployment VALUES (2018, \u0027Rural\u0027, 5.2), (2018, \u0027Urban\u0027, 4.8), (2019, \u0027Rural\u0027, 5.1), (2019, \u0027Urban\u0027, 4.7), (2020, \u0027Rural\u0027, 5.6), (2020, \u0027Urban\u0027, 5.1), (2021, \u0027Rural\u0027, 5.5), (2021, \u0027Urban\u0027, 4.9);", + "sql": "SELECT Year, AVG(Rate) FROM Unemployment GROUP BY Year ORDER BY Year;", + "sql_explanation": "This SQL query shows the policy impact of unemployment rates in rural areas compared to urban areas from 2018 to 2021 by grouping the data by Year and calculating the average unemployment Rate for each Year." 
+}, { + "id": "1477", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of production budgets for movies by release decade?", + "sql_context": "CREATE TABLE Movies (MovieId INT, Title VARCHAR(50), ReleaseYear INT, Genre VARCHAR(50), Budget INT); INSERT INTO Movies (MovieId, Title, ReleaseYear, Genre, Budget) VALUES (1, \u0027MovieA\u0027, 2010, \u0027Action\u0027, 10000000), (2, \u0027MovieB\u0027, 2015, \u0027Comedy\u0027, 5000000), (3, \u0027MovieC\u0027, 2020, \u0027Action\u0027, 20000000);", + "sql": "SELECT ROW_NUMBER() OVER(ORDER BY DATEPART( decade, ReleaseYear )) AS Decade, AVG(Budget) AS Avg_Budget FROM Movies GROUP BY DATEPART( decade, ReleaseYear );", + "sql_explanation": "This query calculates the average production budget for movies grouped by their release decade using the ROW_NUMBER and AVG functions." 
+}, { + "id": "2632", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the top 2 countries with the most music streams in the last month.", + "sql_context": "CREATE TABLE music_streams (song VARCHAR(255), country VARCHAR(255), streams INT, stream_date DATE); INSERT INTO music_streams (song, country, streams, stream_date) VALUES (\u0027Song1\u0027, \u0027Country1\u0027, 100000, \u00272022-01-01\u0027), (\u0027Song2\u0027, \u0027Country2\u0027, 150000, \u00272022-01-02\u0027), (\u0027Song3\u0027, \u0027Country3\u0027, 120000, \u00272022-01-03\u0027), (\u0027Song4\u0027, \u0027Country1\u0027, 110000, \u00272022-01-04\u0027), (\u0027Song5\u0027, \u0027Country3\u0027, 130000, \u00272022-01-05\u0027); ALTER TABLE music_streams ADD CONSTRAINT chk_stream_date CHECK (stream_date \u003e\u003d DATEADD(month, -1, GETDATE()));", + "sql": "SELECT country, SUM(streams) as total_streams FROM music_streams GROUP BY country ORDER BY total_streams DESC LIMIT 2;", + "sql_explanation": "This query lists the top 2 countries with the most music streams in the last month by grouping the data by country, summing the streams, and sorting in descending order." 
+}, { + "id": "2962", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for music concerts, by city and month?", + "sql_context": "CREATE TABLE MusicConcerts (title VARCHAR(255), city VARCHAR(255), revenue FLOAT, concert_date DATE); INSERT INTO MusicConcerts (title, city, revenue, concert_date) VALUES (\u0027Concert1\u0027, \u0027NYC\u0027, 100000, \u00272022-01-01\u0027), (\u0027Concert2\u0027, \u0027LA\u0027, 120000, \u00272022-01-02\u0027), (\u0027Concert3\u0027, \u0027Chicago\u0027, 80000, \u00272022-01-03\u0027), (\u0027Concert4\u0027, \u0027NYC\u0027, 110000, \u00272022-02-01\u0027), (\u0027Concert5\u0027, \u0027LA\u0027, 130000, \u00272022-02-02\u0027);", + "sql": "SELECT city, DATE_PART(\u0027month\u0027, concert_date) as month, SUM(revenue) FROM MusicConcerts GROUP BY city, month;", + "sql_explanation": "This SQL query calculates the total revenue for music concerts, by city and month. It first extracts the month from the concert date, and then groups the data by city and month. Finally, it calculates the sum of revenue for each group." 
+}, { + "id": "3871", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many movies and TV shows were released per year, in which Latin American country?", + "sql_context": "CREATE TABLE MovieTVShows (title VARCHAR(255), release_year INT, country VARCHAR(255)); INSERT INTO MovieTVShows (title, release_year, country) VALUES (\u0027Movie1\u0027, 2018, \u0027Mexico\u0027), (\u0027TVShow1\u0027, 2019, \u0027Brazil\u0027), (\u0027Movie2\u0027, 2020, \u0027Argentina\u0027), (\u0027TVShow2\u0027, 2021, \u0027Colombia\u0027), (\u0027Movie3\u0027, 2018, \u0027Chile\u0027);", + "sql": "SELECT release_year, country, COUNT(*) FROM MovieTVShows GROUP BY release_year, country;", + "sql_explanation": "This SQL query calculates the number of movies and TV shows released per year, in each Latin American country. It first groups the data by release year and country, and then calculates the count of records for each group." 
+}, { + "id": "4247", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of music albums released by year?", + "sql_context": "CREATE TABLE MusicAlbums (AlbumID INT, Title VARCHAR(100), ReleaseYear INT, Genre VARCHAR(50));", + "sql": "SELECT ReleaseYear, COUNT(*) as Num_Albums FROM MusicAlbums GROUP BY ReleaseYear;", + "sql_explanation": "This query counts the number of music albums released by year." +}, { + "id": "4556", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average viewership for TV shows by network?", + "sql_context": "CREATE TABLE TV_Shows_Viewership (id INT, title VARCHAR(100), network VARCHAR(50), avg_viewers DECIMAL(10,2)); INSERT INTO TV_Shows_Viewership (id, title, network, avg_viewers) VALUES (1, \u0027The Simpsons\u0027, \u0027FOX\u0027, 5000000.00), (2, \u0027Game of Thrones\u0027, \u0027HBO\u0027, 12000000.00), (3, \u0027Friends\u0027, \u0027NBC\u0027, 20000000.00);", + "sql": "SELECT network, AVG(avg_viewers) FROM TV_Shows_Viewership GROUP BY network;", + "sql_explanation": "This query calculates the average viewership for TV shows by network." 
+}, { + "id": "5462", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many TV shows were produced in each country?", + "sql_context": "CREATE TABLE TV_Shows (id INT, title VARCHAR(255), country VARCHAR(255)); INSERT INTO TV_Shows (id, title, country) VALUES (1, \u0027TV Show 1\u0027, \u0027USA\u0027), (2, \u0027TV Show 2\u0027, \u0027UK\u0027), (3, \u0027TV Show 3\u0027, \u0027Canada\u0027), (4, \u0027TV Show 4\u0027, \u0027Australia\u0027);", + "sql": "SELECT country, COUNT(*) FROM TV_Shows GROUP BY country;", + "sql_explanation": "The SQL query calculates the number of TV shows in each country by using the COUNT function on the * wildcard and grouping the data by country." 
+}, { + "id": "1228", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total REE production for each quarter in 2020?", + "sql_context": "CREATE TABLE mines (id INT, name TEXT, location TEXT, quarter INT, annual_production INT); INSERT INTO mines (id, name, location, quarter, annual_production) VALUES (1, \u0027Mine A\u0027, \u0027Country X\u0027, 1, 375), (2, \u0027Mine B\u0027, \u0027Country Y\u0027, 1, 500), (3, \u0027Mine C\u0027, \u0027Country Z\u0027, 1, 437), (1, \u0027Mine A\u0027, \u0027Country X\u0027, 2, 400), (2, \u0027Mine B\u0027, \u0027Country Y\u0027, 2, 500), (3, \u0027Mine C\u0027, \u0027Country Z\u0027, 2, 437), (1, \u0027Mine A\u0027, \u0027Country X\u0027, 3, 425), (2, \u0027Mine B\u0027, \u0027Country Y\u0027, 3, 500), (3, \u0027Mine C\u0027, \u0027Country Z\u0027, 3, 462), (1, \u0027Mine A\u0027, \u0027Country X\u0027, 4, 375), (2, \u0027Mine B\u0027, \u0027Country Y\u0027, 4, 500), (3, \u0027Mine C\u0027, \u0027Country Z\u0027, 4, 463);", + "sql": "SELECT YEAR(timestamp) as year, QUARTER(timestamp) as quarter, SUM(annual_production) as total_production FROM mines WHERE YEAR(timestamp) \u003d 2020 GROUP BY year, quarter;", + "sql_explanation": "This query calculates the total REE production for each quarter in 2020. It filters the mines table for 2020 records and groups them by year and quarter. Then, it sums the annual production for each group." 
+}, { + "id": "1288", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 2 countries with the highest total production of Dysprosium in 2021 and their respective production amounts.", + "sql_context": "CREATE TABLE Country (Code TEXT, Name TEXT, Continent TEXT); INSERT INTO Country (Code, Name, Continent) VALUES (\u0027CN\u0027, \u0027China\u0027, \u0027Asia\u0027), (\u0027AU\u0027, \u0027Australia\u0027, \u0027Australia\u0027), (\u0027US\u0027, \u0027United States\u0027, \u0027North America\u0027), (\u0027IN\u0027, \u0027India\u0027, \u0027Asia\u0027); CREATE TABLE ProductionCountry (Year INT, Country TEXT, Element TEXT, Quantity INT); INSERT INTO ProductionCountry (Year, Country, Element, Quantity) VALUES (2021, \u0027CN\u0027, \u0027Dysprosium\u0027, 1500), (2021, \u0027AU\u0027, \u0027Dysprosium\u0027, 800), (2021, \u0027US\u0027, \u0027Dysprosium\u0027, 1200), (2021, \u0027IN\u0027, \u0027Dysprosium\u0027, 900);", + "sql": "SELECT Country, SUM(Quantity) FROM ProductionCountry WHERE Element \u003d \u0027Dysprosium\u0027 AND Year \u003d 2021 GROUP BY Country ORDER BY SUM(Quantity) DESC FETCH FIRST 2 ROWS ONLY;", + "sql_explanation": "This query identifies the top 2 countries with the highest total production of Dysprosium in 2021 and their respective production amounts. It groups the ProductionCountry table by Country and calculates the sum of Quantity for Dysprosium. The results are then ordered by SUM(Quantity) in descending order and the FETCH FIRST 2 ROWS ONLY function is used to limit the results to 2 rows." 
+}, { + "id": "1404", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the country with the highest total production of Lanthanum in 2020 and 2021?", + "sql_context": "CREATE TABLE Lanthanum_Production (year INT, country TEXT, production INT); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2020, \u0027China\u0027, 5000); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2020, \u0027USA\u0027, 3000); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2020, \u0027Russia\u0027, 2000); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2021, \u0027China\u0027, 5500); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2021, \u0027USA\u0027, 3500); INSERT INTO Lanthanum_Production (year, country, production) VALUES (2021, \u0027Russia\u0027, 2500);", + "sql": "SELECT country, SUM(production) as total_production FROM Lanthanum_Production WHERE year IN (2020, 2021) GROUP BY country ORDER BY total_production DESC LIMIT 1;", + "sql_explanation": "The SQL query identifies the country with the highest total production of Lanthanum in 2020 and 2021 by using the SUM function to calculate the total production for each country, grouping the records by country, and ordering the result set in descending order based on the total production. The query then returns the country with the highest total production using the LIMIT clause." 
+}, { + "id": "1660", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of Terbium (Tb) and Gadolinium (Gd) supplied by each supplier in 2020, ordered by supplier name?", + "sql_context": "CREATE TABLE supplier_data (supplier VARCHAR(25), element VARCHAR(2), quantity INT, year INT); INSERT INTO supplier_data VALUES (\u0027SupplierX\u0027, \u0027Tb\u0027, 250, 2020), (\u0027SupplierY\u0027, \u0027Gd\u0027, 350, 2020), (\u0027SupplierX\u0027, \u0027Gd\u0027, 150, 2020);", + "sql": "SELECT supplier, SUM(quantity) AS total_quantity FROM supplier_data WHERE element IN (\u0027Tb\u0027, \u0027Gd\u0027) AND year \u003d 2020 GROUP BY supplier ORDER BY supplier;", + "sql_explanation": "This SQL query calculates the total quantity of Terbium (Tb) and Gadolinium (Gd) supplied by each supplier in 2020 and orders the result by supplier name. It filters the supplier_data table for the desired elements and year, then groups the results by supplier and calculates the sum of quantities for each group. The result is ordered by supplier name." 
+}, { + "id": "2236", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total quantity of each rare earth element recycled by each recycler in 2020.", + "sql_context": "CREATE TABLE recyclers (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255));CREATE TABLE recycling_stats (recycler_id INT, year INT, element VARCHAR(255), quantity INT, PRIMARY KEY (recycler_id, year, element), FOREIGN KEY (recycler_id) REFERENCES recyclers(id));INSERT INTO recycling_stats (recycler_id, year, element, quantity) VALUES (1, 2019, \u0027Neodymium\u0027, 500), (1, 2019, \u0027Praseodymium\u0027, 700), (2, 2019, \u0027Neodymium\u0027, 800), (2, 2019, \u0027Praseodymium\u0027, 1000), (1, 2020, \u0027Neodymium\u0027, 550), (1, 2020, \u0027Praseodymium\u0027, 750), (2, 2020, \u0027Neodymium\u0027, 850), (2, 2020, \u0027Praseodymium\u0027, 1100), (1, 2020, \u0027Holmium\u0027, 100), (1, 2020, \u0027Erbium\u0027, 150), (2, 2020, \u0027Holmium\u0027, 120), (2, 2020, \u0027Erbium\u0027, 180);", + "sql": "SELECT recycler_id, element, SUM(quantity) AS Total_Recycled FROM recycling_stats WHERE year \u003d 2020 GROUP BY recycler_id, element;", + "sql_explanation": "This query calculates the total quantity of each rare earth element recycled by each recycler in 2020 by joining the recyclers table with the recycling_stats table on the recycler_id column, filtering for the desired year, and then using the GROUP BY clause to group the results by recycler and element. The SUM function is used to calculate the total quantity of each element recycled by each recycler." 
+}, { + "id": "2259", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production of Cerium in 2019 and 2020?", + "sql_context": "CREATE TABLE production (year INT, element VARCHAR(10), quantity INT); INSERT INTO production (year, element, quantity) VALUES (2019, \u0027Cerium\u0027, 10000), (2020, \u0027Cerium\u0027, 11000);", + "sql": "SELECT element, SUM(quantity) as total_quantity FROM production WHERE element \u003d \u0027Cerium\u0027 AND year IN (2019, 2020) GROUP BY element", + "sql_explanation": "This query calculates the total production quantity of Cerium in 2019 and 2020 by grouping data by element, selecting the SUM function on the quantity column, and filtering the data by the element \u0027Cerium\u0027 and year in (2019, 2020)." 
+}, { + "id": "3081", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which year had the highest Europium production in Africa?", + "sql_context": "CREATE TABLE production (year INT, region VARCHAR(10), element VARCHAR(10), quantity INT); INSERT INTO production (year, region, element, quantity) VALUES (2015, \u0027Africa\u0027, \u0027Europium\u0027, 1200), (2016, \u0027Africa\u0027, \u0027Europium\u0027, 1400), (2017, \u0027Africa\u0027, \u0027Europium\u0027, 1500), (2018, \u0027Africa\u0027, \u0027Europium\u0027, 1700), (2019, \u0027Africa\u0027, \u0027Europium\u0027, 1800);", + "sql": "SELECT year, MAX(quantity) FROM production WHERE element \u003d \u0027Europium\u0027 AND region \u003d \u0027Africa\u0027 GROUP BY year;", + "sql_explanation": "This query calculates the highest production quantity for Europium in Africa for each year by using the MAX function of the quantity column and the GROUP BY clause for rows where the element is Europium and the region is Africa." 
+}, { + "id": "4089", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total Scandium production for each year and quarter.", + "sql_context": "CREATE TABLE Scandium_Production (Year INT, Quarter INT, Quantity INT); INSERT INTO Scandium_Production (Year, Quarter, Quantity) VALUES (2017, 1, 125), (2017, 2, 140), (2017, 3, 155), (2017, 4, 170), (2018, 1, 185), (2018, 2, 210), (2018, 3, 235), (2018, 4, 260), (2019, 1, 285), (2019, 2, 310), (2019, 3, 335), (2019, 4, 360);", + "sql": "SELECT Year, Quarter, SUM(Quantity) FROM Scandium_Production GROUP BY Year, Quarter;", + "sql_explanation": "The SQL query calculates the total Scandium production quantity for each year and quarter." 
+}, { + "id": "4491", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of Neodymium per year?", + "sql_context": "CREATE TABLE production (element VARCHAR(10), year INT, price DECIMAL(5,2)); INSERT INTO production VALUES (\u0027Neodymium\u0027, 2015, 45.25), (\u0027Neodymium\u0027, 2016, 48.75), (\u0027Neodymium\u0027, 2017, 52.35), (\u0027Neodymium\u0027, 2018, 56.10), (\u0027Neodymium\u0027, 2019, 60.85), (\u0027Neodymium\u0027, 2020, 65.20);", + "sql": "SELECT AVG(price) FROM production WHERE element \u003d \u0027Neodymium\u0027 GROUP BY year;", + "sql_explanation": "The SQL query calculates the average price of Neodymium for each year by grouping the production table data by year and calculating the average price." 
+}, { + "id": "4680", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total water consumption from rare earth element production in each country?", + "sql_context": "CREATE TABLE water_consumption (country VARCHAR(50), consumption INT); INSERT INTO water_consumption (country, consumption) VALUES (\u0027China\u0027, 25000), (\u0027USA\u0027, 7000), (\u0027Australia\u0027, 4000), (\u0027India\u0027, 1000), (\u0027Brazil\u0027, 500);", + "sql": "SELECT country, SUM(consumption) FROM water_consumption GROUP BY country;", + "sql_explanation": "This query calculates the total water consumption from rare earth element production in each country by summing up the consumption for each country and grouping by country." 
+}, { + "id": "615", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display fan demographics, pivoted by gender", + "sql_context": "CREATE TABLE fan_demographics (id INT, age INT, gender VARCHAR(50), location VARCHAR(50), interest VARCHAR(50));", + "sql": "SELECT age, location, interest, SUM(CASE WHEN gender \u003d \u0027Male\u0027 THEN 1 ELSE 0 END) as males, SUM(CASE WHEN gender \u003d \u0027Female\u0027 THEN 1 ELSE 0 END) as females FROM fan_demographics GROUP BY age, location, interest;", + "sql_explanation": "This query pivots the \u0027fan_demographics\u0027 table by the \u0027gender\u0027 column, displaying separate columns for \u0027Male\u0027 and \u0027Female\u0027 counts in addition to the existing columns." 
+}, { + "id": "927", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display fan demographics, pivoted by location", + "sql_context": "CREATE TABLE fan_demographics (id INT, age INT, gender VARCHAR(50), location VARCHAR(50), interest VARCHAR(50));", + "sql": "SELECT gender, SUM(CASE WHEN location \u003d \u0027New York\u0027 THEN 1 ELSE 0 END) as new_york, SUM(CASE WHEN location \u003d \u0027Los Angeles\u0027 THEN 1 ELSE 0 END) as la FROM fan_demographics GROUP BY gender;", + "sql_explanation": "This query pivots the \u0027fan_demographics\u0027 table by the \u0027location\u0027 column, displaying separate columns for \u0027New York\u0027 and \u0027Los Angeles\u0027 counts in addition to the existing columns." 
+}, { + "id": "941", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of tickets sold for outdoor events in the last month, grouped by event type.", + "sql_context": "CREATE TABLE TicketSales (id INT, event_type VARCHAR(255), location VARCHAR(255), tickets_sold INT, price DECIMAL(5,2), ticket_type VARCHAR(50), date DATE); INSERT INTO TicketSales (id, event_type, location, tickets_sold, price, ticket_type, date) VALUES (1, \u0027Concert\u0027, \u0027Indoor Arena\u0027, 1500, 150, \u0027VIP\u0027, \u00272021-11-01\u0027), (2, \u0027Sports Game\u0027, \u0027Outdoor Stadium\u0027, 8000, 50, \u0027General Admission\u0027, \u00272021-10-15\u0027), (3, \u0027Concert\u0027, \u0027Indoor Arena\u0027, 2000, 200, \u0027VIP\u0027, \u00272021-12-10\u0027);", + "sql": "SELECT event_type, SUM(tickets_sold) as total_tickets_sold FROM TicketSales WHERE location \u003d \u0027Outdoor Stadium\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY event_type;", + "sql_explanation": "This query calculates the total number of tickets sold for outdoor events in the last month, grouped by event type by filtering the tickets sold based on location and date range, and then grouping them by event type and calculating the total tickets sold for each event type using the GROUP BY clause." 
+}, { + "id": "1443", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which athletes have the highest and lowest wellbeing scores?", + "sql_context": "CREATE TABLE Athletes (AthleteID INT, AthleteName VARCHAR(255), TotalWellbeingScore INT); INSERT INTO Athletes (AthleteID, AthleteName, TotalWellbeingScore) VALUES (1, \u0027John Doe\u0027, 85), (2, \u0027Jane Doe\u0027, 90), (3, \u0027Jim Brown\u0027, 80), (4, \u0027Jill Smith\u0027, 95);", + "sql": "SELECT AthleteName, MAX(TotalWellbeingScore) as Highest_Wellbeing_Score, MIN(TotalWellbeingScore) as Lowest_Wellbeing_Score FROM Athletes GROUP BY AthleteName;", + "sql_explanation": "Group the Athletes table by AthleteName, then calculate the highest and lowest wellbeing scores for each athlete." 
+}, { + "id": "1551", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average ticket price for rugby games in the last year, broken down by month?", + "sql_context": "CREATE TABLE rugby_prices (game_date DATE, tickets_sold INT, ticket_price DECIMAL(5,2)); INSERT INTO rugby_prices (game_date, tickets_sold, ticket_price) VALUES (\u00272022-01-01\u0027, 500, 75.50), (\u00272022-02-01\u0027, 700, 65.00), (\u00272022-03-01\u0027, 600, 80.00), (\u00272022-04-01\u0027, 800, 70.00), (\u00272022-05-01\u0027, 900, 60.00);", + "sql": "SELECT EXTRACT(MONTH FROM game_date) AS month, AVG(ticket_price) FROM rugby_prices WHERE game_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY month;", + "sql_explanation": "The query filters the rugby prices data for the last year and then calculates the average ticket price for each month using the AVG function. The query uses the EXTRACT function to determine the month for each game date, and the result is grouped by month." 
+}, { + "id": "1726", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total ticket sales by month for a specific team?", + "sql_context": "CREATE TABLE ticket_sales (sale_id INT, team_id INT, sale_date DATE, sales INT); INSERT INTO ticket_sales (sale_id, team_id, sale_date, sales) VALUES (1, 1, \u00272022-01-01\u0027, 10000), (2, 1, \u00272022-02-01\u0027, 12000), (3, 1, \u00272022-03-01\u0027, 15000);", + "sql": "SELECT EXTRACT(MONTH FROM sale_date) as month, SUM(sales) as total_sales FROM ticket_sales WHERE team_id \u003d 1 GROUP BY EXTRACT(MONTH FROM sale_date);", + "sql_explanation": "The SQL query extracts the month from the sale_date field and calculates the total ticket sales for a specific team (team_id \u003d 1) by grouping by the month and summing the sales." 
+}, { + "id": "2002", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average wellbeing score for each team by month?", + "sql_context": "CREATE TABLE athlete_wellbeing (id INT, team VARCHAR(50), wellbeing_score INT, assessment_month DATE); INSERT INTO athlete_wellbeing (id, team, wellbeing_score, assessment_month) VALUES (1, \u0027TeamA\u0027, 80, \u00272022-01-01\u0027), (2, \u0027TeamB\u0027, 75, \u00272022-01-01\u0027), (3, \u0027TeamA\u0027, 85, \u00272022-02-01\u0027), (4, \u0027TeamB\u0027, 70, \u00272022-02-01\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM assessment_month) as month, team, AVG(wellbeing_score) as avg_score FROM athlete_wellbeing GROUP BY month, team;", + "sql_explanation": "This query calculates the average wellbeing score for each team by month by extracting the month from the \u0027assessment_month\u0027 column and then grouping the data by both the \u0027month\u0027 and \u0027team\u0027 columns. It then calculates the average wellbeing score for each group using the AVG function." 
+}, { + "id": "2727", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of female fans from each city who purchased tickets to any event.", + "sql_context": "CREATE TABLE fans (id INT, city VARCHAR(50), age INT, gender VARCHAR(10), event_id INT); INSERT INTO fans (id, city, age, gender, event_id) VALUES (1, \u0027New York\u0027, 25, \u0027Female\u0027, 1); INSERT INTO fans (id, city, age, gender, event_id) VALUES (2, \u0027Los Angeles\u0027, 30, \u0027Male\u0027, 2);", + "sql": "SELECT city, gender, COUNT(DISTINCT event_id) AS num_events FROM fans WHERE gender \u003d \u0027Female\u0027 GROUP BY city, gender;", + "sql_explanation": "Count the number of female fans from each city who purchased tickets to any event by filtering the fans table based on gender, grouping by city and gender, and counting distinct event IDs." 
+}, { + "id": "2739", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of fans who attended more than 5 games in the season.", + "sql_context": "CREATE TABLE FanDemographics (FanID INT, TeamID INT, GamesAttended INT); INSERT INTO FanDemographics (FanID, TeamID, GamesAttended) VALUES (1, 1, 6), (2, 1, 3), (3, 2, 7), (4, 2, 4), (5, 3, 5), (6, 3, 9);", + "sql": "SELECT TeamID, COUNT(*) as FansAttendedMoreThan5Games FROM FanDemographics WHERE GamesAttended \u003e 5 GROUP BY TeamID;", + "sql_explanation": "This SQL query calculates the number of fans who attended more than 5 games in the season by filtering the records in the FanDemographics table based on the GamesAttended column and then using the COUNT() function to count the number of fans for each team. The results are then grouped by TeamID and the COUNT() function is used to count the number of fans for each team who attended more than 5 games." 
+}, { + "id": "2751", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total ticket revenue for each quarter?", + "sql_context": "CREATE TABLE tickets (ticket_id INT, purchase_date DATE, revenue DECIMAL(10,2), quantity INT);", + "sql": "SELECT DATE_FORMAT(purchase_date, \u0027%Y-%q\u0027) as quarter, SUM(revenue) as total_revenue FROM tickets GROUP BY quarter;", + "sql_explanation": "This query extracts the quarter from the purchase_date column using the DATE_FORMAT function and calculates the total revenue for each quarter by grouping the results by quarter." +}, { + "id": "2851", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 cities with the highest number of ticket sales.", + "sql_context": "CREATE TABLE ticket_sales(ticket_id INT, city VARCHAR(50), tickets_sold INT);", + "sql": "SELECT city, SUM(tickets_sold) AS total_sales FROM ticket_sales GROUP BY city ORDER BY total_sales DESC LIMIT 3;", + "sql_explanation": "The query first calculates the total number of tickets sold for each city with the GROUP BY clause. 
Then, the ORDER BY clause sorts the cities by total ticket sales in descending order, and the LIMIT clause limits the result to the top 3 cities." +}, { + "id": "2920", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display athlete names and their total goals and assists", + "sql_context": "CREATE TABLE athlete_stats (athlete_id INT PRIMARY KEY, name VARCHAR(100), sport VARCHAR(50), team VARCHAR(50), games_played INT, goals_scored INT, assists INT); INSERT INTO athlete_stats (athlete_id, name, sport, team, games_played, goals_scored, assists) VALUES (1, \u0027John Doe\u0027, \u0027Soccer\u0027, \u0027Blue Eagles\u0027, 10, 5, 3), (2, \u0027Jane Smith\u0027, \u0027Soccer\u0027, \u0027Blue Eagles\u0027, 12, 7, 2), (3, \u0027Raj Patel\u0027, \u0027Cricket\u0027, \u0027Indian Lions\u0027, 15, 30, 15), (4, \u0027Emma Johnson\u0027, \u0027Basketball\u0027, \u0027NY Knicks\u0027, 20, 15, 5);", + "sql": "SELECT name, SUM(goals_scored) as total_goals, SUM(assists) as total_assists FROM athlete_stats GROUP BY name;", + "sql_explanation": "This SQL query calculates the total goals and assists for each athlete using the SUM() function and groups them by name." 
+}, { + "id": "3445", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of tickets sold by each team for all events?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255)); INSERT INTO teams (team_id, team_name) VALUES (1, \u0027Knights\u0027), (2, \u0027Lions\u0027), (3, \u0027Titans\u0027); CREATE TABLE events (event_id INT, team_id INT, num_tickets_sold INT); INSERT INTO events (event_id, team_id, num_tickets_sold) VALUES (1, 1, 500), (2, 1, 700), (3, 2, 600), (4, 3, 800), (5, 3, 900);", + "sql": "SELECT e.team_id, SUM(e.num_tickets_sold) as total_tickets_sold FROM events e GROUP BY e.team_id;", + "sql_explanation": "This query calculates the total number of tickets sold by each team by summing the num_tickets_sold column for each team_id in the events table. The result is grouped by team_id to get the total tickets sold for each team." 
+}, { + "id": "3468", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of fan demographics by favorite esports team?", + "sql_context": "CREATE TABLE fan_demographics_esports (id INT, fan VARCHAR(255), age INT, gender VARCHAR(10), team VARCHAR(255)); INSERT INTO fan_demographics_esports (id, fan, age, gender, team) VALUES (1, \u0027Ahmed\u0027, 22, \u0027Male\u0027, \u0027Gen.G\u0027), (2, \u0027Bella\u0027, 27, \u0027Female\u0027, \u0027100 Thieves\u0027), (3, \u0027Carlos\u0027, 30, \u0027Male\u0027, \u0027Team Liquid\u0027), (4, \u0027Diana\u0027, 24, \u0027Female\u0027, \u0027Cloud9\u0027), (5, \u0027Elias\u0027, 35, \u0027Male\u0027, \u0027TSM\u0027), (6, \u0027Farah\u0027, 29, \u0027Female\u0027, \u0027Fnatic\u0027);", + "sql": "SELECT team, gender, COUNT(*) as fans_count FROM fan_demographics_esports GROUP BY team, gender;", + "sql_explanation": "The SQL query calculates the distribution of fan demographics by favorite esports team by grouping the fan_demographics_esports table by the team and gender columns and then applying the COUNT function on the entire table." 
+}, { + "id": "3735", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average ticket price for each sport, excluding prices above $150.", + "sql_context": "CREATE TABLE ticket_prices(ticket_id INT, sport VARCHAR(10), price DECIMAL(5,2)); INSERT INTO ticket_prices(ticket_id, sport, price) VALUES (1, \u0027baseball\u0027, 75.00), (2, \u0027hockey\u0027, 50.00), (3, \u0027baseball\u0027, 100.00), (4, \u0027hockey\u0027, 60.00), (5, \u0027baseball\u0027, 155.00), (6, \u0027hockey\u0027, 35.00), (7, \u0027baseball\u0027, 80.00), (8, \u0027hockey\u0027, 70.00);", + "sql": "SELECT sport, AVG(price) AS avg_price FROM ticket_prices WHERE price \u003c 150 GROUP BY sport;", + "sql_explanation": "Calculate the average ticket price for each sport while excluding prices above $150. Use the WHERE clause to exclude ticket prices above $150, and then use the GROUP BY clause to group ticket prices by sport and calculate the average ticket price for each sport." 
+}, { + "id": "4085", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all athletes who participated in the Wellbeing Program and their average age.", + "sql_context": "CREATE TABLE athletes (athlete_id INT, name VARCHAR(255), age INT, program VARCHAR(255)); INSERT INTO athletes (athlete_id, name, age, program) VALUES (1, \u0027John Doe\u0027, 25, \u0027Wellbeing\u0027), (2, \u0027Jane Smith\u0027, 30, \u0027Fitness\u0027), (3, \u0027Alice Johnson\u0027, 35, \u0027Wellbeing\u0027);", + "sql": "SELECT program, AVG(age) FROM athletes WHERE program \u003d \u0027Wellbeing\u0027 GROUP BY program;", + "sql_explanation": "List the average age of athletes who participated in the \u0027Wellbeing Program\u0027 from the \u0027athletes\u0027 table by grouping by the \u0027program\u0027 column and averaging the \u0027age\u0027 values where \u0027program\u0027 is \u0027Wellbeing\u0027." 
+}, { + "id": "4315", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from ticket sales for each sport in the \u0027sales\u0027 table?", + "sql_context": "CREATE TABLE sales (sale_id INT, event VARCHAR(50), sport VARCHAR(20), price DECIMAL(5,2), quantity INT); INSERT INTO sales (sale_id, event, sport, price, quantity) VALUES (1, \u0027Game 1\u0027, \u0027Basketball\u0027, 100.00, 500); INSERT INTO sales (sale_id, event, sport, price, quantity) VALUES (2, \u0027Game 2\u0027, \u0027Soccer\u0027, 75.00, 750);", + "sql": "SELECT sport, SUM(price * quantity) as total_revenue FROM sales GROUP BY sport;", + "sql_explanation": "The SQL query calculates the total revenue for each sport by using the SUM function on the product of the \u0027price\u0027 and \u0027quantity\u0027 columns and grouping the result by the \u0027sport\u0027 column." 
+}, { + "id": "4368", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average viewership rating for each sports event by month?", + "sql_context": "CREATE TABLE viewership_ratings (id INT, event VARCHAR(255), month INT, rating DECIMAL(3,2)); INSERT INTO viewership_ratings (id, event, month, rating) VALUES (1, \u0027NBA Finals\u0027, 6, 12.5), (2, \u0027UEFA Champions League\u0027, 7, 10.3), (3, \u0027FIFA World Cup\u0027, 11, 14.1), (4, \u0027NBA Finals\u0027, 6, 12.8), (5, \u0027UEFA Champions League\u0027, 7, 10.6), (6, \u0027FIFA World Cup\u0027, 11, 14.4);", + "sql": "SELECT event, AVG(rating) as avg_rating FROM viewership_ratings GROUP BY event;", + "sql_explanation": "The SQL query calculates the average viewership rating for each sports event by month by grouping the viewership_ratings table by the event column and then applying the AVG function on the rating column." 
+}, { + "id": "4660", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of games played by each team?", + "sql_context": "CREATE TABLE team_games (team_id INT, game_id INT);", + "sql": "SELECT team_id, COUNT(*) as total_games FROM team_games GROUP BY team_id;", + "sql_explanation": "The SQL query calculates the total number of games played by each team. It groups the team_games table by team_id and calculates the total number of games for each team using the COUNT() function." +}, { + "id": "4696", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of athletes by position?", + "sql_context": "CREATE TABLE athlete_data (athlete_id INT, position VARCHAR(50), team_id INT, age INT);", + "sql": "SELECT position, AVG(age) as avg_age FROM athlete_data GROUP BY position;", + "sql_explanation": "The SQL query calculates the average age of athletes by position. It groups the athlete_data table by position and calculates the average age for each position using the AVG() function." 
+}, { + "id": "5575", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of home games for each team in the \u0027home_games\u0027 table.", + "sql_context": "CREATE TABLE home_games (game_id INT, team VARCHAR(20), opponent VARCHAR(20), date DATE, tickets_sold INT, ticket_price DECIMAL(5,2));", + "sql": "SELECT team, COUNT(*) FROM home_games GROUP BY team;", + "sql_explanation": "This query shows the number of home games for each team in the \u0027home_games\u0027 table. It uses the COUNT function to count the number of rows for each team and the GROUP BY clause to group the results by team." 
+}, { + "id": "1799", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of the top two cities with the highest visitor counts at heritage sites in \u0027Middle East\u0027.", + "sql_context": "CREATE TABLE HeritageSitesME (SiteID INT PRIMARY KEY, SiteName VARCHAR(50), City VARCHAR(50), VisitorCount INT); INSERT INTO HeritageSitesME (SiteID, SiteName, City, VisitorCount) VALUES (1, \u0027Petra\u0027, \u0027Jordan\u0027, 500000), (2, \u0027Persepolis\u0027, \u0027Iran\u0027, 400000);", + "sql": "SELECT City, SUM(VisitorCount) as Total FROM HeritageSitesME WHERE City IN (\u0027Jordan\u0027, \u0027Iran\u0027, \u0027Turkey\u0027) GROUP BY City ORDER BY Total DESC LIMIT 2;", + "sql_explanation": "Identify the top two cities with the highest visitor counts at heritage sites in \u0027Middle East\u0027 by summing the \u0027VisitorCount\u0027 column values for Jordan, Iran, and Turkey, then grouping and sorting in descending order to select the top two." 
+}, { + "id": "2017", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the language preservation programs with their start and end dates, and the number of participants.", + "sql_context": "CREATE TABLE LanguagePreservation (id INT, program VARCHAR(255), start_date DATE, end_date DATE, participants INT); INSERT INTO LanguagePreservation (id, program, start_date, end_date, participants) VALUES (1, \u0027Endangered Languages Summer Camp\u0027, \u00272021-06-01\u0027, \u00272021-08-15\u0027, 25), (2, \u0027Indigenous Languages Online Course\u0027, \u00272021-02-01\u0027, \u00272021-04-30\u0027, 32);", + "sql": "SELECT program, start_date, end_date, participants, COUNT(participants) as total_participants FROM LanguagePreservation GROUP BY program;", + "sql_explanation": "This SQL query groups the LanguagePreservation table by the \u0027program\u0027 column, then counts the number of participants for each program (total_participants)." 
+}, { + "id": "2115", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of heritage sites in each country in Europe?", + "sql_context": "CREATE TABLE HeritageSites (Country VARCHAR(255), Site VARCHAR(255)); INSERT INTO HeritageSites (Country, Site) VALUES (\u0027Spain\u0027, \u0027Alhambra\u0027), (\u0027Spain\u0027, \u0027Sagrada Familia\u0027), (\u0027Spain\u0027, \u0027Prado Museum\u0027), (\u0027France\u0027, \u0027Louvre Museum\u0027), (\u0027France\u0027, \u0027Eiffel Tower\u0027), (\u0027Italy\u0027, \u0027Colosseum\u0027), (\u0027Italy\u0027, \u0027Leaning Tower of Pisa\u0027), (\u0027Germany\u0027, \u0027Brandenburg Gate\u0027), (\u0027Germany\u0027, \u0027Neuschwanstein Castle\u0027);", + "sql": "SELECT Country, COUNT(Site) as Num_Sites FROM HeritageSites WHERE Country IN (\u0027Spain\u0027, \u0027France\u0027, \u0027Italy\u0027, \u0027Germany\u0027) GROUP BY Country;", + "sql_explanation": "This query calculates the number of heritage sites in each country in Europe. It uses the COUNT function to count the number of sites and the GROUP BY clause to group the results by country." 
+}, { + "id": "2686", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 countries with the most heritage sites?", + "sql_context": "CREATE TABLE heritage_count (id INT, country VARCHAR(50), heritage_site VARCHAR(50)); INSERT INTO heritage_count (id, country, heritage_site) VALUES (1, \u0027USA\u0027, \u0027Mesa Verde\u0027); INSERT INTO heritage_count (id, country, heritage_site) VALUES (2, \u0027Ecuador\u0027, \u0027Galapagos Islands\u0027);", + "sql": "SELECT country, COUNT(heritage_site) FROM heritage_count GROUP BY country ORDER BY COUNT(heritage_site) DESC LIMIT 3;", + "sql_explanation": "This SQL query selects the country column and the count of the heritage_site column from the heritage_count table, grouped by country, ordered by the count in descending order and limited to the top 3." 
+}, { + "id": "3112", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 most common art types in the TraditionalArt table?", + "sql_context": "CREATE TABLE TraditionalArt (ArtID int, ArtName varchar(50), ArtType varchar(50)); INSERT INTO TraditionalArt (ArtID, ArtName, ArtType) VALUES (1, \u0027Pottery\u0027, \u0027Ceramics\u0027), (2, \u0027Woven Rug\u0027, \u0027Textiles\u0027), (3, \u0027Calligraphy\u0027, \u0027Writing\u0027), (4, \u0027Dance Performance\u0027, \u0027Performance\u0027), (5, \u0027Painting\u0027, \u0027Ceramics\u0027), (6, \u0027Sculpture\u0027, \u0027Sculpture\u0027);", + "sql": "SELECT ArtType, COUNT(*) AS ArtCount FROM TraditionalArt GROUP BY ArtType ORDER BY ArtCount DESC LIMIT 3;", + "sql_explanation": "The query calculates the number of art pieces for each art type and then orders the results in descending order to show the top 3 most common types." 
+}, { + "id": "3264", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the heritage site in \u0027Asia\u0027 with the highest visitor count.", + "sql_context": "CREATE TABLE HeritageSites (SiteID INT PRIMARY KEY, SiteName VARCHAR(50), Location VARCHAR(50), VisitorCount INT); INSERT INTO HeritageSites (SiteID, SiteName, Location, VisitorCount) VALUES (1, \u0027Angkor Wat\u0027, \u0027Cambodia\u0027, 2500000), (2, \u0027Taj Mahal\u0027, \u0027India\u0027, 3000000);", + "sql": "SELECT SiteName, MAX(VisitorCount) FROM HeritageSites WHERE Location LIKE \u0027%Asia%\u0027 GROUP BY SiteName;", + "sql_explanation": "Determine the heritage site in \u0027Asia\u0027 with the highest visitor count by grouping sites in Asia and selecting the site name with the maximum value in the \u0027VisitorCount\u0027 column." 
+}, { + "id": "380", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature trend for each crop type in the past year?", + "sql_context": "CREATE TABLE crop_temperature_trend (crop_type TEXT, date DATE, temperature INTEGER); INSERT INTO crop_temperature_trend VALUES (\u0027maize\u0027, \u00272022-01-01\u0027, 20), (\u0027wheat\u0027, \u00272022-01-01\u0027, 15);", + "sql": "SELECT crop_type, AVG(temperature) as avg_temperature, PERIOD_DIFF(date, \u00272022-01-01\u0027, INTERVAL 1 YEAR) as year FROM crop_temperature_trend WHERE date \u003e\u003d \u00272022-01-01\u0027 AND date \u003c \u00272023-01-01\u0027 GROUP BY crop_type, YEAR(date) ORDER BY year;", + "sql_explanation": "The SQL query calculates the average temperature trend for each crop type by grouping by crop_type and year, applying the AVG() function on the temperature column, and filtering the records for the past year. The PERIOD_DIFF() function is used to calculate the year difference between the current date and the starting date." 
+}, { + "id": "486", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of sensors that have been active in the past week, and the average temperature and humidity recorded.", + "sql_context": "CREATE TABLE sensor_data (id INT, sensor_id VARCHAR(255), temperature INT, humidity INT, status VARCHAR(255), timestamp DATETIME); INSERT INTO sensor_data (id, sensor_id, temperature, humidity, status, timestamp) VALUES (1, \u0027SENS001\u0027, 22, 65, \u0027active\u0027, \u00272022-01-01 10:00:00\u0027);", + "sql": "SELECT status, COUNT(*) as sensor_count, AVG(temperature) as avg_temp, AVG(humidity) as avg_humidity FROM sensor_data WHERE status \u003d \u0027active\u0027 AND timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 WEEK) GROUP BY status;", + "sql_explanation": "The SQL query finds the total number of sensors that have been active in the past week, and the average temperature and humidity recorded. It does this by selecting the status, count of sensors (sensor_count), average temperature (avg_temp), and average humidity (avg_humidity) from the sensor_data table where the status is \u0027active\u0027 and the timestamp is greater than or equal to one week before the current timestamp. The query then groups the results by status." 
+}, { + "id": "597", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average nitrogen, phosphorus, and potassium levels for each crop type in the past month?", + "sql_context": "CREATE TABLE crop_nutrients (id INT, crop_id INT, type VARCHAR(255), nitrogen FLOAT, phosphorus FLOAT, potassium FLOAT, timestamp DATETIME);", + "sql": "SELECT type, AVG(nitrogen) as avg_nitrogen, AVG(phosphorus) as avg_phosphorus, AVG(potassium) as avg_potassium FROM crop_nutrients WHERE timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 MONTH) GROUP BY type;", + "sql_explanation": "This query calculates the average nitrogen, phosphorus, and potassium levels for each crop type in the past month. It uses the WHERE clause to filter records within the past month, and the GROUP BY clause to group results by crop type." 
+}, { + "id": "1079", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total rainfall for each irrigation system in the past month, ranked in descending order?", + "sql_context": "CREATE TABLE IrrigationData (id INT, System VARCHAR(255), Rainfall INT, Timestamp DATETIME); INSERT INTO IrrigationData (id, System, Rainfall, Timestamp) VALUES (1, \u0027Drip Irrigation\u0027, 150, \u00272022-04-20 12:00:00\u0027), (2, \u0027Sprinkler Irrigation\u0027, 200, \u00272022-04-20 12:00:00\u0027);", + "sql": "SELECT System, SUM(Rainfall) as TotalRainfall FROM IrrigationData WHERE Timestamp BETWEEN DATEADD(day, -30, GETDATE()) AND GETDATE() GROUP BY System ORDER BY TotalRainfall DESC;", + "sql_explanation": "The SQL query calculates the total rainfall for each irrigation system in the past month by filtering the IrrigationData table for records between 30 days ago and now, then grouping the results by system and calculating the total rainfall. Finally, the results are ordered by total rainfall in descending order." 
+}, { + "id": "1262", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average precipitation and light intensity for each farm in the farmland table?", + "sql_context": "CREATE TABLE farmland (farmland_id INT, country VARCHAR(255), precipitation INT, light_intensity INT); INSERT INTO farmland (farmland_id, country, precipitation, light_intensity) VALUES (1, \u0027India\u0027, 50, 1000), (2, \u0027Nepal\u0027, 100, 800), (3, \u0027Bangladesh\u0027, 75, 1200), (4, \u0027Pakistan\u0027, 60, 900);", + "sql": "SELECT AVG(farmland.precipitation) AS avg_precipitation, AVG(farmland.light_intensity) AS avg_light_intensity, farmland.country FROM farmland GROUP BY farmland.country;", + "sql_explanation": "This SQL query calculates the average precipitation and light intensity for each country in the \u0027farmland\u0027 table. It uses the GROUP BY clause to group the data by country and calculate the average precipitation and light intensity for each group." 
+}, { + "id": "1373", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 countries with the highest average temperature in January, from the \u0027satellite_data\u0027 table.", + "sql_context": "CREATE TABLE satellite_data (country VARCHAR(255), temperature FLOAT, date DATE); INSERT INTO satellite_data (country, temperature, date) VALUES (\u0027Canada\u0027, 0, \u00272022-01-01\u0027); INSERT INTO satellite_data (country, temperature, date) VALUES (\u0027Mexico\u0027, 20, \u00272022-01-01\u0027);", + "sql": "SELECT country, AVG(temperature) as avg_temp FROM satellite_data WHERE date BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027 GROUP BY country ORDER BY avg_temp DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the average temperature for each country in January from the \u0027satellite_data\u0027 table. It then groups the results by country, orders them by average temperature in descending order, and limits the output to the top 3 countries." 
+}, { + "id": "1700", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum soil moisture level for each crop type in the past week?", + "sql_context": "CREATE TABLE crop_soil_moisture_level (crop_type TEXT, date DATE, soil_moisture INTEGER); INSERT INTO crop_soil_moisture_level VALUES (\u0027sorghum\u0027, \u00272022-06-01\u0027, 55), (\u0027barley\u0027, \u00272022-06-01\u0027, 60);", + "sql": "SELECT crop_type, MAX(soil_moisture) as max_soil_moisture FROM crop_soil_moisture_level WHERE date \u003e\u003d CURDATE() - INTERVAL 1 WEEK GROUP BY crop_type;", + "sql_explanation": "The SQL query calculates the maximum soil moisture level for each crop type by grouping by crop_type and applying the MAX() function on the soil_moisture column, while filtering the records for the past week." 
+}, { + "id": "1797", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which fields had precipitation over 10mm in the last month?", + "sql_context": "CREATE TABLE field_precipitation (field_id INT, date DATE, precipitation FLOAT); INSERT INTO field_precipitation (field_id, date, precipitation) VALUES (2, \u00272021-06-15\u0027, 12.5), (2, \u00272021-06-20\u0027, 8.3), (3, \u00272021-07-01\u0027, 15.2);", + "sql": "SELECT field_id, COUNT(*) as precipitation_days FROM field_precipitation WHERE precipitation \u003e 10 GROUP BY field_id HAVING precipitation_days \u003e 0;", + "sql_explanation": "This query finds fields with precipitation over 10mm in the past month by counting days with such precipitation for each field." 
+}, { + "id": "1847", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum soil moisture level for each crop type in the past month?", + "sql_context": "CREATE TABLE crop_soil_moisture (crop_type TEXT, date DATE, soil_moisture INTEGER); INSERT INTO crop_soil_moisture VALUES (\u0027cotton\u0027, \u00272022-06-01\u0027, 40), (\u0027rice\u0027, \u00272022-06-01\u0027, 50);", + "sql": "SELECT crop_type, MIN(soil_moisture) as min_soil_moisture FROM crop_soil_moisture WHERE date \u003e\u003d CURDATE() - INTERVAL 1 MONTH GROUP BY crop_type;", + "sql_explanation": "The SQL query calculates the minimum soil moisture level for each crop type by grouping by crop_type and applying the MIN() function on the soil_moisture column, while filtering the records for the past month." 
+}, { + "id": "1933", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of IoT sensors in vineyards and orchards in Australia.", + "sql_context": "CREATE TABLE IoT_Sensors (farm_type VARCHAR(20), country VARCHAR(20), num_sensors INTEGER); INSERT INTO IoT_Sensors (farm_type, country, num_sensors) VALUES (\u0027Vineyard\u0027, \u0027Australia\u0027, 450), (\u0027Vineyard\u0027, \u0027Australia\u0027, 520), (\u0027Orchard\u0027, \u0027Australia\u0027, 380), (\u0027Orchard\u0027, \u0027Australia\u0027, 410), (\u0027Orchard\u0027, \u0027Australia\u0027, 460), (\u0027Vineyard\u0027, \u0027Australia\u0027, 550);", + "sql": "SELECT farm_type, SUM(num_sensors) FROM IoT_Sensors WHERE country \u003d \u0027Australia\u0027 AND farm_type IN (\u0027Vineyard\u0027, \u0027Orchard\u0027) GROUP BY farm_type;", + "sql_explanation": "This SQL query lists the number of IoT sensors in vineyards and orchards in Australia using SUM function on the \u0027num_sensors\u0027 column and filtering the rows with WHERE operator for the given country and farm types. The GROUP BY operator is used to group the result by farm types." 
+}, { + "id": "2229", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 countries with the highest average soil temperature in May.", + "sql_context": "CREATE TABLE WeatherStats (id INT, country VARCHAR(50), month VARCHAR(10), avg_temp DECIMAL(5,2)); INSERT INTO WeatherStats (id, country, month, avg_temp) VALUES (1, \u0027US\u0027, \u0027May\u0027, 22.5), (2, \u0027Canada\u0027, \u0027May\u0027, 15.3), (3, \u0027Mexico\u0027, \u0027May\u0027, 27.2), (4, \u0027Brazil\u0027, \u0027May\u0027, 24.6), (5, \u0027Argentina\u0027, \u0027May\u0027, 18.9);", + "sql": "SELECT country, AVG(avg_temp) as AvgTemp FROM WeatherStats WHERE month \u003d \u0027May\u0027 GROUP BY country ORDER BY AvgTemp DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the average soil temperature (AvgTemp) for each country in May by grouping the WeatherStats table by country and filtering the month to May. It then orders the results in descending order based on the average temperature and limits the output to the top 3 countries." 
+}, { + "id": "3864", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique soil_moisture_sensors with their last recorded timestamp.", + "sql_context": "CREATE TABLE soil_moisture_sensors ( id INT, sensor_id INT, moisture DECIMAL(5,2), timestamp TIMESTAMP); INSERT INTO soil_moisture_sensors (id, sensor_id, moisture, timestamp) VALUES (1, 1001, 45, \u00272022-01-01 12:00:00\u0027), (2, 1002, 48, \u00272022-01-01 13:00:00\u0027), (3, 1001, 46, \u00272022-01-01 14:00:00\u0027);", + "sql": "SELECT DISTINCT sensor_id, MAX(timestamp) FROM soil_moisture_sensors GROUP BY sensor_id;", + "sql_explanation": "This SQL query lists all unique soil moisture sensors with their last recorded timestamp by grouping the soil_moisture_sensors table by sensor_id and selecting the maximum timestamp for each group." 
+}, { + "id": "4785", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique sensor types and their maximum values in the entire database.", + "sql_context": "CREATE TABLE field_sensors (field_id INT, sensor_type VARCHAR(20), value FLOAT, timestamp TIMESTAMP); INSERT INTO field_sensors (field_id, sensor_type, value, timestamp) VALUES (1, \u0027temperature\u0027, 22.5, \u00272021-03-01 10:00:00\u0027), (1, \u0027humidity\u0027, 60.0, \u00272021-03-01 10:00:00\u0027);", + "sql": "SELECT sensor_type, MAX(value) FROM field_sensors GROUP BY sensor_type;", + "sql_explanation": "This query lists all unique sensor types and their maximum values in the entire database. It groups the records based on the sensor_type column and then calculates the maximum value for each group." 
+}, { + "id": "38", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of delays related to construction, weather, and other reasons for each route.", + "sql_context": "CREATE TABLE delays (delay_id INT, route_id INT, delay FLOAT, delay_reason VARCHAR(255), delay_date DATE); INSERT INTO delays (delay_id, route_id, delay, delay_reason, delay_date) VALUES (1, 1, 15.0, \u0027Construction\u0027, \u00272022-05-01\u0027), (2, 2, 20.0, \u0027Accident\u0027, \u00272022-05-02\u0027), (3, 1, 5.0, \u0027Weather\u0027, \u00272022-05-03\u0027), (4, 2, 10.0, \u0027Mechanical\u0027, \u00272022-05-04\u0027), (5, 1, 12.0, \u0027Construction\u0027, \u00272022-05-05\u0027);", + "sql": "SELECT route_id, ROUND(100.0 * SUM(CASE WHEN delay_reason \u003d \u0027Construction\u0027 THEN 1 ELSE 0 END) / COUNT(*), 2) as pct_construction, ROUND(100.0 * SUM(CASE WHEN delay_reason \u003d \u0027Weather\u0027 THEN 1 ELSE 0 END) / COUNT(*), 2) as pct_weather, ROUND(100.0 * SUM(CASE WHEN delay_reason NOT IN (\u0027Construction\u0027, \u0027Weather\u0027) THEN 1 ELSE 0 END) / COUNT(*), 2) as pct_other FROM delays GROUP BY route_id;", + "sql_explanation": "This query calculates the percentage of delays related to construction, weather, and other reasons for each route." 
+}, { + "id": "1713", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vehicles of each type (bus, subway, tram) are there in the fleet, and what is the average age of each type?", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, vehicle_type VARCHAR(50), registration_date DATE);", + "sql": "SELECT vehicle_type, COUNT(*) as vehicle_count, AVG(DATEDIFF(CURDATE(), registration_date) / 365.25) as avg_age FROM vehicles GROUP BY vehicle_type;", + "sql_explanation": "The SQL query groups the rows in the vehicles table by vehicle_type and calculates the count of vehicles for each type and the average age of each type. The query finally returns the number of vehicles of each type and the average age of each type." 
+}, { + "id": "1871", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique vehicles have been used in route 103 over the last 30 days?", + "sql_context": "CREATE TABLE vehicle_routes (vehicle_id INT, route_id INT, date DATE); INSERT INTO vehicle_routes (vehicle_id, route_id, date) VALUES (101, 101, \u00272022-01-01\u0027), (102, 101, \u00272022-01-02\u0027), (103, 102, \u00272022-01-01\u0027), (104, 102, \u00272022-01-02\u0027), (105, 103, \u00272022-01-01\u0027), (106, 103, \u00272022-01-02\u0027), (107, 102, \u00272022-02-01\u0027), (108, 102, \u00272022-02-02\u0027), (109, 103, \u00272022-01-15\u0027), (110, 103, \u00272022-01-16\u0027);", + "sql": "SELECT route_id, COUNT(DISTINCT vehicle_id) FROM vehicle_routes WHERE date \u003e\u003d CURDATE() - INTERVAL 30 DAY AND route_id \u003d 103 GROUP BY route_id;", + "sql_explanation": "Get the unique vehicle count per route for the last 30 days by filtering the date range and route_id." 
+}, { + "id": "2763", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of unique vehicle types in the vehicles table, ordered from highest to lowest", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, vehicle_type VARCHAR(50)); INSERT INTO vehicles (vehicle_id, vehicle_type) VALUES (1000, \u0027Bus\u0027), (1001, \u0027Tram\u0027), (1002, \u0027Bus\u0027), (1003, \u0027Tram\u0027), (1004, \u0027Trolleybus\u0027);", + "sql": "SELECT COUNT(DISTINCT vehicle_type) FROM vehicles GROUP BY vehicle_type ORDER BY COUNT(DISTINCT vehicle_type) DESC;", + "sql_explanation": "This query displays the number of unique vehicle types in the vehicles table, ordered from highest to lowest, by selecting the count of distinct vehicle_type values, grouping by vehicle_type, and ordering by the count in descending order." 
+}, { + "id": "3529", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delay for each route with more than 5 records?", + "sql_context": "CREATE TABLE delays (route_id INT, delay INT, timestamp TIMESTAMP); INSERT INTO delays (route_id, delay, timestamp) VALUES (1, 300, \u00272022-01-01 08:00:00\u0027), (1, 400, \u00272022-01-02 08:00:00\u0027), (2, 200, \u00272022-01-01 08:00:00\u0027), (2, 100, \u00272022-01-02 08:00:00\u0027), (1, 500, \u00272022-01-03 08:00:00\u0027), (2, 300, \u00272022-01-03 08:00:00\u0027), (1, 200, \u00272022-01-04 08:00:00\u0027), (2, 400, \u00272022-01-04 08:00:00\u0027), (1, 100, \u00272022-01-05 08:00:00\u0027), (2, 500, \u00272022-01-05 08:00:00\u0027);", + "sql": "SELECT route_id, AVG(delay) as Average_Delay FROM delays GROUP BY route_id HAVING COUNT(*) \u003e 5;", + "sql_explanation": "Calculates the average delay for each route with more than 5 records in the \u0027delays\u0027 table. Uses the AVG function and GROUP BY clause, and filters with HAVING." 
+}, { + "id": "3761", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many routes are there in the London public transportation system for each vehicle type?", + "sql_context": "CREATE TABLE london_transport (route_id INT, company VARCHAR(20), vehicle_type VARCHAR(10)); INSERT INTO london_transport (route_id, company, vehicle_type) VALUES (1, \u0027Bus Company\u0027, \u0027Bus\u0027), (2, \u0027Bus Company\u0027, \u0027Bus\u0027), (3, \u0027Train Company\u0027, \u0027Train\u0027), (4, \u0027Train Company\u0027, \u0027Train\u0027);", + "sql": "SELECT vehicle_type, COUNT(DISTINCT route_id) FROM london_transport GROUP BY vehicle_type;", + "sql_explanation": "Count the number of distinct routes for each vehicle type by grouping by \u0027vehicle_type\u0027 and counting the distinct \u0027route_id\u0027 values." 
+}, { + "id": "4117", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many accessible vehicles are there for each type in the fleet?", + "sql_context": "CREATE TABLE fleet (vehicle_id INT, type VARCHAR(50), accessibility BOOLEAN); INSERT INTO fleet VALUES (1, \u0027Bus\u0027, TRUE), (2, \u0027Bus\u0027, FALSE), (3, \u0027Tram\u0027, TRUE), (4, \u0027Tram\u0027, TRUE);", + "sql": "SELECT type, COUNT(*) FILTER (WHERE accessibility \u003d TRUE) FROM fleet GROUP BY type;", + "sql_explanation": "This query calculates the number of accessible vehicles for each type by filtering for accessible vehicles (accessibility \u003d TRUE) and grouping the result by vehicle type." 
+}, { + "id": "4200", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total distance traveled by bus and tram routes?", + "sql_context": "CREATE TABLE routes (id INT, name VARCHAR(50), type VARCHAR(10), length DECIMAL(5,2)); INSERT INTO routes (id, name, type, length) VALUES (1, \u0027Line 1A\u0027, \u0027Bus\u0027, 12.3), (2, \u0027Line 2B\u0027, \u0027Tram\u0027, 15.8);", + "sql": "SELECT type, SUM(length) FROM routes WHERE type IN (\u0027Bus\u0027, \u0027Tram\u0027) GROUP BY type;", + "sql_explanation": "Sum the length of bus and tram routes by filtering the routes table where type is either \u0027Bus\u0027 or \u0027Tram\u0027 and grouping by type." 
+}, { + "id": "4249", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for each station in the \u0027stations\u0027 table?", + "sql_context": "CREATE TABLE stations (station_id INT, station_name VARCHAR(255), num_platforms INT, daily_passengers INT, fare FLOAT);", + "sql": "SELECT station_name, SUM(fare) as total_fare FROM stations GROUP BY station_name;", + "sql_explanation": "The SQL query calculates the total fare collected for each station by summing the \u0027fare\u0027 column and grouping by \u0027station_name\u0027." +}, { + "id": "4411", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many emergency maintenance records are there for each line?", + "sql_context": "CREATE TABLE maintenance (line VARCHAR(10), date DATE, type VARCHAR(20)); INSERT INTO maintenance (line, date, type) VALUES (\u0027red\u0027, \u00272022-01-01\u0027, \u0027routine\u0027), (\u0027red\u0027, \u00272022-02-01\u0027, \u0027emergency\u0027), (\u0027blue\u0027, \u00272022-03-01\u0027, \u0027routine\u0027), (\u0027yellow\u0027, \u00272022-04-01\u0027, \u0027emergency\u0027), (\u0027yellow\u0027, 
\u00272022-05-01\u0027, \u0027emergency\u0027);", + "sql": "SELECT line, COUNT(*) FROM maintenance WHERE type \u003d \u0027emergency\u0027 GROUP BY line;", + "sql_explanation": "This query calculates the number of emergency maintenance records for each line by selecting the count of all records where the type is \u0027emergency\u0027 and grouping by line." +}, { + "id": "1862", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of animals of each species in protected areas, ordered by the number of animals in descending order?", + "sql_context": "CREATE TABLE AnimalProtectedAreas (Species VARCHAR(255), Area VARCHAR(255), Animals INT); INSERT INTO AnimalProtectedAreas (Species, Area, Animals) VALUES (\u0027Giraffe\u0027, \u0027NationalPark\u0027, 50), (\u0027Giraffe\u0027, \u0027Reserve\u0027, 20), (\u0027Lion\u0027, \u0027NationalPark\u0027, 100), (\u0027Lion\u0027, \u0027Reserve\u0027, 30), (\u0027Elephant\u0027, \u0027NationalPark\u0027, 25), (\u0027Elephant\u0027, \u0027Reserve\u0027, 15);", + "sql": "SELECT Species, SUM(Animals) as TotalAnimals FROM AnimalProtectedAreas WHERE Area \u003d \u0027NationalPark\u0027 GROUP BY Species ORDER BY TotalAnimals DESC;", + "sql_explanation": "Calculate the number of animals of each species in protected areas using the SUM function and group them by species with the GROUP BY clause. Then, filter the results to only include protected areas with the WHERE clause. Finally, order the results by the total number of animals in descending order with the ORDER BY clause." 
+}, { + "id": "1887", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 animal species with the highest conservation spending.", + "sql_context": "CREATE TABLE conservation_funding (id INT, animal_species VARCHAR(255), spending FLOAT, year INT);", + "sql": "SELECT animal_species, SUM(spending) as total_spending FROM conservation_funding GROUP BY animal_species ORDER BY total_spending DESC LIMIT 5;", + "sql_explanation": "Calculate the total conservation spending per animal species by grouping the conservation_funding table by animal_species and finding the sum of spending. Then, order by total_spending in descending order and limit the results to the top 5." 
+}, { + "id": "2144", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many endangered animals are there in total, by type, in Asia?", + "sql_context": "CREATE TABLE Animals (AnimalID INT, AnimalName VARCHAR(50), Population INT, Habitat VARCHAR(50), Status VARCHAR(20)); INSERT INTO Animals (AnimalID, AnimalName, Population, Habitat, Status) VALUES (1, \u0027Tiger\u0027, 3890, \u0027Asia\u0027, \u0027Endangered\u0027); INSERT INTO Animals (AnimalID, AnimalName, Population, Habitat, Status) VALUES (2, \u0027Giant Panda\u0027, 1864, \u0027Asia\u0027, \u0027Endangered\u0027);", + "sql": "SELECT Status, AnimalName, SUM(Population) FROM Animals WHERE Habitat \u003d \u0027Asia\u0027 AND Status \u003d \u0027Endangered\u0027 GROUP BY Status, AnimalName;", + "sql_explanation": "The SQL query calculates the total population of endangered animals (Tiger and Giant Panda) in Asia by summing the Population column for the corresponding rows, grouped by Status and AnimalName." 
+}, { + "id": "2212", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of endangered animals in the animal_population table for each region?", + "sql_context": "CREATE TABLE animal_population (id INT, region VARCHAR(50), population INT, endangered BOOLEAN);", + "sql": "SELECT region, (COUNT(*) FILTER (WHERE endangered \u003d TRUE)) * 100.0 / COUNT(*) AS percentage FROM animal_population GROUP BY region;", + "sql_explanation": "This query calculates the percentage of endangered animals for each region in the animal_population table by using the COUNT function with the FILTER clause to count the number of endangered animals (endangered \u003d TRUE) for each region, and dividing this by the total number of animals for each region (COUNT(*)). The result is multiplied by 100 to convert it to a percentage. The GROUP BY clause groups the results by region, so the query returns a separate percentage for each region." 
+}, { + "id": "3528", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of animals in each conservation status", + "sql_context": "CREATE TABLE animals (id INT, name VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO animals (id, name, conservation_status) VALUES (1, \u0027Tiger\u0027, \u0027Endangered\u0027), (2, \u0027Elephant\u0027, \u0027Vulnerable\u0027), (3, \u0027Rhinoceros\u0027, \u0027Critically Endangered\u0027), (4, \u0027Panda\u0027, \u0027Vulnerable\u0027), (5, \u0027Giraffe\u0027, \u0027Least Concern\u0027);", + "sql": "SELECT conservation_status, COUNT(*) AS animal_count FROM animals GROUP BY conservation_status;", + "sql_explanation": "This query groups the \u0027animals\u0027 table by the \u0027conservation_status\u0027 column and counts the number of animals for each conservation status." 
+}, { + "id": "3550", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all continents and the number of animal species they have in the \u0027habitat_preservation\u0027 table", + "sql_context": "CREATE TABLE habitat_preservation (id INT, animal_species VARCHAR(50), population INT, continent VARCHAR(50)); INSERT INTO habitat_preservation (id, animal_species, population, continent) VALUES (1, \u0027Tiger\u0027, 2000, \u0027Asia\u0027), (2, \u0027Elephant\u0027, 5000, \u0027Africa\u0027), (3, \u0027Giraffe\u0027, 8000, \u0027Africa\u0027), (4, \u0027Kangaroo\u0027, 9000, \u0027Australia\u0027), (5, \u0027Panda\u0027, 1000, \u0027Asia\u0027);", + "sql": "SELECT continent, COUNT(DISTINCT animal_species) FROM habitat_preservation GROUP BY continent;", + "sql_explanation": "This query lists all continents and the number of animal species they have by selecting the \u0027continent\u0027 and counting the number of distinct \u0027animal_species\u0027 in the \u0027habitat_preservation\u0027 table, grouping the results by \u0027continent\u0027." 
+}, { + "id": "3834", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of animals in protected habitats for each species and region?", + "sql_context": "CREATE TABLE ProtectedHabitats (id INT, animal_id INT, species VARCHAR(255), size FLOAT, region VARCHAR(255)); INSERT INTO ProtectedHabitats (id, animal_id, species, size, region) VALUES (1, 1, \u0027Lion\u0027, 5.6, \u0027Africa\u0027), (2, 2, \u0027Elephant\u0027, 3.2, \u0027Asia\u0027), (3, 3, \u0027Tiger\u0027, 7.8, \u0027Africa\u0027);", + "sql": "SELECT species, region, COUNT(animal_id) FROM ProtectedHabitats GROUP BY species, region;", + "sql_explanation": "This SQL query counts the number of animals (COUNT(animal_id)) in protected habitats for each species (GROUP BY species) and region (GROUP BY region)." 
+}, { + "id": "3980", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many animals of each species were adopted in 2019?", + "sql_context": "CREATE TABLE AnimalPopulation(Year INT, Species VARCHAR(20), Animals INT); INSERT INTO AnimalPopulation VALUES (2019, \u0027Tiger\u0027, 15), (2019, \u0027Lion\u0027, 25), (2019, \u0027Elephant\u0027, 30), (2019, \u0027Giraffe\u0027, 20);", + "sql": "SELECT Species, SUM(Animals) FROM AnimalPopulation WHERE Year \u003d 2019 GROUP BY Species;", + "sql_explanation": "The SQL query uses the SUM() function to add up the number of animals of each species adopted in 2019 and the GROUP BY clause to group the results by species." 
+}, { + "id": "4124", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population size for each animal species in the wetlands?", + "sql_context": "CREATE TABLE habitat (id INT, type VARCHAR(50), animals INT, species VARCHAR(50)); INSERT INTO habitat (id, type, animals, species) VALUES (1, \u0027Wetlands\u0027, 150, \u0027Alligator\u0027), (2, \u0027Wetlands\u0027, 100, \u0027Turtle\u0027), (3, \u0027Wetlands\u0027, 120, \u0027Crane\u0027);", + "sql": "SELECT species, AVG(animals) FROM habitat WHERE type \u003d \u0027Wetlands\u0027 GROUP BY species;", + "sql_explanation": "This SQL query filters the habitat table to only include rows with the \u0027Wetlands\u0027 type, and then calculates the average population size for each animal species using the AVG function and the GROUP BY statement." 
+}, { + "id": "4242", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals adopted by each community?", + "sql_context": "CREATE TABLE CommunityEducation(Community VARCHAR(20), AnimalsAdopted INT); INSERT INTO CommunityEducation VALUES (\u0027CommunityA\u0027, 35), (\u0027CommunityB\u0027, 28), (\u0027CommunityC\u0027, 42);", + "sql": "SELECT Community, SUM(AnimalsAdopted) FROM CommunityEducation GROUP BY Community;", + "sql_explanation": "The SQL query uses the SUM() function to add up the number of animals adopted by each community and the GROUP BY clause to group the results by community." 
+}, { + "id": "4332", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average animal population for each continent in the \u0027habitat_preservation\u0027 table", + "sql_context": "CREATE TABLE habitat_preservation (id INT, animal_species VARCHAR(50), population INT, continent VARCHAR(50)); INSERT INTO habitat_preservation (id, animal_species, population, continent) VALUES (1, \u0027Tiger\u0027, 2000, \u0027Asia\u0027), (2, \u0027Elephant\u0027, 5000, \u0027Africa\u0027), (3, \u0027Giraffe\u0027, 8000, \u0027Africa\u0027), (4, \u0027Kangaroo\u0027, 9000, \u0027Australia\u0027);", + "sql": "SELECT continent, AVG(population) FROM habitat_preservation GROUP BY continent;", + "sql_explanation": "This query calculates the average animal population for each continent by selecting the \u0027continent\u0027 and finding the average of the \u0027population\u0027 column in the \u0027habitat_preservation\u0027 table, grouping the results by \u0027continent\u0027." 
+}, { + "id": "4546", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total size of protected habitats for each animal type?", + "sql_context": "CREATE TABLE Protected_Habitats (id INT, animal_type VARCHAR(50), size INT);", + "sql": "SELECT animal_type, SUM(size) FROM Protected_Habitats GROUP BY animal_type;", + "sql_explanation": "This query calculates the total size of protected habitats for each animal type. It groups the records by animal_type and then computes the sum of size for each group." +}, { + "id": "4621", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many education programs were conducted in each region?", + "sql_context": "CREATE TABLE education_programs (region TEXT, program_count INTEGER); INSERT INTO education_programs (region, program_count) VALUES (\u0027North\u0027, 15), (\u0027South\u0027, 20), (\u0027East\u0027, 10), (\u0027West\u0027, 25);", + "sql": "SELECT region, SUM(program_count) FROM education_programs GROUP BY region;", + "sql_explanation": "This SQL query calculates the total number of education programs conducted in each region. 
It uses the GROUP BY clause to group the records by region and the SUM function to add up the program_count for each group." +}, { + "id": "4702", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of community education programs for each country", + "sql_context": "CREATE TABLE community_education (id INT, country VARCHAR(50), program VARCHAR(50)); INSERT INTO community_education (id, country, program) VALUES (1, \u0027Brazil\u0027, \u0027Rainforest Awareness\u0027), (2, \u0027Kenya\u0027, \u0027Wildlife Conservation\u0027), (3, \u0027Canada\u0027, \u0027Polar Bear Protection\u0027), (4, \u0027Brazil\u0027, \u0027Amazon Conservation\u0027);", + "sql": "SELECT country, COUNT(program) FROM community_education GROUP BY country;", + "sql_explanation": "This query counts the number of community education programs for each country by selecting the \u0027country\u0027 and counting the number of \u0027program\u0027 entries in the \u0027community_education\u0027 table, grouping the results by \u0027country\u0027." 
+}, { + "id": "5184", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily intake for each animal food type in the wildlife conservation domain?", + "sql_context": "CREATE TABLE if not exists animal_diets (id INT, animal_name VARCHAR, food VARCHAR, daily_intake INT); INSERT INTO animal_diets (id, animal_name, food, daily_intake) VALUES (1, \u0027Elephant\u0027, \u0027Hay\u0027, 200), (2, \u0027Giraffe\u0027, \u0027Leaves\u0027, 100), (3, \u0027Tiger\u0027, \u0027Meat\u0027, 300);", + "sql": "SELECT food, AVG(daily_intake) FROM animal_diets GROUP BY food;", + "sql_explanation": "Calculate the average daily intake for each animal food type in the wildlife conservation domain." 
+}, { + "id": "853", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of chatbot interactions for each interaction type in hotels located in the Asia-Pacific region?", + "sql_context": "CREATE TABLE chatbot_interactions (id INT PRIMARY KEY, hotel_name VARCHAR(50), interaction_type VARCHAR(50), interaction_count INT, hotel_location VARCHAR(50)); INSERT INTO chatbot_interactions (id, hotel_name, interaction_type, interaction_count, hotel_location) VALUES (1, \u0027Pacific Place\u0027, \u0027Greeting\u0027, 75, \u0027Asia-Pacific\u0027), (2, \u0027Asian Palace\u0027, \u0027Question about amenities\u0027, 80, \u0027Asia-Pacific\u0027);", + "sql": "SELECT hotel_location, interaction_type, MAX(interaction_count) FROM chatbot_interactions WHERE hotel_location \u003d \u0027Asia-Pacific\u0027 GROUP BY hotel_location, interaction_type HAVING COUNT(*) \u003e 1;", + "sql_explanation": "This SQL query calculates the maximum number of chatbot interactions for each interaction type in hotels located in the Asia-Pacific region. It uses the WHERE clause to filter the data for the Asia-Pacific region, the GROUP BY clause to group the data by hotel_location and interaction_type, and the HAVING clause to ensure only rows with more than one entry are included in the result set." 
+}, { + "id": "1813", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each hotel category in Q1 2022?", + "sql_context": "CREATE TABLE hotel_revenue (hotel_category VARCHAR(20), revenue DECIMAL(10,2), date DATE); INSERT INTO hotel_revenue (hotel_category, revenue, date) VALUES (\u00275 Star\u0027, 15000, \u00272022-01-01\u0027), (\u00275 Star\u0027, 16000, \u00272022-01-02\u0027), (\u00274 Star\u0027, 12000, \u00272022-01-01\u0027), (\u00274 Star\u0027, 12500, \u00272022-01-02\u0027);", + "sql": "SELECT hotel_category, SUM(revenue) as total_revenue FROM hotel_revenue WHERE date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY hotel_category;", + "sql_explanation": "This query calculates the total revenue for each hotel category in Q1 2022. It sums the revenue for each hotel category and groups them by hotel_category." 
+}, { + "id": "2227", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average hotel rating for hotels in the USA and Canada, ordered by rating in descending order?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT); INSERT INTO hotels (hotel_id, hotel_name, country, rating) VALUES (1, \u0027Hotel A\u0027, \u0027USA\u0027, 4.5), (2, \u0027Hotel B\u0027, \u0027Canada\u0027, 4.7), (3, \u0027Hotel C\u0027, \u0027Mexico\u0027, 4.2), (4, \u0027Hotel D\u0027, \u0027USA\u0027, 4.6), (5, \u0027Hotel E\u0027, \u0027Canada\u0027, 4.4);", + "sql": "SELECT country, AVG(rating) as avg_rating FROM hotels WHERE country IN (\u0027USA\u0027, \u0027Canada\u0027) GROUP BY country ORDER BY avg_rating DESC;", + "sql_explanation": "This query calculates the average hotel rating for hotels in the USA and Canada by filtering the hotels table to only include rows where the country is \u0027USA\u0027 or \u0027Canada\u0027 and grouping the results by country. It orders the results by the average rating in descending order." 
+}, { + "id": "2607", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price per night for luxury hotels in \u0027Mumbai\u0027?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, name TEXT, city TEXT, category TEXT, price_per_night FLOAT);", + "sql": "SELECT city, AVG(price_per_night) as avg_price FROM hotels WHERE city \u003d \u0027Mumbai\u0027 AND category \u003d \u0027luxury\u0027 GROUP BY city;", + "sql_explanation": "This SQL query calculates the average price per night for luxury hotels located in \u0027Mumbai\u0027 by using the AVG function on the price_per_night column. It filters the data for the city \u0027Mumbai\u0027 and the category \u0027luxury\u0027. The data is grouped by city." 
+}, { + "id": "2635", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of virtual tour engagements for hotels in the \u0027Americas\u0027 region for the year 2022?", + "sql_context": "CREATE TABLE virtual_tours (id INT, hotel_id INT, region TEXT, calendar DATE, engagement INT);", + "sql": "SELECT region, SUM(engagement) FROM virtual_tours WHERE region \u003d \u0027Americas\u0027 AND YEAR(calendar) \u003d 2022 GROUP BY region;", + "sql_explanation": "The SQL query calculates the total number of virtual tour engagements for hotels in the \u0027Americas\u0027 region for the year 2022 by grouping the data by region and calendar, and then summing the engagement for each group." 
+}, { + "id": "3233", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "AI adoption rate in \u0027Europe\u0027 and \u0027Americas\u0027?", + "sql_context": "CREATE TABLE hotel_ai (hotel_id INT, hotel_name TEXT, region TEXT, ai_adoption BOOLEAN); INSERT INTO hotel_ai (hotel_id, hotel_name, region, ai_adoption) VALUES (1, \u0027Royal Castle\u0027, \u0027Europe\u0027, true), (2, \u0027Paris Luxury Hotel\u0027, \u0027Europe\u0027, true), (3, \u0027New York Palace\u0027, \u0027Americas\u0027, true), (4, \u0027Buenos Aires Hotel\u0027, \u0027Americas\u0027, false);", + "sql": "SELECT region, AVG(ai_adoption) FROM hotel_ai GROUP BY region HAVING region IN (\u0027Europe\u0027, \u0027Americas\u0027);", + "sql_explanation": "Calculate the AI adoption rate in the \u0027Europe\u0027 and \u0027Americas\u0027 regions by using the AVG function on the \u0027ai_adoption\u0027 column, filtering the rows with the WHERE clause to only consider hotels in the specified regions and the GROUP BY clause to group the results by region, while also using the HAVING clause to limit the results to the specified regions." 
+}, { + "id": "3323", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the virtual tour engagement rate by city?", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, city TEXT, views INT, clicks INT); INSERT INTO virtual_tours (tour_id, city, views, clicks) VALUES (1, \u0027City A\u0027, 1000, 200), (2, \u0027City B\u0027, 1500, 300), (3, \u0027City C\u0027, 2000, 400);", + "sql": "SELECT city, (SUM(clicks) * 100.0 / SUM(views)) as engagement_rate FROM virtual_tours GROUP BY city;", + "sql_explanation": "The SQL query calculates the virtual tour engagement rate by city by using the SUM function on the clicks and views columns, grouped by the city column. The engagement rate is calculated by dividing the SUM of clicks by the SUM of views, then multiplying by 100.0 to convert the result to a percentage." 
+}, { + "id": "3378", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show online travel agency (OTA) commission rates for hotels in Africa.", + "sql_context": "CREATE TABLE ota_data (hotel_id INT, location VARCHAR(20), ota VARCHAR(20), commission DECIMAL(5,2));", + "sql": "SELECT ota, AVG(commission) as avg_commission FROM ota_data WHERE location \u003d \u0027Africa\u0027 GROUP BY ota", + "sql_explanation": "Query ota_data table, filter for Africa and calculate average commission for each OTA, grouped by OTA name." +}, { + "id": "4094", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of listings per hotel in \u0027APAC\u0027?", + "sql_context": "CREATE TABLE ota_hotels (hotel_id INT, hotel_name TEXT, country TEXT, listings INT); INSERT INTO ota_hotels (hotel_id, hotel_name, country, listings) VALUES (1, \u0027Marina Bay Sands\u0027, \u0027Singapore\u0027, 900), (2, \u0027Grand Hyatt Tokyo\u0027, \u0027Japan\u0027, 800), (3, \u0027Four Seasons Mumbai\u0027, \u0027India\u0027, 700), (4, \u0027The Landmark Mandarin Oriental\u0027, \u0027Hong Kong\u0027, 950);", + "sql": "SELECT region, MAX(listings) 
FROM ota_hotels WHERE region \u003d \u0027APAC\u0027 GROUP BY region;", + "sql_explanation": "Calculate the maximum number of listings per hotel in the \u0027APAC\u0027 region by using the MAX function on the \u0027listings\u0027 column, filtering the rows with the WHERE clause to only consider hotels in the \u0027APAC\u0027 region and the GROUP BY clause to group the results by region." +}, { + "id": "4205", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users engaged with virtual tours in each region?", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, tour_name TEXT, region TEXT, user_count INT); INSERT INTO virtual_tours (tour_id, tour_name, region, user_count) VALUES (1, \u0027Tour 1\u0027, \u0027North America\u0027, 250), (2, \u0027Tour 2\u0027, \u0027Europe\u0027, 300), (3, \u0027Tour 3\u0027, \u0027Asia\u0027, 150);", + "sql": "SELECT region, SUM(user_count) as total_users FROM virtual_tours GROUP BY region;", + "sql_explanation": "This SQL query calculates the total number of users who engaged with virtual tours in each region in the virtual_tours table. It groups the records by the region column and then calculates the sum of user_count for each group." 
+}, { + "id": "4772", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average hotel rating for each country?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT); INSERT INTO hotels (hotel_id, hotel_name, country, rating) VALUES (1, \u0027Hotel X\u0027, \u0027USA\u0027, 4.5), (2, \u0027Hotel Y\u0027, \u0027Canada\u0027, 4.2), (3, \u0027Hotel Z\u0027, \u0027Mexico\u0027, 4.7);", + "sql": "SELECT country, AVG(rating) as avg_rating FROM hotels GROUP BY country;", + "sql_explanation": "The SQL query calculates the average rating for hotels in each country by using the AVG function on the rating column, grouped by the country column." 
+}, { + "id": "4965", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the average engagement time for virtual tours in each city.", + "sql_context": "CREATE TABLE virtual_tours (tour_id INT, city TEXT, engagement_time FLOAT); INSERT INTO virtual_tours (tour_id, city, engagement_time) VALUES (1, \u0027Tokyo\u0027, 15.5), (2, \u0027Tokyo\u0027, 12.3), (3, \u0027Osaka\u0027, 18.1);", + "sql": "SELECT city, AVG(engagement_time) FROM virtual_tours GROUP BY city;", + "sql_explanation": "This SQL query calculates the average engagement time for virtual tours in each city. It first groups the virtual_tours table by city, then calculates the average engagement_time for the matching rows for each group." 
+}, { + "id": "5216", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each region in the \u0027Region_Sales\u0027 table?", + "sql_context": "CREATE TABLE Region_Sales (region TEXT, revenue FLOAT); INSERT INTO Region_Sales (region, revenue) VALUES (\u0027North\u0027, 50000), (\u0027South\u0027, 60000);", + "sql": "SELECT region, SUM(revenue) FROM Region_Sales GROUP BY region;", + "sql_explanation": "The SQL query calculates the total revenue for each region in the \u0027Region_Sales\u0027 table by using the SUM() function and GROUP BY clause." 
+}, { + "id": "971", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of AI safety incidents and their impact level, partitioned by incident location, ordered by impact level in ascending order?", + "sql_context": "CREATE TABLE ai_safety_incidents_location (incident_id INT, incident_location VARCHAR(50), impact_level DECIMAL(3,2)); INSERT INTO ai_safety_incidents_location (incident_id, incident_location, impact_level) VALUES (1, \u0027North America\u0027, 0.65), (2, \u0027Europe\u0027, 0.75), (3, \u0027Asia\u0027, 0.85), (4, \u0027Africa\u0027, 0.95);", + "sql": "SELECT incident_location, COUNT(*) as num_incidents, MIN(impact_level) as min_impact_level FROM ai_safety_incidents_location GROUP BY incident_location ORDER BY min_impact_level ASC;", + "sql_explanation": "The SQL query calculates the number of AI safety incidents and their minimum impact level for each incident location using the COUNT and MIN functions, groups the results by incident location using the GROUP BY clause, and orders the results in ascending order based on the minimum impact level using the ORDER BY clause." 
+}, { + "id": "1315", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average algorithmic fairness score for AI applications in the African continent?", + "sql_context": "CREATE TABLE ai_applications (app_id INT, app_name TEXT, algorithmic_fairness_score DECIMAL(3,2), app_country TEXT); INSERT INTO ai_applications (app_id, app_name, algorithmic_fairness_score, app_country) VALUES (1, \u0027AI Diagnostics\u0027, 8.2, \u0027Egypt\u0027), (2, \u0027AI Fraud Detection\u0027, 9.1, \u0027South Africa\u0027), (3, \u0027AI HR Analytics\u0027, 6.8, \u0027Nigeria\u0027);", + "sql": "SELECT app_country, AVG(algorithmic_fairness_score) as avg_score FROM ai_applications WHERE app_country IN (\u0027Egypt\u0027, \u0027South Africa\u0027, \u0027Nigeria\u0027) GROUP BY app_country;", + "sql_explanation": "This query calculates the average algorithmic fairness score for AI applications in the African continent. It does so by grouping the ai_applications table by the app_country column and filtering the results for countries in the African continent. It then calculates the average algorithmic fairness score for each group." 
+}, { + "id": "1394", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI safety incidents were reported by each organization in the last year?", + "sql_context": "CREATE TABLE incidents (id INT, date DATE, organization TEXT, type TEXT);", + "sql": "SELECT organization, COUNT(*) as num_incidents FROM incidents WHERE date \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 year\u0027) GROUP BY organization ORDER BY num_incidents DESC;", + "sql_explanation": "The SQL query shows the number of AI safety incidents reported by each organization in the last year. It uses the EXTRACT function to extract the month from the date field and calculates the count (COUNT) of rows for each organization, grouped by organization. The result set is ordered by num_incidents in descending order. The WHERE clause filters the rows to only include incidents that occurred in the last year (from the current date minus one year)." 
+}, { + "id": "1564", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of AI patents filed by companies in the European Union, excluding the United Kingdom, from 2015 to 2020?", + "sql_context": "CREATE TABLE ai_patents (company VARCHAR(255), country VARCHAR(255), year INT, num_patents INT); INSERT INTO ai_patents (company, country, year, num_patents) VALUES (\u0027Google\u0027, \u0027Germany\u0027, 2015, 50), (\u0027Microsoft\u0027, \u0027France\u0027, 2016, 75), (\u0027IBM\u0027, \u0027Italy\u0027, 2017, 60), (\u0027Amazon\u0027, \u0027Spain\u0027, 2018, 80), (\u0027Facebook\u0027, \u0027Ireland\u0027, 2019, 90), (\u0027Alphabet\u0027, \u0027Netherlands\u0027, 2020, 100);", + "sql": "SELECT SUM(num_patents) as total_patents, country FROM ai_patents WHERE country NOT IN (\u0027United Kingdom\u0027) AND year BETWEEN 2015 AND 2020 GROUP BY country;", + "sql_explanation": "The SQL query calculates the total number of AI patents filed by companies in the European Union, excluding the United Kingdom, from 2015 to 2020. It starts by filtering out rows with the country \u0027United Kingdom\u0027 using the WHERE clause. Then, it filters rows with years between 2015 and 2020 using the BETWEEN operator. After that, it groups the remaining rows by the \u0027country\u0027 column using the GROUP BY clause. Finally, it calculates the total number of patents for each group using the SUM() function." 
+}, { + "id": "1597", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum, minimum and average fairness score of the models developed by different organizations?", + "sql_context": "CREATE TABLE fairness_scores (model_id INT, org_id INT, fairness_score FLOAT); INSERT INTO fairness_scores (model_id, org_id, fairness_score) VALUES (101, 1, 0.75), (102, 1, 0.85), (103, 2, 0.95), (104, 2, 0.9), (105, 3, 0.8);", + "sql": "SELECT org_id, MAX(fairness_score) as max_score, MIN(fairness_score) as min_score, AVG(fairness_score) as avg_score FROM fairness_scores GROUP BY org_id;", + "sql_explanation": "This query calculates the maximum, minimum, and average fairness score for each organization by grouping the records based on the org_id and then calculating the maximum, minimum, and average of fairness_score for each group." 
+}, { + "id": "1778", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries with the highest number of models developed by underrepresented communities.", + "sql_context": "CREATE TABLE models_underrepresented (model_id INT, country TEXT, community TEXT); INSERT INTO models_underrepresented (model_id, country, community) VALUES (101, \u0027USA\u0027, \u0027African American\u0027), (102, \u0027USA\u0027, \u0027Hispanic\u0027), (103, \u0027Canada\u0027, \u0027First Nations\u0027), (104, \u0027USA\u0027, \u0027Asian American\u0027), (105, \u0027India\u0027, \u0027Dalit\u0027);", + "sql": "SELECT country, COUNT(*) as num_models FROM models_underrepresented WHERE community IS NOT NULL GROUP BY country ORDER BY num_models DESC LIMIT 3;", + "sql_explanation": "This query retrieves the top 3 countries with the highest number of models developed by underrepresented communities by grouping the records based on the country and the community, filtering the records where the community is not null, and then sorting the results based on the number of models in descending order and limiting the results to the top 3 records." 
+}, { + "id": "1893", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the most common algorithmic fairness issues in Southeast Asian countries?", + "sql_context": "CREATE TABLE fairness_issues (country VARCHAR(255), issue VARCHAR(255)); INSERT INTO fairness_issues (country, issue) VALUES (\u0027Country1\u0027, \u0027Issue1\u0027); INSERT INTO fairness_issues (country, issue) VALUES (\u0027Country2\u0027, \u0027Issue2\u0027); INSERT INTO fairness_issues (country, issue) VALUES (\u0027Country3\u0027, \u0027Issue3\u0027);", + "sql": "SELECT issue, COUNT(*) as num_countries FROM fairness_issues WHERE country LIKE \u0027%Southeast Asia%\u0027 GROUP BY issue ORDER BY num_countries DESC;", + "sql_explanation": "The SQL query selects the issue and counts the number of countries from the fairness_issues table, filters the rows where the country is in Southeast Asia, groups the result by issue, and orders the result by the number of countries in descending order." 
+}, { + "id": "2072", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety score for each AI algorithm, ranked by the highest average?", + "sql_context": "CREATE TABLE ai_algorithms (ai_algorithm_id INT, algorithm_name VARCHAR(50), safety_score FLOAT); INSERT INTO ai_algorithms (ai_algorithm_id, algorithm_name, safety_score) VALUES (1, \u0027Algorithm A\u0027, 0.8), (2, \u0027Algorithm B\u0027, 0.9), (3, \u0027Algorithm C\u0027, 0.7);", + "sql": "SELECT algorithm_name, AVG(safety_score) as avg_safety_score FROM ai_algorithms GROUP BY algorithm_name ORDER BY avg_safety_score DESC;", + "sql_explanation": "The SQL query calculates the average safety score for each AI algorithm using the AVG function and groups them by algorithm_name. It then orders the results by the average safety score in descending order." 
+}, { + "id": "2121", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average number of AI safety research papers published per month in the past year", + "sql_context": "CREATE TABLE research_papers (id INT, publication_year INT, publication_month INT, topic VARCHAR(255)); INSERT INTO research_papers (id, publication_year, publication_month, topic) VALUES (1, 2021, 1, \u0027AI Safety\u0027), (2, 2021, 2, \u0027AI Safety\u0027), (3, 2021, 3, \u0027AI Safety\u0027), (4, 2021, 4, \u0027AI Safety\u0027), (5, 2021, 5, \u0027AI Safety\u0027), (6, 2021, 6, \u0027AI Safety\u0027), (7, 2021, 7, \u0027AI Safety\u0027), (8, 2021, 8, \u0027AI Safety\u0027), (9, 2021, 9, \u0027AI Safety\u0027), (10, 2021, 10, \u0027AI Safety\u0027), (11, 2021, 11, \u0027AI Safety\u0027), (12, 2021, 12, \u0027AI Safety\u0027);", + "sql": "SELECT AVG(COUNT(*)) FROM research_papers WHERE topic \u003d \u0027AI Safety\u0027 AND publication_year \u003d YEAR(NOW()) - 1 GROUP BY publication_month;", + "sql_explanation": "1. Filter research_papers rows with the topic \u0027AI Safety\u0027 and publication_year equal to the year before the current year. 2. Group the resulting rows by publication_month. 3. For each group, calculate the count of rows. 4. Find the average of those counts." 
+}, { + "id": "2291", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI safety incidents were recorded for each month in the \u0027safety_incidents\u0027 table?", + "sql_context": "CREATE TABLE safety_incidents (incident_date DATE, incident_type VARCHAR(20), incident_count INT); INSERT INTO safety_incidents (incident_date, incident_type, incident_count) VALUES (\u00272022-01-01\u0027, \u0027autonomous_vehicle\u0027, 3), (\u00272022-01-05\u0027, \u0027AI_assistant\u0027, 1), (\u00272022-02-10\u0027, \u0027autonomous_vehicle\u0027, 2);", + "sql": "SELECT DATE_FORMAT(incident_date, \u0027%Y-%m\u0027) as month, SUM(incident_count) as total_incidents FROM safety_incidents GROUP BY month;", + "sql_explanation": "The SQL query calculates the number of AI safety incidents for each month in the \u0027safety_incidents\u0027 table. It does this by extracting the month from the \u0027incident_date\u0027 values using the DATE_FORMAT function and grouping the table by this value. It then calculates the sum of the \u0027incident_count\u0027 values for each group." 
+}, { + "id": "2411", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of safety incidents for AI applications across various countries?", + "sql_context": " CREATE TABLE safety_incidents (id INT, ai_application VARCHAR(50), country VARCHAR(50), incident_count INT); INSERT INTO safety_incidents (id, ai_application, country, incident_count) VALUES (1, \u0027Autonomous Vehicles\u0027, \u0027USA\u0027, 3), (2, \u0027Healthcare\u0027, \u0027Canada\u0027, 2), (3, \u0027Finance\u0027, \u0027Mexico\u0027, 1);", + "sql": " SELECT country, ai_application, AVG(incident_count) as avg_incidents FROM safety_incidents GROUP BY country, ai_application;", + "sql_explanation": "This query calculates the average number of safety incidents for AI applications across various countries. It groups the safety_incidents table by the country and ai_application columns, and then calculates the average of the incident_count column for each group using the AVG function. The results are then displayed with the country, ai_application, and the average number of incidents (avg_incidents)." 
+}, { + "id": "2640", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum satisfaction score for models trained on dataset A, for each region, excluding North America?", + "sql_context": "CREATE TABLE models (id INT, dataset VARCHAR(20), satisfaction FLOAT, region VARCHAR(20)); INSERT INTO models VALUES (1, \u0027datasetA\u0027, 4.3, \u0027Europe\u0027), (2, \u0027datasetA\u0027, 4.5, \u0027Asia\u0027), (3, \u0027datasetB\u0027, 3.9, \u0027Africa\u0027), (4, \u0027datasetB\u0027, 4.1, \u0027Africa\u0027), (5, \u0027datasetA\u0027, 4.2, \u0027North America\u0027);", + "sql": "SELECT region, MIN(satisfaction) FROM models WHERE dataset \u003d \u0027datasetA\u0027 AND region !\u003d \u0027North America\u0027 GROUP BY region;", + "sql_explanation": "This query uses the MIN function to find the minimum satisfaction score for models trained on dataset A, broken down by region and excluding North America." 
+}, { + "id": "2660", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of explainable AI methodologies published per year.", + "sql_context": "CREATE TABLE publications (id INT, title TEXT, date DATE, methodology TEXT); CREATE VIEW explainable_ai AS SELECT * FROM publications WHERE methodology IS NOT NULL;", + "sql": "SELECT EXTRACT(YEAR FROM date) as year, COUNT(*) as num_publications FROM explainable_ai GROUP BY year ORDER BY year;", + "sql_explanation": "The SQL query shows the number of explainable AI methodologies published per year. It first creates a view named explainable_ai that selects all rows from the publications table where the methodology field is not null. The final SELECT statement extracts (EXTRACT) the year from the date field and calculates the count (COUNT) of rows for each year, grouped by year. The result set is ordered by year." 
+}, { + "id": "2862", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many safety incidents occurred for each AI model, ordered by the number of incidents in descending order?", + "sql_context": "CREATE TABLE safety_incidents (incident_id INT PRIMARY KEY, model_id INT, incident_date DATE, FOREIGN KEY (model_id) REFERENCES ai_models(model_id)); INSERT INTO safety_incidents (incident_id, model_id, incident_date) VALUES (1, 1, \u00272021-01-01\u0027), (2, 2, \u00272021-02-01\u0027), (3, 1, \u00272021-03-01\u0027), (4, 3, \u00272021-04-01\u0027), (5, 2, \u00272021-05-01\u0027);", + "sql": "SELECT model_id, COUNT(*) as num_incidents FROM safety_incidents GROUP BY model_id ORDER BY num_incidents DESC;", + "sql_explanation": "This SQL query calculates the number of safety incidents for each AI model and orders the results in descending order by the number of incidents. It does this by grouping the records in the safety_incidents table by the model_id column and then counting the number of records in each group. The results are then ordered in descending order by the count of records." 
+}, { + "id": "2963", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average satisfaction score for creative AI applications, grouped by the target user demographic?", + "sql_context": "CREATE TABLE creative_ai_satisfaction (id INT, app_name VARCHAR(50), target_demographic VARCHAR(50), satisfaction_score INT); INSERT INTO creative_ai_satisfaction (id, app_name, target_demographic, satisfaction_score) VALUES (1, \u0027Artbreeder\u0027, \u0027Artists\u0027, 85), (2, \u0027Daz 3D\u0027, \u00273D Artists\u0027, 90), (3, \u0027Runway ML\u0027, \u0027Content Creators\u0027, 95);", + "sql": "SELECT target_demographic, AVG(satisfaction_score) FROM creative_ai_satisfaction GROUP BY target_demographic;", + "sql_explanation": "This SQL query calculates the average satisfaction score for creative AI applications, grouped by the target user demographic. It uses the GROUP BY clause to group the data by target_demographic, and the AVG function to calculate the average satisfaction score for each group." 
+}, { + "id": "3381", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of artworks created by each AI artist for each genre in the Creative AI Artworks database?", + "sql_context": "CREATE TABLE creative_ai_artworks (id INT PRIMARY KEY, artist VARCHAR(255), genre VARCHAR(255), creation_date DATE); INSERT INTO creative_ai_artworks (id, artist, genre, creation_date) VALUES (1, \u0027AI Artist 1\u0027, \u0027Cubism\u0027, \u00272021-12-15\u0027);", + "sql": "SELECT genre, artist, COUNT(*) as total_artworks FROM creative_ai_artworks GROUP BY genre, artist;", + "sql_explanation": "This query retrieves the number of artworks created by each AI artist for each genre in the creative_ai_artworks table. The result is grouped by the genre and artist columns." 
+}, { + "id": "3400", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI safety incidents were reported by each community in 2021?", + "sql_context": "CREATE TABLE IncidentReports (report_id INT, reported_by VARCHAR(255), incident_year INT); INSERT INTO IncidentReports (report_id, reported_by, incident_year) VALUES (1, \u0027Minority Group\u0027, 2021), (2, \u0027LGBTQ+\u0027, 2021), (3, \u0027Women in Tech\u0027, 2021);", + "sql": "SELECT reported_by, COUNT(*) FROM IncidentReports WHERE incident_year \u003d 2021 GROUP BY reported_by;", + "sql_explanation": "The query groups the records by the reported_by column and counts the number of AI safety incidents reported by each community in 2021." 
+}, { + "id": "3416", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average satisfaction score for creative AI applications, grouped by region, for applications released in 2020?", + "sql_context": "CREATE TABLE creative_ai (app_id INT, app_name TEXT, release_year INT, region TEXT, satisfaction_score INT); INSERT INTO creative_ai (app_id, app_name, release_year, region, satisfaction_score) VALUES (1, \u0027Dreamscope\u0027, 2020, \u0027North America\u0027, 85); INSERT INTO creative_ai (app_id, app_name, release_year, region, satisfaction_score) VALUES (2, \u0027DeepArt\u0027, 2019, \u0027Europe\u0027, 80); INSERT INTO creative_ai (app_id, app_name, release_year, region, satisfaction_score) VALUES (3, \u0027Artbreeder\u0027, 2020, \u0027North America\u0027, 90);", + "sql": "SELECT region, AVG(satisfaction_score) FROM creative_ai WHERE release_year \u003d 2020 GROUP BY region;", + "sql_explanation": "This query calculates the average satisfaction score for creative AI applications released in 2020, grouped by their region. It uses the GROUP BY clause to group the records by the region column and the AVG function to calculate the average satisfaction score for each group." 
+}, { + "id": "3514", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average safety score for each creative AI application?", + "sql_context": "CREATE TABLE CreativeAI (id INT, application VARCHAR(255), safety_score DECIMAL(5,2)); INSERT INTO CreativeAI (id, application, safety_score) VALUES (1, \u0027Artistic Image Generation\u0027, 85.67), (2, \u0027Automated Journalism\u0027, 91.23), (3, \u0027Music Composition\u0027, 88.98);", + "sql": "SELECT application, AVG(safety_score) as avg_safety_score FROM CreativeAI GROUP BY application;", + "sql_explanation": "This SQL query calculates the average safety score for each creative AI application. It uses the AVG function to find the average of the safety_score column for each unique application." 
+}, { + "id": "3812", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of transactions for each gender?", + "sql_context": "CREATE TABLE Transactions (transaction_id INT, user_id INT, gender VARCHAR(10), transaction_amount DECIMAL(10,2)); INSERT INTO Transactions (transaction_id, user_id, gender, transaction_amount) VALUES (1, 101, \u0027Male\u0027, 50.00), (2, 102, \u0027Female\u0027, 75.00), (3, 103, \u0027Non-binary\u0027, 35.00), (4, 104, \u0027Male\u0027, 60.00);", + "sql": "SELECT gender, SUM(transaction_amount) as total_amount FROM Transactions GROUP BY gender;", + "sql_explanation": "This SQL query calculates the total transaction amount for each gender by grouping the Transactions table by the gender column and then calculating the sum of transaction_amount for each group." 
+}, { + "id": "3904", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average evaluation scores for each model in the \u0027evaluation_scores\u0027 table?", + "sql_context": "CREATE TABLE evaluation_scores (id INT, model VARCHAR(50), evaluation_score DECIMAL(5,2));", + "sql": "SELECT model, AVG(evaluation_score) as avg_score FROM evaluation_scores GROUP BY model;", + "sql_explanation": "The SQL query groups the \u0027evaluation_scores\u0027 table by \u0027model\u0027 and calculates the average of \u0027evaluation_score\u0027 for each model, providing the average evaluation scores for each model." 
+}, { + "id": "4431", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of creative AI applications by type", + "sql_context": "CREATE TABLE creative_ai (application_name TEXT, application_type TEXT); INSERT INTO creative_ai (application_name, application_type) VALUES (\u0027App1\u0027, \u0027Image Generation\u0027), (\u0027App2\u0027, \u0027Text Generation\u0027), (\u0027App3\u0027, \u0027Music Generation\u0027);", + "sql": "SELECT application_type, COUNT(*) FROM creative_ai GROUP BY application_type;", + "sql_explanation": "Count the number of applications for each type in the \u0027creative_ai\u0027 table." 
+}, { + "id": "4632", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many creative AI applications are developed per country?", + "sql_context": "CREATE TABLE creative_ai (ai_app_id INT, app_name VARCHAR(50), country VARCHAR(50)); INSERT INTO creative_ai (ai_app_id, app_name, country) VALUES (1, \u0027AI Painter\u0027, \u0027USA\u0027), (2, \u0027AI Music Composer\u0027, \u0027Germany\u0027), (3, \u0027AI Poet\u0027, \u0027France\u0027), (4, \u0027AI Choreographer\u0027, \u0027India\u0027), (5, \u0027AI Filmmaker\u0027, \u0027Canada\u0027), (6, \u0027AI Writer\u0027, \u0027Australia\u0027), (7, \u0027AI Photographer\u0027, \u0027UK\u0027), (8, \u0027AI Sculptor\u0027, \u0027Brazil\u0027);", + "sql": "SELECT country, COUNT(*) as num_ai_apps FROM creative_ai GROUP BY country;", + "sql_explanation": "This SQL query calculates the number of creative AI applications developed per country by grouping the creative_ai table by country and finding the count of rows for each group." 
+}, { + "id": "4894", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of AI models across different countries?", + "sql_context": "CREATE TABLE ai_models (model_name TEXT, model_country TEXT); INSERT INTO ai_models (model_name, model_country) VALUES (\u0027ModelA\u0027, \u0027USA\u0027), (\u0027ModelB\u0027, \u0027Germany\u0027), (\u0027ModelC\u0027, \u0027China\u0027), (\u0027ModelD\u0027, \u0027Brazil\u0027);", + "sql": "SELECT model_country, COUNT(*) FROM ai_models GROUP BY model_country;", + "sql_explanation": "This query calculates the distribution of AI models across different countries. It groups the data by the model_country column and then calculates the count for each group." 
+}, { + "id": "5400", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI safety research papers were published by each author?", + "sql_context": "CREATE TABLE SafetyPapers (id INT, author VARCHAR(255), title VARCHAR(255));", + "sql": "SELECT author, COUNT(*) FROM SafetyPapers GROUP BY author;", + "sql_explanation": "This query groups the SafetyPapers table by the author column and calculates the count of rows in each group, effectively counting the number of papers published by each author." +}, { + "id": "5506", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the number of AI safety incidents recorded for each region in the \u0027ai_safety\u0027 table.", + "sql_context": "CREATE TABLE ai_safety (region TEXT, incident_count INT);", + "sql": "SELECT region, COUNT(*) FROM ai_safety GROUP BY region;", + "sql_explanation": "The SQL query groups the \u0027ai_safety\u0027 table data by the \u0027region\u0027 column and then counts the number of rows in each group using the COUNT function." 
+}, { + "id": "1361", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which fund has invested the most in the education sector?", + "sql_context": "CREATE TABLE investments (id INT, fund_name VARCHAR(255), sector VARCHAR(255), investment_amount FLOAT);", + "sql": "SELECT fund_name, SUM(investment_amount) as total_investment FROM investments WHERE sector \u003d \u0027education\u0027 GROUP BY fund_name ORDER BY total_investment DESC LIMIT 1;", + "sql_explanation": "This query finds the fund that has invested the most in the education sector. It uses the GROUP BY clause to group rows by fund_name, the SUM() function to calculate the total investment amount for each fund, and the ORDER BY clause with DESC modifier to sort rows by total investment amount in descending order. The LIMIT clause restricts the number of rows returned to 1." 
+}, { + "id": "1471", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of investments in the education sector, broken down by year?", + "sql_context": "CREATE TABLE investments (investment_id INT, sector VARCHAR(50), investment_amount INT, investment_date DATE); INSERT INTO investments (investment_id, sector, investment_amount, investment_date) VALUES (1, \u0027Education\u0027, 500000, \u00272022-01-01\u0027), (2, \u0027Education\u0027, 600000, \u00272023-02-01\u0027), (3, \u0027Education\u0027, 400000, \u00272024-03-01\u0027), (4, \u0027Education\u0027, 300000, \u00272025-04-01\u0027), (5, \u0027Education\u0027, 700000, \u00272026-05-01\u0027);", + "sql": "SELECT EXTRACT(YEAR FROM investment_date) as year, COUNT(*) as total_investments FROM investments WHERE sector \u003d \u0027Education\u0027 GROUP BY year ORDER BY year ASC;", + "sql_explanation": "This SQL query calculates the total number of investments in the education sector, broken down by year, by counting the number of records in the investments table where the sector is \u0027Education\u0027 and grouping the results by year." 
+}, { + "id": "2210", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment made by each fund in the renewable energy sector?", + "sql_context": "CREATE TABLE investments (id INT, fund_name VARCHAR(255), sector VARCHAR(255), investment_amount FLOAT);", + "sql": "SELECT fund_name, SUM(investment_amount) as total_investment FROM investments WHERE sector \u003d \u0027renewable energy\u0027 GROUP BY fund_name;", + "sql_explanation": "This query calculates the total investment made by each fund in the renewable energy sector. It uses the GROUP BY clause to group rows by fund_name, and the SUM() function to calculate the total investment amount for each fund." 
+}, { + "id": "2400", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries with the highest number of impact investments?", + "sql_context": "CREATE TABLE impact_investments (id INT, investment_id INT, country TEXT); INSERT INTO impact_investments (id, investment_id, country) VALUES (1, 1001, \u0027United States\u0027), (2, 1002, \u0027Canada\u0027), (3, 1003, \u0027United States\u0027), (4, 1004, \u0027Brazil\u0027), (5, 1005, \u0027India\u0027);", + "sql": "SELECT country, COUNT(*) AS investment_count FROM impact_investments GROUP BY country ORDER BY investment_count DESC LIMIT 3;", + "sql_explanation": "Identify the top 3 countries with the highest number of impact investments by grouping by country and counting the investments, then ordering by the count in descending order and limiting the results to 3." 
+}, { + "id": "2880", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 investors by total investment amount?", + "sql_context": "CREATE TABLE Investors (InvestorID INT, Name VARCHAR(20), Gender VARCHAR(10), Amount INT); INSERT INTO Investors (InvestorID, Name, Gender, Amount) VALUES (1, \u0027John Doe\u0027, \u0027Male\u0027, 10000), (2, \u0027Jane Smith\u0027, \u0027Female\u0027, 12000), (3, \u0027Jim Brown\u0027, \u0027Male\u0027, 8000), (4, \u0027Joan Johnson\u0027, \u0027Female\u0027, 9000);", + "sql": "SELECT Name, SUM(Amount) as TotalInvestment FROM Investors GROUP BY Name ORDER BY TotalInvestment DESC LIMIT 3;", + "sql_explanation": "This SQL query identifies the top 3 investors by total investment amount. It uses the SUM() function to calculate the total investment for each investor, the GROUP BY clause to group the results by investor name, the ORDER BY clause to sort the results in descending order by total investment, and the LIMIT clause to limit the results to the top 3 investors." 
+}, { + "id": "2932", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of investments in each sector, ordered from highest to lowest.", + "sql_context": "CREATE TABLE investments(id INT, sector VARCHAR(20), esg_score INT); INSERT INTO investments VALUES(1, \u0027Tech\u0027, 85), (2, \u0027Healthcare\u0027, 75), (3, \u0027Tech\u0027, 82);", + "sql": "SELECT sector, COUNT(*) as total_investments FROM investments GROUP BY sector ORDER BY total_investments DESC;", + "sql_explanation": "Counts the number of investments in each sector, and orders the results from highest to lowest. Groups the investments table by sector, counts the number of records for each sector, and orders the results in descending order by the total number of investments." 
+}, { + "id": "2983", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average ESG score for companies in the \u0027finance\u0027 and \u0027technology\u0027 sectors?", + "sql_context": "CREATE TABLE companies_esg_2 (id INT, sector VARCHAR(20), ESG_score FLOAT); INSERT INTO companies_esg_2 (id, sector, ESG_score) VALUES (1, \u0027technology\u0027, 72.5), (2, \u0027finance\u0027, 80.2), (3, \u0027technology\u0027, 76.1);", + "sql": "SELECT sector, AVG(ESG_score) FROM companies_esg_2 WHERE sector IN (\u0027finance\u0027, \u0027technology\u0027) GROUP BY sector;", + "sql_explanation": "Calculate the average ESG score for companies in the \u0027finance\u0027 and \u0027technology\u0027 sectors by using the AVG() function on the ESG_score column and the GROUP BY clause on the sector column, filtered by the sector column with \u0027finance\u0027 and \u0027technology\u0027 values." 
+}, { + "id": "3305", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many investments were made in total by a specific investor in Q3 2021?", + "sql_context": "CREATE TABLE investments (id INT, investor VARCHAR(20), date DATE); INSERT INTO investments (id, investor, date) VALUES (1, \u0027Investor X\u0027, \u00272021-07-15\u0027), (2, \u0027Investor Y\u0027, \u00272021-08-01\u0027), (3, \u0027Investor X\u0027, \u00272021-09-30\u0027);", + "sql": "SELECT COUNT(*) FROM investments WHERE date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027 GROUP BY investor;", + "sql_explanation": "This query counts the total number of investments made by each investor in Q3 2021. It does this by using the COUNT function and the GROUP BY clause, which groups the rows by the investor column and returns a separate count for each group. The WHERE clause filters the rows to only those with a date value within the range of Q3 2021." 
+}, { + "id": "3406", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of investments in healthcare for each region?", + "sql_context": "CREATE TABLE investments (id INT, sector VARCHAR(20), amount DECIMAL(10,2), region VARCHAR(30)); INSERT INTO investments (id, sector, amount, region) VALUES (1, \u0027Education\u0027, 5000.00, \u0027North America\u0027), (2, \u0027Healthcare\u0027, 7000.00, \u0027Europe\u0027), (3, \u0027Education\u0027, 6000.00, \u0027Asia\u0027), (4, \u0027Healthcare\u0027, 8000.00, \u0027North America\u0027), (5, \u0027Healthcare\u0027, 9000.00, \u0027Europe\u0027);", + "sql": "SELECT i.region, SUM(i.amount) FROM investments i WHERE i.sector \u003d \u0027Healthcare\u0027 GROUP BY i.region;", + "sql_explanation": "This query calculates the sum of investments in the healthcare sector for each region by filtering the investments table for the healthcare sector and grouping the results by region." 
+}, { + "id": "1021", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many startups were founded by people from underrepresented racial or ethnic backgrounds each year since 2010?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founder_race TEXT, founding_date DATE); INSERT INTO company (id, name, founder_race, founding_date) VALUES (1, \u0027Acme Inc\u0027, \u0027Hispanic\u0027, \u00272016-01-01\u0027); INSERT INTO company (id, name, founder_race, founding_date) VALUES (2, \u0027Beta Inc\u0027, \u0027Black\u0027, \u00272017-01-01\u0027);", + "sql": "SELECT EXTRACT(YEAR FROM founding_date) AS year, COUNT(*) AS startups FROM company WHERE founder_race IN (\u0027Hispanic\u0027, \u0027Black\u0027, \u0027Native American\u0027, \u0027Pacific Islander\u0027) GROUP BY year;", + "sql_explanation": "This query calculates the number of startups founded by people from underrepresented racial or ethnic backgrounds each year since 2010. It does this by extracting the year from the founding_date field, filtering for companies with founders from underrepresented racial or ethnic backgrounds (founder_race IN (\u0027Hispanic\u0027, \u0027Black\u0027, \u0027Native American\u0027, \u0027Pacific Islander\u0027)), and then grouping the results by year and counting the number of startups in each group." 
+}, { + "id": "2885", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many investment rounds have startups in the health sector participated in?", + "sql_context": "CREATE TABLE investment (id INT, company_id INT, round_number INT, round_date DATE, funding_amount INT); INSERT INTO investment (id, company_id, round_number, round_date, funding_amount) VALUES (1, 1, 1, \u00272018-01-01\u0027, 500000); CREATE TABLE company (id INT, name TEXT, industry TEXT); INSERT INTO company (id, name, industry) VALUES (1, \u0027HealthX\u0027, \u0027Health\u0027);", + "sql": "SELECT company_id, COUNT(DISTINCT round_number) FROM investment GROUP BY company_id HAVING industry \u003d \u0027Health\u0027;", + "sql_explanation": "The SQL query calculates the number of distinct investment rounds for each startup in the health sector by using the GROUP BY clause to group investment records by company_id and COUNT(DISTINCT round_number) to count unique investment rounds." 
+}, { + "id": "3721", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many startups have had an exit strategy of IPO in each country?", + "sql_context": "CREATE TABLE startup (id INT, name TEXT, country TEXT, exit_strategy TEXT); INSERT INTO startup (id, name, country, exit_strategy) VALUES (1, \u0027Omicron Enterprises\u0027, \u0027USA\u0027, \u0027IPO\u0027); INSERT INTO startup (id, name, country, exit_strategy) VALUES (2, \u0027Pi Inc\u0027, \u0027Canada\u0027, \u0027Acquisition\u0027); INSERT INTO startup (id, name, country, exit_strategy) VALUES (3, \u0027Rho Ltd\u0027, \u0027Mexico\u0027, \u0027IPO\u0027);", + "sql": "SELECT s.country, COUNT(*) FROM startup s WHERE s.exit_strategy \u003d \u0027IPO\u0027 GROUP BY s.country;", + "sql_explanation": "We group the rows in the startup table by the country column and filter for rows where the exit_strategy column is \u0027IPO\u0027. We then calculate the count of rows for each group in the result." 
+}, { + "id": "3966", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum funding amount for companies founded by women, in each industry category?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founder TEXT, industry TEXT, funding FLOAT); INSERT INTO company (id, name, founder, industry, funding) VALUES (1, \u0027Acme Inc\u0027, \u0027Female\u0027, \u0027Tech\u0027, 2000000);", + "sql": "SELECT industry, MAX(funding) FROM company WHERE founder \u003d \u0027Female\u0027 GROUP BY industry;", + "sql_explanation": "The SQL query calculates the maximum funding amount for companies founded by women, in each industry category. It first selects the industry and maximum funding columns from the company table, filters the rows where the founder is female, and then groups the results by industry." 
+}, { + "id": "125", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total energy savings (in kWh) for each technology type in the \u0027energy_savings\u0027 table, grouped by year", + "sql_context": "CREATE TABLE energy_savings (id INT, building_id INT, technology VARCHAR(50), energy_savings_kwh FLOAT, year INT);", + "sql": "SELECT technology, EXTRACT(YEAR FROM saving_date) as year, SUM(energy_savings_kwh) FROM energy_savings, generate_series(date_trunc(\u0027year\u0027, saving_date), date_trunc(\u0027year\u0027, saving_date + interval \u00271 year\u0027 - interval \u00271 day\u0027), interval \u00271 year\u0027) as series(saving_date) GROUP BY technology, year;", + "sql_explanation": "This query calculates the total energy savings (in kWh) for each technology type in the \u0027energy_savings\u0027 table, grouped by year. It does this by using the EXTRACT() function to extract the year from the saving_date column, and the generate_series() function to generate a series of dates for each year. The SUM() function is then used to calculate the total energy savings for each technology type, grouped by year." 
+}, { + "id": "1037", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 technology types with the highest average energy savings (in kWh) in the \u0027energy_savings\u0027 table, grouped by year", + "sql_context": "CREATE TABLE energy_savings (id INT, building_id INT, technology VARCHAR(50), energy_savings_kwh FLOAT, year INT);", + "sql": "SELECT technology, EXTRACT(YEAR FROM saving_date) as year, AVG(energy_savings_kwh) as avg_savings FROM energy_savings GROUP BY technology, year ORDER BY avg_savings DESC LIMIT 3;", + "sql_explanation": "This query lists the top 3 technology types with the highest average energy savings (in kWh) in the \u0027energy_savings\u0027 table, grouped by year. It does this by using the AVG() function to calculate the average energy savings for each technology type, grouped by year, and the ORDER BY and LIMIT clauses to order the results by average energy savings and limit the results to the top 3." 
+}, { + "id": "1946", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total investment (in USD) in renewable energy initiatives by each investor, in descending order", + "sql_context": "CREATE TABLE renewable_energy_investments (id INT, investor VARCHAR(100), initiative VARCHAR(100), investment_usd FLOAT);", + "sql": "SELECT investor, SUM(investment_usd) as total_investment FROM renewable_energy_investments GROUP BY investor ORDER BY total_investment DESC;", + "sql_explanation": "The SQL query groups the renewable_energy_investments table by investor, calculating the sum of investment_usd for each group, and orders the results in descending order, providing the total investment in renewable energy initiatives by each investor, in descending order." 
+}, { + "id": "2108", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 green building certifications by number of certified projects in the green_buildings table.", + "sql_context": "CREATE TABLE green_buildings (building_id INT, building_name VARCHAR(255), location VARCHAR(255), certification_id INT, certification_name VARCHAR(255));", + "sql": "SELECT certification_name, COUNT(*) AS num_projects FROM green_buildings GROUP BY certification_id ORDER BY num_projects DESC LIMIT 3;", + "sql_explanation": "This query groups the green_buildings table by certification_id and calculates the number of projects for each certification. It then orders the results in descending order by the number of projects and limits the output to the top 3 certifications." 
+}, { + "id": "2344", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average wind speed and solar radiation for each city by month?", + "sql_context": "CREATE TABLE sensors (id INT, city VARCHAR(255), type VARCHAR(255), value FLOAT, timestamp TIMESTAMP); INSERT INTO sensors (id, city, type, value, timestamp) VALUES (1, \u0027EcoCity\u0027, \u0027Wind Speed\u0027, 7.2, \u00272022-04-01 10:00:00\u0027), (2, \u0027EcoCity\u0027, \u0027Solar Radiation\u0027, 500, \u00272022-04-01 10:00:00\u0027);", + "sql": "SELECT city, type, AVG(value) as avg_value, DATE_FORMAT(timestamp, \u0027%%Y-%%m\u0027) as month FROM sensors GROUP BY city, type, month;", + "sql_explanation": "This query pivots the data based on the \u0027type\u0027 column, summarizing the average value per city and month." 
+}, { + "id": "2366", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many green buildings are present in each continent, excluding those with a construction date before 2010?", + "sql_context": "CREATE TABLE building_details (id INT, building_name VARCHAR(255), continent VARCHAR(255), construction_date DATE);", + "sql": "SELECT continent, COUNT(*) AS building_count FROM building_details WHERE construction_date \u003e\u003d \u00272010-01-01\u0027 GROUP BY continent;", + "sql_explanation": "The SQL query counts the number of green buildings in the building_details table, grouped by continent. It only includes rows where the construction date is on or after 2010-01-01." 
+}, { + "id": "2611", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects have been completed in the United Kingdom for each technology category?", + "sql_context": "CREATE TABLE RenewableEnergyProjects (project_id INT, project_name VARCHAR(255), country VARCHAR(255), capacity FLOAT, technology VARCHAR(255));", + "sql": "SELECT technology, COUNT(project_id) FROM RenewableEnergyProjects WHERE country \u003d \u0027United Kingdom\u0027 GROUP BY technology;", + "sql_explanation": "This query counts the number of renewable energy projects in the United Kingdom for each technology category by grouping the RenewableEnergyProjects table by technology where the country is \u0027United Kingdom\u0027." 
+}, { + "id": "2617", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 cities with the highest number of green buildings in the \u0027GreenBuildings\u0027 table.", + "sql_context": "CREATE TABLE GreenBuildings (id INT, name VARCHAR(100), location VARCHAR(100), energy_consumption FLOAT);", + "sql": "SELECT location, COUNT(*) as building_count FROM GreenBuildings GROUP BY location ORDER BY building_count DESC LIMIT 5;", + "sql_explanation": "This SQL query lists the top 5 cities with the highest number of green buildings. It does this by using the COUNT function to count the number of buildings in each city, then grouping the results by city using the GROUP BY clause. Finally, it orders the results in descending order based on the number of buildings and limits the results to the top 5 using the LIMIT clause." 
+}, { + "id": "2690", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in renewable energy projects in \u0027Country E\u0027 for each project type?", + "sql_context": "CREATE TABLE RenewableEnergyInvestments (ProjectID INT, ProjectType VARCHAR(255), Country VARCHAR(255), Investment FLOAT); INSERT INTO RenewableEnergyInvestments (ProjectID, ProjectType, Country, Investment) VALUES (1, \u0027Solar\u0027, \u0027Country E\u0027, 1000000.0);", + "sql": "SELECT ProjectType, SUM(Investment) FROM RenewableEnergyInvestments WHERE Country \u003d \u0027Country E\u0027 GROUP BY ProjectType;", + "sql_explanation": "This query calculates the total investment in renewable energy projects in \u0027Country E\u0027 for each project type by summing the Investment values in the RenewableEnergyInvestments table where the Country is \u0027Country E\u0027, and grouping the results by ProjectType." 
+}, { + "id": "2784", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum CO2 emission reduction of green building projects in each reduction type, for reduction types with at least two projects?", + "sql_context": "CREATE TABLE GreenBuildingProjects (id INT, reduction_type VARCHAR(50), co2_reduction FLOAT); INSERT INTO GreenBuildingProjects (id, reduction_type, co2_reduction) VALUES (1, \u0027Insulation\u0027, 50.0), (2, \u0027Lighting\u0027, 75.0), (3, \u0027Insulation\u0027, 60.0), (4, \u0027HVAC\u0027, 40.0), (5, \u0027HVAC\u0027, 35.0);", + "sql": "SELECT reduction_type, MIN(co2_reduction) FROM GreenBuildingProjects GROUP BY reduction_type HAVING COUNT(*) \u003e\u003d 2;", + "sql_explanation": "The SQL query calculates the minimum CO2 emission reduction for green building projects in each reduction type, but only for reduction types with at least two projects. It does this by grouping the \u0027GreenBuildingProjects\u0027 table by the \u0027reduction_type\u0027 column and then applying the MIN function to the \u0027co2_reduction\u0027 column. It then filters the resulting table by only including reduction types with at least two projects." 
+}, { + "id": "2812", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of wind energy projects in each country, sorted by the total capacity in descending order?", + "sql_context": "CREATE TABLE wind_projects (id INT, country VARCHAR(50), capacity FLOAT);", + "sql": "SELECT country, SUM(capacity) as total_capacity FROM wind_projects GROUP BY country ORDER BY total_capacity DESC;", + "sql_explanation": "This query calculates the total installed capacity of wind energy projects in each country and sorts the results by the total capacity in descending order. It sums the capacity for each country, groups the results by country, and orders the results by the total capacity in descending order." 
+}, { + "id": "2859", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many renewable energy projects were completed in each region in the past 3 years?", + "sql_context": "CREATE TABLE renewable_projects (id INT, region VARCHAR(50), completion_year INT);", + "sql": "SELECT region, COUNT(*) FROM renewable_projects WHERE completion_year \u003e\u003d YEAR(CURRENT_DATE) - 3 GROUP BY region;", + "sql_explanation": "This query calculates the number of renewable energy projects completed in each region in the past 3 years. It filters the renewable_projects table to only include records from the past 3 years, counts the number of projects for each region, and groups the results by region." 
+}, { + "id": "3051", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity of renewable energy projects, broken down by country and project type?", + "sql_context": "CREATE TABLE renewable_energy (country VARCHAR(50), project_type VARCHAR(50), installed_capacity INT); INSERT INTO renewable_energy (country, project_type, installed_capacity) VALUES (\u0027USA\u0027, \u0027Wind\u0027, 3000), (\u0027USA\u0027, \u0027Solar\u0027, 5000), (\u0027Mexico\u0027, \u0027Wind\u0027, 2000), (\u0027Mexico\u0027, \u0027Solar\u0027, 4000);", + "sql": "SELECT country, project_type, SUM(installed_capacity) FROM renewable_energy GROUP BY country, project_type;", + "sql_explanation": "This query groups the data by country and project type and calculates the total installed capacity for each combination of country and project type." 
+}, { + "id": "3407", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of buildings in each city in the \u0027GreenBuildings\u0027 schema, with energy consumption lower than 1000.", + "sql_context": "CREATE TABLE GreenBuildings.Buildings (id INT, city VARCHAR(50), energy_consumption FLOAT); INSERT INTO GreenBuildings.Buildings (id, city, energy_consumption) VALUES (1, \u0027NYC\u0027, 900.5), (2, \u0027LA\u0027, 1100.3), (3, \u0027SF\u0027, 700.7), (4, \u0027NYC\u0027, 850.4), (5, \u0027LA\u0027, 900.0);", + "sql": "SELECT city, COUNT(*) FROM GreenBuildings.Buildings WHERE energy_consumption \u003c 1000 GROUP BY city;", + "sql_explanation": "The SQL query finds the number of buildings in each city in the \u0027GreenBuildings\u0027 schema with energy consumption lower than 1000 by using the COUNT function on all rows for each city where the \u0027energy_consumption\u0027 column is lower than 1000, grouping the results by the \u0027city\u0027 column." 
+}, { + "id": "3605", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of smart city initiatives in each location and their total carbon offsets?", + "sql_context": "CREATE TABLE smart_city_initiatives (initiative_id INT, initiative_name VARCHAR(50), location VARCHAR(50), carbon_offsets FLOAT); INSERT INTO smart_city_initiatives (initiative_id, initiative_name, location, carbon_offsets) VALUES (1, \u0027Smart Grid 1\u0027, \u0027CityC\u0027, 1000.0), (2, \u0027Smart Lighting 1\u0027, \u0027CityD\u0027, 500.0), (3, \u0027Smart Waste Management 1\u0027, \u0027CityC\u0027, 1500.0);", + "sql": "SELECT location, COUNT(*), SUM(carbon_offsets) FROM smart_city_initiatives GROUP BY location;", + "sql_explanation": "The SQL query calculates the number of smart city initiatives in each location and their total carbon offsets by grouping the initiatives based on the location and calculating the count and sum of carbon offsets." 
+}, { + "id": "3639", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity (in kW) of renewable energy projects for each country?", + "sql_context": "CREATE TABLE renewable_projects (project_id INT, project_name VARCHAR(255), capacity_kW INT, country VARCHAR(255)); INSERT INTO renewable_projects (project_id, project_name, capacity_kW, country) VALUES (1, \u0027Solar Farm 1\u0027, 1000, \u0027USA\u0027); INSERT INTO renewable_projects (project_id, project_name, capacity_kW, country) VALUES (2, \u0027Wind Farm 1\u0027, 2000, \u0027Canada\u0027);", + "sql": "SELECT country, SUM(capacity_kW) as total_capacity FROM renewable_projects GROUP BY country;", + "sql_explanation": "This SQL query calculates the total installed capacity (in kW) of renewable energy projects for each country. It uses the SUM() function to add up the capacity_kW values for each country, and the GROUP BY clause to group the results by country." 
+}, { + "id": "3723", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total installed capacity of renewable energy projects in the \u0027renewable\u0027 schema, for each project type?", + "sql_context": "CREATE TABLE renewable.projects (project_type VARCHAR(255), installed_capacity INT); INSERT INTO renewable.projects (project_type, installed_capacity) VALUES (\u0027Wind\u0027, 5000), (\u0027Solar\u0027, 7000), (\u0027Wind\u0027, 6000), (\u0027Hydro\u0027, 8000);", + "sql": "SELECT project_type, SUM(installed_capacity) FROM renewable.projects GROUP BY project_type;", + "sql_explanation": "The SQL query calculates the total installed capacity for each project type in the \u0027renewable\u0027 schema\u0027s \u0027projects\u0027 table by using the SUM function and grouping by the \u0027project_type\u0027 column." 
+}, { + "id": "4264", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total installed capacity (in MW) for each technology type in the \u0027renewable_projects\u0027 table", + "sql_context": "CREATE TABLE renewable_projects (id INT, technology VARCHAR(50), location VARCHAR(50), capacity_mw FLOAT);", + "sql": "SELECT technology, SUM(capacity_mw) FROM renewable_projects GROUP BY technology;", + "sql_explanation": "This query calculates the total installed capacity for each technology type in the \u0027renewable_projects\u0027 table. It does this by using the SUM() function to add up the capacity_mw values for each technology, and the GROUP BY clause to group the results by technology." 
+}, { + "id": "4742", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average energy consumption of buildings in the \u0027urban\u0027 schema, grouped by city?", + "sql_context": "CREATE TABLE urban.buildings (city VARCHAR(255), energy_consumption INT); INSERT INTO urban.buildings (city, energy_consumption) VALUES (\u0027CityA\u0027, 1200), (\u0027CityA\u0027, 1500), (\u0027CityB\u0027, 1700), (\u0027CityB\u0027, 1300);", + "sql": "SELECT city, AVG(energy_consumption) FROM urban.buildings GROUP BY city;", + "sql_explanation": "The SQL query calculates the average energy consumption for each city in the \u0027urban\u0027 schema\u0027s \u0027buildings\u0027 table by using the AVG function and grouping by the \u0027city\u0027 column." 
+}, { + "id": "4777", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many buildings are there in each city and their total CO2 emissions?", + "sql_context": "CREATE TABLE buildings (id INT, name TEXT, city TEXT, co2_emissions FLOAT);", + "sql": "SELECT city, COUNT(*), SUM(co2_emissions) FROM buildings GROUP BY city;", + "sql_explanation": "This query counts the number of buildings and calculates their total CO2 emissions in each city. It does this by grouping the rows based on the city column and then counting the number of rows (using COUNT) and summing up the co2_emissions column (using SUM) for each group." 
+}, { + "id": "5466", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "aggregation", + "sql_complexity_description": "aggregation functions (COUNT, SUM, AVG, MIN, MAX, etc.), and HAVING clause", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of green buildings in each city.", + "sql_context": "CREATE TABLE green_buildings (building_id INT, city VARCHAR(50)); INSERT INTO green_buildings (building_id, city) VALUES (1, \u0027New York\u0027), (2, \u0027Toronto\u0027), (3, \u0027Mexico City\u0027), (4, \u0027New York\u0027), (5, \u0027Los Angeles\u0027);", + "sql": "SELECT city, COUNT(*) FROM green_buildings GROUP BY city", + "sql_explanation": "Calculates the number of green buildings in each city by grouping the records by the city column and then counting the number of records in each group using the COUNT() function." 
+}, { + "id": "824", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 authors with the most articles on disinformation detection?", + "sql_context": "CREATE TABLE articles (id INT, author_id INT, title TEXT, topic TEXT); INSERT INTO articles VALUES (1, 1, \u0027Article 1\u0027, \u0027Media Literacy\u0027), (2, 1, \u0027Article 2\u0027, \u0027Content Diversity\u0027), (3, 2, \u0027Article 3\u0027, \u0027Media Representation\u0027), (4, 2, \u0027Article 4\u0027, \u0027Disinformation Detection\u0027), (5, 3, \u0027Article 5\u0027, \u0027Disinformation Detection\u0027), (6, 3, \u0027Article 6\u0027, \u0027Content Diversity\u0027), (7, 4, \u0027Article 7\u0027, \u0027Media Literacy\u0027), (8, 5, \u0027Article 8\u0027, \u0027Disinformation Detection\u0027), (9, 5, \u0027Article 9\u0027, \u0027Media Representation\u0027); CREATE TABLE authors (id INT, name TEXT); INSERT INTO authors VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027), (3, \u0027Alice Johnson\u0027), (4, \u0027Bob Williams\u0027), (5, \u0027Charlie Brown\u0027);", + "sql": "SELECT a.name, COUNT(*) as article_count FROM authors a JOIN articles ar ON a.id \u003d ar.author_id WHERE ar.topic \u003d \u0027Disinformation Detection\u0027 GROUP BY a.name ORDER BY article_count DESC LIMIT 3;", + "sql_explanation": "This SQL query joins the authors and articles tables on the author_id column, then filters the results to only include rows where the topic column is \u0027Disinformation Detection\u0027. 
It groups the results by the name column and orders them in descending order by the article_count column, which is the result of the COUNT function. The LIMIT 3 clause is used to limit the results to the top 3 authors." +}, { + "id": "914", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of unique viewers who watched a series on Netflix by country", + "sql_context": "CREATE TABLE viewership (id INT, viewer_id INT, series_title VARCHAR(100), platform VARCHAR(50), watch_date DATE); INSERT INTO viewership (id, viewer_id, series_title, platform, watch_date) VALUES (1, 123, \u0027Series1\u0027, \u0027Netflix\u0027, \u00272022-01-01\u0027), (2, 456, \u0027Series2\u0027, \u0027Netflix\u0027, \u00272022-02-01\u0027), (3, 789, \u0027Series1\u0027, \u0027Netflix\u0027, \u00272022-03-01\u0027);", + "sql": "SELECT production_country, COUNT(DISTINCT viewer_id) as unique_viewers FROM viewership v JOIN movies m ON v.series_title \u003d m.title WHERE platform \u003d \u0027Netflix\u0027 GROUP BY production_country;", + "sql_explanation": "We filter viewership data by platform \u0027Netflix\u0027 and join it with the movies table using series_title and title columns. Then, we group viewership data by production country and count the number of unique viewers for each country." 
+}, { + "id": "1147", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average movie duration for French films directed by women?", + "sql_context": "CREATE TABLE movies (id INT, title VARCHAR(255), duration INT, production_country VARCHAR(64), director_id INT, PRIMARY KEY (id), FOREIGN KEY (director_id) REFERENCES directors(id)); CREATE TABLE directors (id INT, name VARCHAR(255), gender VARCHAR(8), PRIMARY KEY (id)); INSERT INTO directors (id, name, gender) VALUES (1, \u0027Director1\u0027, \u0027Female\u0027), (2, \u0027Director2\u0027, \u0027Male\u0027); INSERT INTO movies (id, title, duration, production_country, director_id) VALUES (1, \u0027Movie1\u0027, 120, \u0027France\u0027, 1), (2, \u0027Movie2\u0027, 90, \u0027USA\u0027, 2), (3, \u0027Movie3\u0027, 105, \u0027France\u0027, 1);", + "sql": "SELECT AVG(movies.duration) FROM movies INNER JOIN directors ON movies.director_id \u003d directors.id WHERE directors.gender \u003d \u0027Female\u0027 AND movies.production_country \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the average movie duration for French films directed by women. It joins the movies and directors tables on the director_id column. Then, it filters the rows to only include movies that were directed by women and produced in France. Finally, it uses the AVG function to calculate the average duration of the selected movies." 
+}, { + "id": "2185", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of words written by freelance authors in the \u0027Tech\u0027 section?", + "sql_context": "CREATE TABLE authors (id INT, name VARCHAR(50), is_freelance BOOLEAN); INSERT INTO authors (id, name, is_freelance) VALUES (1, \u0027Bob\u0027, TRUE); INSERT INTO authors (id, name, is_freelance) VALUES (2, \u0027Charlie\u0027, FALSE); INSERT INTO authors (id, name, is_freelance) VALUES (3, \u0027David\u0027, TRUE); CREATE TABLE articles (id INT, title VARCHAR(100), author_id INT, section VARCHAR(50), publish_date DATE, word_count INT); INSERT INTO articles (id, title, author_id, section, publish_date, word_count) VALUES (1, \u0027Article1\u0027, 1, \u0027Tech\u0027, \u00272022-03-01\u0027, 1200); INSERT INTO articles (id, title, author_id, section, publish_date, word_count) VALUES (2, \u0027Article2\u0027, 2, \u0027Politics\u0027, \u00272022-03-02\u0027, 1500); INSERT INTO articles (id, title, author_id, section, publish_date, word_count) VALUES (3, \u0027Article3\u0027, 1, \u0027Tech\u0027, \u00272022-03-03\u0027, 1300);", + "sql": "SELECT SUM(word_count) FROM articles JOIN authors ON articles.author_id \u003d authors.id WHERE is_freelance \u003d TRUE AND section \u003d \u0027Tech\u0027;", + "sql_explanation": "This query calculates the total number of words written by freelance authors in the \u0027Tech\u0027 section by joining the articles table with the authors table on the author_id column, filtering for records where is_freelance is TRUE and section is \u0027Tech\u0027, and summing up the word_count column values." 
+}, { + "id": "207", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 artists by total revenue from streaming platforms in 2020?", + "sql_context": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(30)); INSERT INTO artists (artist_id, artist_name) VALUES (1, \u0027Ariana Grande\u0027), (2, \u0027BTS\u0027), (3, \u0027Drake\u0027), (4, \u0027Ed Sheeran\u0027), (5, \u0027Taylor Swift\u0027); CREATE TABLE streams (stream_id INT, artist_id INT, revenue DECIMAL(10,2), stream_date DATE); INSERT INTO streams (stream_id, artist_id, revenue, stream_date) VALUES (1, 1, 10.50, \u00272020-03-15\u0027), (2, 1, 12.25, \u00272020-07-27\u0027), (3, 2, 9.99, \u00272020-09-01\u0027), (4, 3, 15.00, \u00272020-11-29\u0027), (5, 1, 8.75, \u00272020-12-31\u0027), (6, 2, 11.25, \u00272020-05-14\u0027);", + "sql": "SELECT artists.artist_name, SUM(streams.revenue) AS total_revenue FROM artists INNER JOIN streams ON artists.artist_id \u003d streams.artist_id WHERE streams.stream_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027 GROUP BY artists.artist_name ORDER BY total_revenue DESC LIMIT 3;", + "sql_explanation": "This query finds the top 3 artists by total revenue from streaming platforms in 2020 by joining the artists and streams tables, filtering for streams in 2020, grouping by artist_name, and ordering by the total revenue in descending order." 
+}, { + "id": "814", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue by genre for the first quarter of the year?", + "sql_context": "CREATE TABLE genres (genre_id INT, genre VARCHAR(50)); CREATE TABLE sales (sale_id INT, genre_id INT, sale_date DATE, revenue DECIMAL(5,2));", + "sql": "SELECT g.genre, SUM(s.revenue) AS total_revenue FROM genres g JOIN sales s ON g.genre_id \u003d s.genre_id WHERE s.sale_date \u003e\u003d DATEADD(quarter, DATEDIFF(quarter, 0, GETDATE()), 0) GROUP BY g.genre;", + "sql_explanation": "The SQL query joins the \u0027genres\u0027 and \u0027sales\u0027 tables on the \u0027genre_id\u0027 column. It then filters the data to only include sales from the first quarter of the year and groups the data by the \u0027genre\u0027 column, calculating the total revenue for each genre." 
+}, { + "id": "838", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many albums were released in total by Pop artists in 2021?", + "sql_context": "CREATE TABLE artists (artist_id INT, genre VARCHAR(20)); INSERT INTO artists (artist_id, genre) VALUES (1, \u0027Latin\u0027), (2, \u0027Pop\u0027), (3, \u0027Rock\u0027), (4, \u0027Jazz\u0027), (5, \u0027Folk\u0027); CREATE TABLE albums (album_id INT, artist_id INT, release_date DATE); INSERT INTO albums (album_id, artist_id, release_date) VALUES (1, 2, \u00272021-04-12\u0027), (2, 3, \u00272020-08-21\u0027), (3, 4, \u00272019-11-01\u0027), (4, 5, \u00272018-02-23\u0027), (5, 2, \u00272021-07-15\u0027);", + "sql": "SELECT COUNT(albums.album_id) FROM albums INNER JOIN artists ON albums.artist_id \u003d artists.artist_id WHERE artists.genre \u003d \u0027Pop\u0027 AND albums.release_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This query counts the number of albums released in total by Pop artists in 2021 by joining the albums and artists tables and filtering for Pop artists and albums released in 2021." 
+}, { + "id": "980", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many songs have been released by non-binary artists on the \u0027Indie Pop\u0027 platform since its inception?", + "sql_context": "CREATE TABLE Artists (ArtistID int, ArtistName varchar(100), Gender varchar(50), Platform varchar(50)); CREATE TABLE Songs (SongID int, SongName varchar(100), ReleaseDate date, ArtistID int); INSERT INTO Artists VALUES (1, \u0027Artist L\u0027, \u0027Non-binary\u0027, \u0027Indie Pop\u0027); INSERT INTO Artists VALUES (2, \u0027Artist M\u0027, \u0027Female\u0027, \u0027Indie Pop\u0027); INSERT INTO Songs VALUES (1, \u0027Song 5\u0027, \u00272010-01-01\u0027, 1); INSERT INTO Songs VALUES (2, \u0027Song 6\u0027, \u00272012-01-01\u0027, 1);", + "sql": "SELECT COUNT(*) as NumberOfSongs FROM Artists JOIN Songs ON Artists.ArtistID \u003d Songs.ArtistID WHERE Gender \u003d \u0027Non-binary\u0027 AND Platform \u003d \u0027Indie Pop\u0027 AND ReleaseDate \u003e\u003d \u00272010-01-01\u0027;", + "sql_explanation": "The query joins the \u0027Artists\u0027 table and the \u0027Songs\u0027 table on the ArtistID. It then filters for songs released by non-binary artists on the \u0027Indie Pop\u0027 platform since its inception and returns the number of songs." 
+}, { + "id": "1488", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for the top 3 streaming platforms?", + "sql_context": "CREATE TABLE platforms (id INT, platform VARCHAR(255)); CREATE TABLE sales (id INT, platform_id INT, revenue DECIMAL(10,2));", + "sql": "SELECT SUM(sales.revenue) FROM sales JOIN platforms ON sales.platform_id \u003d platforms.id GROUP BY platforms.platform ORDER BY SUM(sales.revenue) DESC LIMIT 3;", + "sql_explanation": "Join the sales and platforms tables using the platform_id column, group by platform, and calculate the total revenue. Limit to top 3 platforms." 
+}, { + "id": "1664", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average album sales per platform?", + "sql_context": "CREATE TABLE Platforms (platform_id INT, platform_name TEXT); CREATE TABLE AlbumSales (sale_id INT, platform_id INT, sales INT);", + "sql": "SELECT platform_name, AVG(sales) as avg_sales FROM Platforms JOIN AlbumSales ON Platforms.platform_id \u003d AlbumSales.platform_id GROUP BY platform_name;", + "sql_explanation": "This query joins the Platforms and AlbumSales tables on the platform_id column, then groups the results by platform_name, calculating the average of the sales for each group." +}, { + "id": "1961", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average playtime of all songs in the Pop genre?", + "sql_context": "CREATE TABLE Genre (id INT, genre VARCHAR(255)); CREATE TABLE Song (id INT, genre_id INT, title VARCHAR(255), playtime INT);", + "sql": "SELECT G.genre, AVG(S.playtime) as avg_playtime FROM Genre G INNER JOIN Song S ON G.id \u003d S.genre_id WHERE G.genre \u003d \u0027Pop\u0027 GROUP BY G.genre;", + "sql_explanation": "This query joins the \u0027Genre\u0027 and \u0027Song\u0027 tables on the \u0027genre_id\u0027 column. 
It filters for songs in the Pop genre and calculates the average playtime using the AVG function." +}, { + "id": "2102", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique artists who released songs in the \u0027Pop\u0027 genre in 2015?", + "sql_context": "CREATE TABLE Artists (id INT, name VARCHAR(100)); CREATE TABLE Songs (id INT, artist_id INT, title VARCHAR(100), release_year INT, genre VARCHAR(50), streams INT);", + "sql": "SELECT COUNT(DISTINCT Artists.id) FROM Artists JOIN Songs ON Artists.id \u003d Songs.artist_id WHERE genre \u003d \u0027Pop\u0027 AND release_year \u003d 2015;", + "sql_explanation": "This query calculates the total number of unique artists who released songs in the \u0027Pop\u0027 genre in 2015 by joining the \u0027Artists\u0027 and \u0027Songs\u0027 tables based on the \u0027artist_id\u0027 column and counting distinct \u0027id\u0027 values from the \u0027Artists\u0027 table where the \u0027genre\u0027 is \u0027Pop\u0027 and the \u0027release_year\u0027 is 2015." 
+}, { + "id": "2501", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which genre has the most songs in the music streaming platform?", + "sql_context": "CREATE TABLE genres (id INT, genre TEXT); CREATE TABLE songs (id INT, title TEXT, genre_id INT); INSERT INTO genres (id, genre) VALUES (1, \u0027Pop\u0027), (2, \u0027Rock\u0027), (3, \u0027Hip Hop\u0027); INSERT INTO songs (id, title, genre_id) VALUES (1, \u0027Shake it Off\u0027, 1), (2, \u0027Lose Yourself\u0027, 3), (3, \u0027Bohemian Rhapsody\u0027, 2);", + "sql": "SELECT COUNT(*) FROM songs JOIN genres ON songs.genre_id \u003d genres.id GROUP BY genres.genre ORDER BY COUNT(*) DESC LIMIT 1;", + "sql_explanation": "This query joins the songs table with the genres table based on the genre_id column. It then groups the records by the genre column, calculates the count for each group, sorts the results in descending order and finally returns the genre with the highest number of songs by selecting the first record." 
+}, { + "id": "2503", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all playlists created by users from the United States?", + "sql_context": "CREATE TABLE users (user_id INT, username VARCHAR(50), country VARCHAR(50)); INSERT INTO users (user_id, username, country) VALUES (1, \u0027jane123\u0027, \u0027United States\u0027), (2, \u0027musicfan01\u0027, \u0027Canada\u0027); CREATE TABLE playlists (playlist_id INT, playlist_name VARCHAR(50), user_id INT); INSERT INTO playlists (playlist_id, playlist_name, user_id) VALUES (1, \u0027My Favorites\u0027, 1), (2, \u0027Chill Vibes\u0027, 1), (3, \u0027Rock Playlist\u0027, 2);", + "sql": "SELECT playlist_name FROM playlists JOIN users ON playlists.user_id \u003d users.user_id WHERE users.country \u003d \u0027United States\u0027;", + "sql_explanation": "First, join the playlists and users tables on the user_id field. Then, filter the results to only include playlists from users in the United States, and finally, return the playlist_name." 
+}, { + "id": "2056", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of satellites launched by a country in a year?", + "sql_context": " CREATE TABLE country_satellite_launches (country_id INT, launch_year INT, number_of_satellites INT); CREATE TABLE countries (id INT, name VARCHAR(50));", + "sql": "SELECT c.name, MAX(cs.number_of_satellites) FROM countries c JOIN country_satellite_launches cs ON c.id \u003d cs.country_id GROUP BY c.name;", + "sql_explanation": "This query lists the maximum number of satellites launched by a country in a year by joining the countries and country_satellite_launches tables on the country_id column. It then groups the results by country name and calculates the maximum number of satellites launched for each country." 
+}, { + "id": "2183", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest launch date of a satellite in the SpaceX database?", + "sql_context": "CREATE TABLE spacex_missions (mission_id INT, launch_date DATE);CREATE TABLE satellites (satellite_id INT, mission_id INT, launch_date DATE);", + "sql": "SELECT MIN(satellites.launch_date) FROM satellites INNER JOIN spacex_missions ON satellites.mission_id \u003d spacex_missions.mission_id;", + "sql_explanation": "This query retrieves the earliest launch date of a satellite in the SpaceX database by joining the spacex_missions and satellites tables and selecting the minimum launch date." 
+}, { + "id": "394", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy consumption (in TWh) for residential buildings in the United States, categorized by energy source, for the year 2020?", + "sql_context": "CREATE TABLE residential_buildings (id INT, country VARCHAR(2), energy_consumption FLOAT); INSERT INTO residential_buildings (id, country, energy_consumption) VALUES (1, \u0027USA\u0027, 900000), (2, \u0027USA\u0027, 1100000), (3, \u0027USA\u0027, 700000), (4, \u0027USA\u0027, 1300000); CREATE TABLE energy_source (id INT, source VARCHAR(20), residential_buildings_id INT); INSERT INTO energy_source (id, source, residential_buildings_id) VALUES (1, \u0027Solar\u0027, 1), (2, \u0027Wind\u0027, 2), (3, \u0027Natural Gas\u0027, 3), (4, \u0027Coal\u0027, 4);", + "sql": "SELECT e.source, SUM(rb.energy_consumption) as total_energy_consumption FROM residential_buildings rb JOIN energy_source e ON rb.id \u003d e.residential_buildings_id WHERE rb.country \u003d \u0027USA\u0027 AND YEAR(rb.timestamp) \u003d 2020 GROUP BY e.source;", + "sql_explanation": "This query joins the \u0027residential_buildings\u0027 table and \u0027energy_source\u0027 table on their common column \u0027id\u0027. It then filters the results to only include rows where \u0027country\u0027 is \u0027USA\u0027 and the year of the timestamp is 2020. The query then groups the results by the \u0027source\u0027 column from the \u0027energy_source\u0027 table and calculates the sum of \u0027energy_consumption\u0027 for each group." 
+}, { + "id": "670", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of appliances for each type and energy efficiency standard in the appliances and energy_efficiency_standards tables, where the year is between 2015 and 2020?", + "sql_context": "CREATE TABLE energy_efficiency_standards (id INT, standard VARCHAR(255), year INT, min_rating FLOAT); CREATE TABLE appliances (id INT, type VARCHAR(255), standard VARCHAR(255), rating FLOAT); INSERT INTO energy_efficiency_standards (id, standard, year, min_rating) VALUES (1, \u0027Energy Star\u0027, 2010, 0.5), (2, \u0027EU Energy Label\u0027, 2015, 0.3); INSERT INTO appliances (id, type, standard, rating) VALUES (1, \u0027Refrigerator\u0027, \u0027Energy Star\u0027, 0.6), (2, \u0027Washing Machine\u0027, \u0027EU Energy Label\u0027, 0.4), (3, \u0027Dishwasher\u0027, \u0027EU Energy Label\u0027, 0.5);", + "sql": "SELECT a.type, e.standard, AVG(a.rating) as avg_rating FROM appliances a INNER JOIN energy_efficiency_standards e ON a.standard \u003d e.standard WHERE e.year BETWEEN 2015 AND 2020 GROUP BY a.type, e.standard;", + "sql_explanation": "This query calculates the average rating of appliances for each type and energy efficiency standard in the \u0027appliances\u0027 and \u0027energy_efficiency_standards\u0027 tables, where the standard year is between 2015 and 2020. It groups the results by appliance type and energy efficiency standard." 
+}, { + "id": "818", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of solar installations in \u0027renewables\u0027 and \u0027energy_storage\u0027 schemas?", + "sql_context": "CREATE TABLE renewables.solar_installations (installation_id int, name varchar(50), capacity int); INSERT INTO renewables.solar_installations (installation_id, name, capacity) VALUES (1, \u0027Installation X\u0027, 250), (2, \u0027Installation Y\u0027, 300); CREATE TABLE energy_storage.solar_storage (storage_id int, installation_id int, capacity int); INSERT INTO energy_storage.solar_storage (storage_id, installation_id, capacity) VALUES (1, 1, 150), (2, 2, 180);", + "sql": "SELECT COUNT(DISTINCT r.installation_id) + COUNT(DISTINCT e.installation_id) FROM renewables.solar_installations r JOIN energy_storage.solar_storage e ON r.installation_id \u003d e.installation_id;", + "sql_explanation": "This query performs a full outer join between \u0027solar_installations\u0027 from the \u0027renewables\u0027 schema and \u0027solar_storage\u0027 from the \u0027energy_storage\u0027 schema, then counts the number of distinct installation IDs from both tables to provide the total number of solar installations in both schemas." 
+}, { + "id": "886", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total installed capacity of wind and solar power plants in each country in the world, by year?", + "sql_context": "CREATE TABLE wind_power (country text, year integer, capacity integer);CREATE TABLE solar_power (country text, year integer, capacity integer);", + "sql": "SELECT w.country, w.year, SUM(w.capacity + s.capacity) as total_capacity FROM wind_power w INNER JOIN solar_power s ON w.country \u003d s.country AND w.year \u003d s.year GROUP BY w.country, w.year;", + "sql_explanation": "This SQL query performs an inner join on the wind_power and solar_power tables, then groups by country and year and sums the total capacity for wind and solar power plants." 
+}, { + "id": "668", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players have reached \u0027Expert\u0027 status in \u0027Galactic Crusaders\u0027 per continent?", + "sql_context": "CREATE TABLE Continents (ContinentID INT, Continent VARCHAR(255)); INSERT INTO Continents (ContinentID, Continent) VALUES (1, \u0027Africa\u0027); INSERT INTO Continents (ContinentID, Continent) VALUES (2, \u0027Asia\u0027); INSERT INTO Players (PlayerID, PlayerStatus, GameName, ContinentID) VALUES (1, \u0027Expert\u0027, \u0027Galactic Crusaders\u0027, 2); INSERT INTO Players (PlayerID, PlayerStatus, GameName, ContinentID) VALUES (2, \u0027Beginner\u0027, \u0027Galactic Crusaders\u0027, 1);", + "sql": "SELECT Continent, COUNT(*) as PlayerCount FROM Players JOIN Continents ON Players.ContinentID \u003d Continents.ContinentID WHERE PlayerStatus \u003d \u0027Expert\u0027 AND GameName \u003d \u0027Galactic Crusaders\u0027 GROUP BY Continent;", + "sql_explanation": "The data is partitioned by continent and the number of players with \u0027Expert\u0027 status in \u0027Galactic Crusaders\u0027 per continent is calculated using the COUNT function and the GROUP BY clause." 
+}, { + "id": "857", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average playtime of adventure games that have cross-platform support, grouped by player\u0027s country of residence.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(100), Country VARCHAR(50), TotalHoursPlayed INT); INSERT INTO Players VALUES (1, \u0027Alex Garcia\u0027, \u0027Mexico\u0027, 60); INSERT INTO Players VALUES (2, \u0027Sophia Lee\u0027, \u0027South Korea\u0027, 80); CREATE TABLE GameDesign (GameID INT, GameName VARCHAR(100), Genre VARCHAR(50), CrossPlatform BIT); INSERT INTO GameDesign VALUES (1, \u0027GameX\u0027, \u0027Adventure\u0027, 1); INSERT INTO GameDesign VALUES (2, \u0027GameY\u0027, \u0027Puzzle\u0027, 0);", + "sql": "SELECT P.Country, AVG(P.TotalHoursPlayed) as AvgPlaytime FROM Players P JOIN GameDesign GD ON P.PlayerID \u003d GD.GameID WHERE GD.Genre \u003d \u0027Adventure\u0027 AND GD.CrossPlatform \u003d 1 GROUP BY P.Country;", + "sql_explanation": "1. Joins Players and GameDesign tables on PlayerID. 2. Filters records where Genre is \u0027Adventure\u0027 and CrossPlatform \u003d 1. 3. Groups results by Country. 4. Calculates average playtime for each Country." 
+}, { + "id": "1265", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average playtime of \u0027Cosmic Racers\u0027 for players aged 25 or older?", + "sql_context": "CREATE TABLE GamePlay (PlayerID INT, GameName VARCHAR(255), Playtime INT); INSERT INTO GamePlay (PlayerID, GameName, Playtime) VALUES (1, \u0027Cosmic Racers\u0027, 120); INSERT INTO GamePlay (PlayerID, GameName, Playtime) VALUES (2, \u0027Cosmic Racers\u0027, 180); CREATE TABLE Players (PlayerID INT, PlayerAge INT, GameName VARCHAR(255)); INSERT INTO Players (PlayerID, PlayerAge, GameName) VALUES (1, 27, \u0027Cosmic Racers\u0027); INSERT INTO Players (PlayerID, PlayerAge, GameName) VALUES (2, 30, \u0027Cosmic Racers\u0027);", + "sql": "SELECT AVG(GamePlay.Playtime) FROM GamePlay JOIN Players ON GamePlay.PlayerID \u003d Players.PlayerID WHERE Players.PlayerAge \u003e\u003d 25 AND GamePlay.GameName \u003d \u0027Cosmic Racers\u0027;", + "sql_explanation": "The data is joined based on the PlayerID, and the average playtime is calculated for players aged 25 or older in \u0027Cosmic Racers\u0027 using the AVG function." 
+}, { + "id": "1451", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average age of players who play RPG games in North America?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Location VARCHAR(20)); INSERT INTO Players (PlayerID, Age, Gender, Location) VALUES (1, 30, \u0027Non-binary\u0027, \u0027USA\u0027); INSERT INTO Players (PlayerID, Age, Gender, Location) VALUES (2, 25, \u0027Male\u0027, \u0027Canada\u0027); CREATE TABLE Games (GameID INT, GameName VARCHAR(20), Genre VARCHAR(20)); INSERT INTO Games (GameID, GameName, Genre) VALUES (1, \u0027Epic Saga\u0027, \u0027RPG\u0027);", + "sql": "SELECT AVG(Players.Age) FROM Players INNER JOIN Games ON Players.Location \u003d Games.GameName WHERE Games.Genre \u003d \u0027RPG\u0027 AND Players.Location IN (\u0027USA\u0027, \u0027Canada\u0027);", + "sql_explanation": "Join the Players and Games tables, filter rows where the genre is RPG and the location is North America, then calculate the average age." 
+}, { + "id": "2039", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum age of players who have played multiplayer games, categorized by the genre of the game?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, GameGenre VARCHAR(10));CREATE TABLE MultiplayerGames (GameID INT, PlayerID INT);", + "sql": "SELECT g.GameGenre, MAX(p.Age) as MaxAge FROM Players p INNER JOIN MultiplayerGames mg ON p.PlayerID \u003d mg.PlayerID GROUP BY g.GameGenre;", + "sql_explanation": "This query first joins the Players table and MultiplayerGames table on PlayerID, then groups the results by GameGenre and calculates the maximum age of players for each game genre." 
+}, { + "id": "2103", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the PlayerName and VRAdopted date for players who adopted VR technology in 2021", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (1, \u0027John Smith\u0027, \u0027Canada\u0027); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (2, \u0027Jane Doe\u0027, \u0027USA\u0027); CREATE TABLE VRAdoption (PlayerID INT, VRAdopted DATE); INSERT INTO VRAdoption (PlayerID, VRAdopted) VALUES (1, \u00272021-08-01\u0027); INSERT INTO VRAdoption (PlayerID, VRAdopted) VALUES (2, \u00272020-08-01\u0027);", + "sql": "SELECT p.PlayerName, va.VRAdopted FROM Players p INNER JOIN VRAdoption va ON p.PlayerID \u003d va.PlayerID WHERE YEAR(va.VRAdopted) \u003d 2021;", + "sql_explanation": "This query joins the Players table and VRAdoption table on PlayerID, filters for records where the VRAdopted date is in 2021, and then selects the PlayerName and VRAdopted columns." 
+}, { + "id": "2331", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all virtual reality (VR) games and their designers.", + "sql_context": "CREATE TABLE Games (GameID INT, Title VARCHAR(50), Genre VARCHAR(20), Platform VARCHAR(10)); CREATE TABLE VRGames (GameID INT, Designer VARCHAR(50)); INSERT INTO Games (GameID, Title, Genre, Platform) VALUES (1, \u0027CyberSphere\u0027, \u0027Action\u0027, \u0027PC\u0027); INSERT INTO VRGames (GameID, Designer) VALUES (1, \u0027John Doe\u0027);", + "sql": "SELECT Games.Title, VRGames.Designer FROM Games INNER JOIN VRGames ON Games.GameID \u003d VRGames.GameID WHERE Games.Platform \u003d \u0027VR\u0027;", + "sql_explanation": "Join the Games and VRGames tables on GameID, then filter for VR games and list their titles and designers." 
+}, { + "id": "2702", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many VR technologies have been adopted by players from France?", + "sql_context": "CREATE TABLE VRTechnology (TechID INT PRIMARY KEY, TechName VARCHAR(50), ReleaseDate DATE); INSERT INTO VRTechnology (TechID, TechName, ReleaseDate) VALUES (1, \u0027VR1\u0027, \u00272016-01-01\u0027), (2, \u0027VR2\u0027, \u00272020-01-01\u0027), (3, \u0027VR3\u0027, \u00272022-01-01\u0027); CREATE TABLE PlayerVR (PlayerID INT, TechID INT); INSERT INTO PlayerVR (PlayerID, TechID) VALUES (6, 1), (6, 2), (7, 3); CREATE TABLE Players (PlayerID INT PRIMARY KEY, Age INT, Gender VARCHAR(10), Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (6, 28, \u0027Female\u0027, \u0027France\u0027), (7, 30, \u0027Male\u0027, \u0027Germany\u0027);", + "sql": "SELECT COUNT(*) FROM PlayerVR JOIN Players ON PlayerVR.PlayerID \u003d Players.PlayerID WHERE Players.Country \u003d \u0027France\u0027;", + "sql_explanation": "Count the number of VR technologies adopted by players from France." 
+}, { + "id": "2875", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the game preferences of players who have participated in esports events?", + "sql_context": "CREATE TABLE players (id INT, name VARCHAR(50), age INT, game_preference VARCHAR(20)); CREATE TABLE esports_events (id INT, event_name VARCHAR(50), date DATE, player_id INT); INSERT INTO players (id, name, age, game_preference) VALUES (1, \u0027John Doe\u0027, 25, \u0027VR\u0027); INSERT INTO esports_events (id, event_name, date, player_id) VALUES (1, \u0027GameX\u0027, \u00272023-06-01\u0027, 1);", + "sql": "SELECT players.game_preference FROM players INNER JOIN esports_events ON players.id \u003d esports_events.player_id;", + "sql_explanation": "1. Join the players table with the esports_events table using the player_id. 2. Select the game_preference from the players table." 
+}, { + "id": "2891", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of players who adopted VR technology in Canada", + "sql_context": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (1, \u0027John Smith\u0027, \u0027Canada\u0027); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (2, \u0027Jane Doe\u0027, \u0027USA\u0027); CREATE TABLE VRAdoption (PlayerID INT, VRAdopted DATE); INSERT INTO VRAdoption (PlayerID, VRAdopted) VALUES (1, \u00272021-08-01\u0027);", + "sql": "SELECT COUNT(*) FROM Players p INNER JOIN VRAdoption va ON p.PlayerID \u003d va.PlayerID WHERE p.Country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query joins the Players table and VRAdoption table on PlayerID, filters for players from Canada, and then counts the number of records." 
+}, { + "id": "3117", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of players who joined the platform in 2021 from the \u0027Gaming\u0027 region", + "sql_context": "CREATE TABLE Players (id INT, name VARCHAR(50), join_date DATE, region VARCHAR(20)); INSERT INTO Players (id, name, join_date, region) VALUES (1, \u0027John Doe\u0027, \u00272021-01-01\u0027, \u0027Gaming\u0027), (2, \u0027Jane Smith\u0027, \u00272020-01-01\u0027, \u0027Other\u0027);", + "sql": "SELECT COUNT(*) FROM Players WHERE join_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 AND region \u003d \u0027Gaming\u0027;", + "sql_explanation": "This query counts the number of players who joined in 2021 from the \u0027Gaming\u0027 region by filtering the join_date between 2021-01-01 and 2021-12-31 and region equals \u0027Gaming\u0027." 
+}, { + "id": "151", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify countries with higher diamond exports than imports between 2015 and 2017.", + "sql_context": "CREATE TABLE diamond_export (year INT, country TEXT, export_amount FLOAT); INSERT INTO diamond_export (year, country, export_amount) VALUES (2015, \u0027Botswana\u0027, 5000.0), (2016, \u0027Botswana\u0027, 5200.0), (2017, \u0027Botswana\u0027, 5400.0), (2015, \u0027Russia\u0027, 6000.0), (2016, \u0027Russia\u0027, 6200.0), (2017, \u0027Russia\u0027, 6400.0); CREATE TABLE diamond_import (year INT, country TEXT, import_amount FLOAT); INSERT INTO diamond_import (year, country, import_amount) VALUES (2015, \u0027US\u0027, 4500.0), (2016, \u0027US\u0027, 4700.0), (2017, \u0027US\u0027, 4900.0), (2015, \u0027Hong_Kong\u0027, 2500.0), (2016, \u0027Hong_Kong\u0027, 2600.0), (2017, \u0027Hong_Kong\u0027, 2700.0);", + "sql": "SELECT diamond_export.country FROM diamond_export INNER JOIN diamond_import ON diamond_export.country \u003d diamond_import.country WHERE diamond_export.year BETWEEN 2015 AND 2017 GROUP BY diamond_export.country HAVING SUM(diamond_export.export_amount) \u003e SUM(diamond_import.import_amount);", + "sql_explanation": "Compare diamond export and import amounts for selected countries between 2015 and 2017, then group the results by country to identify the countries with higher diamond export amounts than import amounts." 
+}, { + "id": "184", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly labor productivity (measured in tons of ore per worker) for each mining company in 2022?", + "sql_context": "CREATE TABLE CompanyProductivity (CompanyID int, ProductivityDate date, Material varchar(10), Quantity int, Workers int); INSERT INTO CompanyProductivity VALUES (1, \u00272022-01-01\u0027, \u0027Gold\u0027, 1000, 50), (1, \u00272022-01-15\u0027, \u0027Gold\u0027, 1500, 60), (2, \u00272022-01-30\u0027, \u0027Gold\u0027, 800, 45), (1, \u00272022-02-05\u0027, \u0027Gold\u0027, 1200, 55), (2, \u00272022-03-01\u0027, \u0027Gold\u0027, 1000, 40);", + "sql": "SELECT m.CompanyName, AVG(cp.Quantity/cp.Workers) as AvgMonthlyLaborProductivity FROM CompanyProductivity cp JOIN MiningCompanies m ON cp.CompanyID \u003d m.CompanyID WHERE cp.ProductivityDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 AND cp.Material \u003d \u0027Gold\u0027 GROUP BY m.CompanyName;", + "sql_explanation": "This query joins the CompanyProductivity and MiningCompanies tables using the CompanyID. It then filters for gold extractions in 2022 and calculates the average monthly labor productivity for each mining company." 
+}, { + "id": "766", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many labor accidents occurred in mines located in the Arctic region in Q1 2022?", + "sql_context": "CREATE TABLE MineLocations (MineID int, Location varchar(50)); INSERT INTO MineLocations VALUES (1, \u0027Arctic Region\u0027), (2, \u0027Andes Mountains\u0027), (3, \u0027Sahara Desert\u0027); CREATE TABLE AccidentData (MineID int, AccidentDate date); INSERT INTO AccidentData VALUES (1, \u00272022-01-15\u0027), (1, \u00272022-02-18\u0027), (3, \u00272022-03-04\u0027), (2, \u00272022-01-12\u0027), (1, \u00272022-02-29\u0027);", + "sql": "SELECT COUNT(*) as LaborAccidents FROM AccidentData ad JOIN MineLocations ml ON ad.MineID \u003d ml.MineID WHERE ml.Location \u003d \u0027Arctic Region\u0027 AND ad.AccidentDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "This query joins the AccidentData and MineLocations tables using the MineID. It then filters for accidents in the Arctic region and in Q1 2022, and counts the number of records." 
+}, { + "id": "1061", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the geological survey information for each mine, including the mine name, coordinates, and geological features, and group the results by continent.", + "sql_context": "CREATE TABLE geological_survey (mine_id INT, continent TEXT, x_coordinate INT, y_coordinate INT, geological_feature TEXT); INSERT INTO geological_survey (mine_id, continent, x_coordinate, y_coordinate, geological_feature) VALUES (5, \u0027North America\u0027, 10, 20, \u0027Granite\u0027), (5, \u0027North America\u0027, 12, 22, \u0027Quartz\u0027), (6, \u0027South America\u0027, 15, 25, \u0027Shale\u0027), (6, \u0027South America\u0027, 18, 28, \u0027Limestone\u0027), (7, \u0027Africa\u0027, 30, 40, \u0027Iron Ore\u0027); CREATE TABLE mines (mine_id INT, mine_name TEXT); INSERT INTO mines (mine_id, mine_name) VALUES (5, \u0027MineP\u0027), (6, \u0027MineQ\u0027), (7, \u0027MineR\u0027);", + "sql": "SELECT m.mine_name, gs.continent, gs.x_coordinate, gs.y_coordinate, gs.geological_feature FROM geological_survey gs JOIN mines m ON gs.mine_id \u003d m.mine_id GROUP BY gs.continent;", + "sql_explanation": "This query joins the geological_survey table with the mines table to get the names of the mines and the corresponding geological survey information. It then groups the results by continent, and includes the mine name, coordinates, and geological features." 
+}, { + "id": "606", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all countries with deep-sea exploration programs and their respective budgets.", + "sql_context": "CREATE TABLE countries (country_name TEXT, exploration_program TEXT); CREATE TABLE exploration_budgets (country_name TEXT, budget FLOAT); INSERT INTO countries (country_name, exploration_program) VALUES (\u0027Japan\u0027, \u0027Yes\u0027), (\u0027USA\u0027, \u0027Yes\u0027), (\u0027India\u0027, \u0027No\u0027); INSERT INTO exploration_budgets (country_name, budget) VALUES (\u0027Japan\u0027, 100000000.0), (\u0027USA\u0027, 200000000.0);", + "sql": "SELECT countries.country_name, exploration_budgets.budget FROM countries INNER JOIN exploration_budgets ON countries.country_name \u003d exploration_budgets.country_name WHERE countries.exploration_program \u003d \u0027Yes\u0027;", + "sql_explanation": "Join the countries and exploration_budgets tables on the country_name column and filter rows where exploration_program is \u0027Yes\u0027." 
+}, { + "id": "935", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of vessels detained for maritime safety violations in each region?", + "sql_context": "CREATE TABLE maritime_safety_violations (violation_id INT, region TEXT); INSERT INTO maritime_safety_violations (violation_id, region) VALUES (1, \u0027Atlantic\u0027), (2, \u0027Pacific\u0027), (3, \u0027Indian Ocean\u0027); CREATE TABLE vessels_detained (violation_id INT, vessel_name TEXT); INSERT INTO vessels_detained (violation_id, vessel_name) VALUES (1, \u0027Vessel A\u0027), (1, \u0027Vessel B\u0027), (2, \u0027Vessel C\u0027);", + "sql": "SELECT region, COUNT(vessel_name) FROM vessels_detained INNER JOIN maritime_safety_violations ON vessels_detained.violation_id \u003d maritime_safety_violations.violation_id GROUP BY region;", + "sql_explanation": "This query joins the vessels_detained and maritime_safety_violations tables on the violation_id column using the INNER JOIN clause. It then groups the results by the region column using the GROUP BY clause and counts the number of vessel_name using the COUNT() function." 
+}, { + "id": "1294", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum ocean acidification level for each ocean basin?", + "sql_context": "CREATE TABLE ocean_basins (id INT, name TEXT); CREATE TABLE ocean_acidification (id INT, ocean_basin_id INT, acidification_level FLOAT); INSERT INTO ocean_basins VALUES (1, \u0027Atlantic\u0027), (2, \u0027Pacific\u0027), (3, \u0027Indian\u0027); INSERT INTO ocean_acidification VALUES (1, 1, -7850), (2, 1, -7880), (3, 2, -7930), (4, 3, -7820);", + "sql": "SELECT ob.name, MAX(oa.acidification_level) as max_acidification FROM ocean_basins ob INNER JOIN ocean_acidification oa ON ob.id \u003d oa.ocean_basin_id GROUP BY ob.name;", + "sql_explanation": "This query calculates the maximum ocean acidification level for each ocean basin by joining the \u0027ocean_basins\u0027 and \u0027ocean_acidification\u0027 tables on their shared \u0027id\u0027 column. It then groups the results by ocean basin name and calculates the maximum ocean acidification level for each ocean basin." 
+}, { + "id": "1626", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify marine protected areas that do not have any deep-sea expeditions.", + "sql_context": "CREATE TABLE Expeditions ( id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE); CREATE TABLE Protected_Areas ( id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), size FLOAT, protection_level VARCHAR(50));", + "sql": "SELECT Protected_Areas.name FROM Protected_Areas LEFT JOIN Expeditions ON Protected_Areas.location \u003d Expeditions.location WHERE Expeditions.id IS NULL;", + "sql_explanation": "This query uses a left join to combine the Protected_Areas and Expeditions tables based on the location of the protected areas and the expeditions. It then filters the results to only the protected areas that do not have any expeditions and returns their names." 
+}, { + "id": "1649", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum ocean acidification level for each ocean?", + "sql_context": "CREATE TABLE oceans (id INT, name TEXT); CREATE TABLE ocean_acidification (id INT, ocean_id INT, acidification_level FLOAT); INSERT INTO oceans VALUES (1, \u0027Atlantic\u0027), (2, \u0027Pacific\u0027), (3, \u0027Indian\u0027); INSERT INTO ocean_acidification VALUES (1, 1, -7900), (2, 2, -7950), (3, 3, -7800);", + "sql": "SELECT o.name, MIN(oa.acidification_level) as min_acidification FROM oceans o INNER JOIN ocean_acidification oa ON o.id \u003d oa.ocean_id GROUP BY o.name;", + "sql_explanation": "This query calculates the minimum ocean acidification level for each ocean by joining the \u0027oceans\u0027 and \u0027ocean_acidification\u0027 tables on their shared \u0027id\u0027 column. It then groups the results by ocean name and calculates the minimum ocean acidification level for each ocean." 
+}, { + "id": "2431", + "domain": "oceans", + "domain_description": "Ocean data on marine conservation, ocean acidification, deep-sea exploration, and maritime safety.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the vessels that have not been used in any marine research expeditions?", + "sql_context": "CREATE TABLE research_expeditions (id INT PRIMARY KEY, vessel VARCHAR(255)); INSERT INTO research_expeditions (id, vessel) VALUES (1, \u0027Ocean Explorer\u0027), (2, \u0027Marine Discoverer\u0027); CREATE TABLE expedition_vessels (id INT PRIMARY KEY, expedition VARCHAR(255), vessel VARCHAR(255)); INSERT INTO expedition_vessels (id, expedition, vessel) VALUES (1, \u0027Deep Sea Dive\u0027, \u0027Ocean Explorer\u0027);", + "sql": "SELECT v.vessel FROM expedition_vessels v LEFT JOIN research_expeditions re ON v.vessel \u003d re.vessel WHERE re.vessel IS NULL;", + "sql_explanation": "This query first creates tables \u0027research_expeditions\u0027 and \u0027expedition_vessels\u0027 with their respective columns. It then inserts a few rows for research expeditions and their vessels. The SQL query performs a left join on the \u0027expedition_vessels\u0027 and \u0027research_expeditions\u0027 tables on the \u0027vessel\u0027 column, filtering the results to show only the rows where there are no matching vessels in the \u0027research_expeditions\u0027 table." 
+}, { + "id": "160", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the monthly sales trend for a specific product category in a given year.", + "sql_context": "CREATE TABLE sales (id INT, product_id INT, sale_date DATE, quantity_sold INT); INSERT INTO sales (id, product_id, sale_date, quantity_sold) VALUES (1, 1, \u00272022-01-01\u0027, 50), (2, 2, \u00272022-01-02\u0027, 75), (3, 3, \u00272022-01-03\u0027, 100); CREATE TABLE products (id INT, name VARCHAR(255), category VARCHAR(255)); INSERT INTO products (id, name, category) VALUES (1, \u0027Product X\u0027, \u0027Eco-friendly\u0027), (2, \u0027Product Y\u0027, \u0027Fair Trade\u0027), (3, \u0027Product Z\u0027, \u0027Vegan\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM sale_date) as month, category, AVG(quantity_sold) as avg_quantity_sold FROM sales JOIN products ON sales.product_id \u003d products.id WHERE category \u003d \u0027Eco-friendly\u0027 AND sale_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY month, category ORDER BY month;", + "sql_explanation": "This query calculates the average monthly sales quantity for eco-friendly products by joining the sales and products tables, filtering for eco-friendly products and a specific year, grouping the results by month and category, and ordering them by month." 
+}, { + "id": "663", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of items manufactured by factories in \u0027Asia\u0027 and \u0027Europe\u0027?", + "sql_context": "CREATE TABLE Factories (FactoryID int, FactoryName varchar(50), Address varchar(100), Country varchar(50)); INSERT INTO Factories VALUES (1, \u0027Factory1\u0027, \u0027123 Main St, China\u0027, \u0027China\u0027); INSERT INTO Factories VALUES (2, \u0027Factory2\u0027, \u0027456 Oak St, Germany\u0027, \u0027Germany\u0027); INSERT INTO Factories VALUES (3, \u0027Factory3\u0027, \u0027789 Elm St, India\u0027, \u0027India\u0027); CREATE TABLE Products (ProductID int, ProductName varchar(50), FactoryID int, Price int); INSERT INTO Products VALUES (1, \u0027Product1\u0027, 1, 50); INSERT INTO Products VALUES (2, \u0027Product2\u0027, 2, 100); INSERT INTO Products VALUES (3, \u0027Product3\u0027, 3, 150); INSERT INTO Products VALUES (4, \u0027Product4\u0027, 1, 55); INSERT INTO Products VALUES (5, \u0027Product5\u0027, 3, 145);", + "sql": "SELECT AVG(Products.Price) FROM Products INNER JOIN Factories ON Products.FactoryID \u003d Factories.FactoryID WHERE Factories.Country \u003d \u0027China\u0027 OR Factories.Country \u003d \u0027Germany\u0027 OR Factories.Country \u003d \u0027India\u0027;", + "sql_explanation": "This SQL query calculates the average price of items manufactured by factories. It first joins the \u0027Products\u0027 table and the \u0027Factories\u0027 table on the \u0027FactoryID\u0027 column. 
Then, it filters the records where the \u0027Country\u0027 column in the \u0027Factories\u0027 table is \u0027China\u0027, \u0027Germany\u0027, or \u0027India\u0027. Finally, it calculates the average of the \u0027Price\u0027 column for all the filtered records." +}, { + "id": "685", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 cities with the highest number of recycling centers.", + "sql_context": "CREATE TABLE cities (id INT, name TEXT, country TEXT); CREATE TABLE recycling_centers (id INT, city_id INT, type TEXT); INSERT INTO cities VALUES (1, \u0027City A\u0027, \u0027Country A\u0027), (2, \u0027City B\u0027, \u0027Country A\u0027), (3, \u0027City C\u0027, \u0027Country B\u0027); INSERT INTO recycling_centers VALUES (1, 1, \u0027Glass\u0027), (2, 1, \u0027Paper\u0027), (3, 2, \u0027Plastic\u0027), (4, 3, \u0027Glass\u0027), (5, 3, \u0027Plastic\u0027);", + "sql": "SELECT cities.name, COUNT(recycling_centers.id) AS center_count FROM cities INNER JOIN recycling_centers ON cities.id \u003d recycling_centers.city_id GROUP BY cities.name ORDER BY center_count DESC LIMIT 5;", + "sql_explanation": "Join cities and recycling_centers tables, group by city name, count the number of recycling centers, and list top 5 cities with the highest number of recycling centers." 
+}, { + "id": "687", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 customers by total spending", + "sql_context": "CREATE TABLE customers (customer_id INT, customer_name VARCHAR(255)); INSERT INTO customers (customer_id, customer_name) VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027), (3, \u0027Bob Johnson\u0027); CREATE TABLE sales (sale_id INT, customer_id INT, revenue INT); INSERT INTO sales (sale_id, customer_id, revenue) VALUES (1, 1, 100), (2, 2, 50), (3, 1, 200);", + "sql": "SELECT customers.customer_name, SUM(sales.revenue) FROM sales INNER JOIN customers ON sales.customer_id \u003d customers.customer_id GROUP BY customers.customer_name ORDER BY SUM(sales.revenue) DESC LIMIT 5;", + "sql_explanation": "The query lists the top 5 customers by total spending by summing up the revenue column in the sales table, grouping the results by the customer_name column in the customers table, ordering the results by the sum of the revenue column in descending order, and limiting the results to the top 5." 
+}, { + "id": "748", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total quantity of products in circulation that are made from recycled materials across all suppliers.", + "sql_context": "CREATE TABLE Supplier_Products (supplier_id INT, product_id INT, is_recycled BOOLEAN); INSERT INTO Supplier_Products (supplier_id, product_id, is_recycled) VALUES (1, 100, true), (2, 101, false), (3, 102, true), (4, 103, false), (5, 104, true); CREATE TABLE Products (product_id INT, quantity_in_stock INT); INSERT INTO Products (product_id, quantity_in_stock) VALUES (100, 500), (101, 300), (102, 700), (103, 200), (104, 800);", + "sql": "SELECT SUM(quantity_in_stock) as total_recycled_quantity FROM Products INNER JOIN Supplier_Products ON Products.product_id \u003d Supplier_Products.product_id WHERE Supplier_Products.is_recycled \u003d true;", + "sql_explanation": "This query joins the Supplier_Products and Products tables on the product_id column and filters for products made from recycled materials. It then sums the quantity_in_stock column to get the total quantity of products made from recycled materials." 
+}, { + "id": "870", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of unique consumers who have purchased products from suppliers with ethical labor practices.", + "sql_context": "CREATE TABLE Purchases (purchase_id INT, consumer_id INT, supplier_id INT, purchase_date DATE); INSERT INTO Purchases (purchase_id, consumer_id, supplier_id, purchase_date) VALUES (1, 100, 1, \u00272022-01-01\u0027), (2, 101, 2, \u00272022-02-15\u0027), (3, 102, 3, \u00272022-03-05\u0027), (4, 103, 1, \u00272022-04-10\u0027), (5, 104, 4, \u00272022-05-22\u0027); CREATE TABLE Suppliers (supplier_id INT, ethical_practices BOOLEAN); INSERT INTO Suppliers (supplier_id, ethical_practices) VALUES (1, true), (2, false), (3, true), (4, false);", + "sql": "SELECT COUNT(DISTINCT consumer_id) as unique_ethical_consumers FROM Purchases INNER JOIN Suppliers ON Purchases.supplier_id \u003d Suppliers.supplier_id WHERE Suppliers.ethical_practices \u003d true;", + "sql_explanation": "This query joins the Purchases and Suppliers tables on the supplier_id column and filters for suppliers with ethical labor practices. It then calculates the number of unique consumers who have made purchases from those suppliers." 
+}, { + "id": "958", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily sales revenue of recycled products in Germany?", + "sql_context": "CREATE TABLE sales (sale_id int, product_id int, sale_date date, revenue decimal(5,2)); CREATE TABLE products (product_id int, product_name varchar(255), is_recycled boolean, country varchar(50)); INSERT INTO sales (sale_id, product_id, sale_date, revenue) VALUES (1, 1, \u00272022-01-01\u0027, 50.00); INSERT INTO products (product_id, product_name, is_recycled, country) VALUES (1, \u0027Recycled Tote Bag\u0027, true, \u0027Germany\u0027);", + "sql": "SELECT sale_date, SUM(revenue) AS daily_revenue FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE is_recycled \u003d true AND country \u003d \u0027Germany\u0027 GROUP BY sale_date;", + "sql_explanation": "Calculates the daily sales revenue of recycled products in Germany by joining the sales and products table on product_id, filtering for recycled products sold in Germany, and then grouping the results by sale_date." 
+}, { + "id": "986", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique product categories that have been certified as fair trade.", + "sql_context": "CREATE TABLE Products (product_id INT, category TEXT); INSERT INTO Products (product_id, category) VALUES (1, \u0027Tops\u0027), (2, \u0027Pants\u0027), (3, \u0027Tops\u0027), (4, \u0027Shoes\u0027); CREATE TABLE FairTradeCertifications (product_id INT, certified BOOLEAN); INSERT INTO FairTradeCertifications (product_id, certified) VALUES (1, TRUE), (2, FALSE), (3, TRUE), (4, TRUE);", + "sql": "SELECT DISTINCT category FROM Products INNER JOIN FairTradeCertifications ON Products.product_id \u003d FairTradeCertifications.product_id WHERE FairTradeCertifications.certified \u003d TRUE;", + "sql_explanation": "The SQL query performs an INNER JOIN between the Products and FairTradeCertifications tables on the product_id column, filters the records where certified is TRUE, and lists the unique product categories that have been certified as fair trade by using the DISTINCT keyword." 
+}, { + "id": "1028", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of products sold by each brand, ordered by the total quantity in descending order?", + "sql_context": "CREATE TABLE brands (brand_id INT, brand_name VARCHAR(255)); INSERT INTO brands VALUES (1, \u0027BrandA\u0027), (2, \u0027BrandB\u0027), (3, \u0027BrandC\u0027); CREATE TABLE sales (sale_id INT, brand_id INT, product_id INT, quantity INT); INSERT INTO sales VALUES (1, 1, 1, 100), (2, 1, 2, 200), (3, 2, 3, 50), (4, 3, 4, 300), (5, 1, 5, 150);", + "sql": "SELECT brand_id, brand_name, SUM(quantity) as total_quantity FROM sales JOIN brands ON sales.brand_id \u003d brands.brand_id GROUP BY brand_id, brand_name ORDER BY total_quantity DESC;", + "sql_explanation": "The SQL query calculates the total quantity of products sold by each brand by summing the quantity column in the sales table, grouped by brand_id and brand_name. The result is ordered in descending order based on the total quantity." 
+}, { + "id": "1180", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sale quantity of products in the circular_supply_sales table that are made in Asia?", + "sql_context": "CREATE TABLE circular_supply_sales (sale_id INT, product_id INT, sale_quantity INT); INSERT INTO circular_supply_sales (sale_id, product_id, sale_quantity) VALUES (1, 1, 10), (2, 2, 5), (3, 3, 15), (4, 4, 8), (5, 5, 12); CREATE TABLE circular_supply (product_id INT, product_name TEXT, country_of_origin TEXT); INSERT INTO circular_supply (product_id, product_name, country_of_origin) VALUES (1, \u0027Glass Jar\u0027, \u0027Asia\u0027), (2, \u0027Reclaimed Wood Coasters\u0027, \u0027North America\u0027), (3, \u0027Bamboo Cutting Board\u0027, \u0027Asia\u0027), (4, \u0027Recycled Plastic Tote Bag\u0027, \u0027South America\u0027), (5, \u0027Cork Place Mats\u0027, \u0027Europe\u0027);", + "sql": "SELECT AVG(sale_quantity) FROM circular_supply_sales JOIN circular_supply ON circular_supply_sales.product_id \u003d circular_supply.product_id WHERE country_of_origin \u003d \u0027Asia\u0027;", + "sql_explanation": "This query calculates the average sale quantity of products made in Asia by using the AVG() function and a JOIN clause to combine the circular_supply_sales and circular_supply tables. It then filters the results to only include records where the country_of_origin field in the circular_supply table is equal to \u0027Asia\u0027." 
+}, { + "id": "1216", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many circular supply chain products were sold last month?", + "sql_context": "CREATE TABLE products (product_id INT, name VARCHAR(255), circular_supply_chain BOOLEAN); INSERT INTO products (product_id, name, circular_supply_chain) VALUES (1, \u0027Refurbished Phone\u0027, TRUE), (2, \u0027Vintage Dress\u0027, FALSE); CREATE TABLE sales (sale_id INT, product_id INT, sale_date DATE); INSERT INTO sales (sale_id, product_id, sale_date) VALUES (1, 1, \u00272022-02-05\u0027), (2, 2, \u00272022-03-10\u0027);", + "sql": "SELECT COUNT(*) FROM products JOIN sales ON products.product_id \u003d sales.product_id WHERE circular_supply_chain \u003d TRUE AND sale_date BETWEEN \u00272022-02-01\u0027 AND \u00272022-02-28\u0027;", + "sql_explanation": "Joins the products and sales tables, filters for products in circular supply chains, and counts the number of sales in the past month." 
+}, { + "id": "1506", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of upcycled products sold by vendors in Oregon?", + "sql_context": "CREATE TABLE vendors (vendor_id INT, vendor_name VARCHAR(50), state VARCHAR(50)); INSERT INTO vendors VALUES (1, \u0027VendorA\u0027, \u0027Oregon\u0027); INSERT INTO vendors VALUES (2, \u0027VendorB\u0027, \u0027Texas\u0027); CREATE TABLE products (product_id INT, product_name VARCHAR(50), vendor_id INT, price DECIMAL(5,2), upcycled BOOLEAN); INSERT INTO products VALUES (1, \u0027Product1\u0027, 1, 150, true); INSERT INTO products VALUES (2, \u0027Product2\u0027, 1, 75, true); INSERT INTO products VALUES (3, \u0027Product3\u0027, 2, 100, false); INSERT INTO products VALUES (4, \u0027Product4\u0027, 1, 200, true); CREATE TABLE sales (sale_id INT, product_id INT, vendor_id INT, sale_amount DECIMAL(5,2)); INSERT INTO sales VALUES (1, 1, 1, 50); INSERT INTO sales VALUES (2, 2, 1, 75); INSERT INTO sales VALUES (3, 3, 2, 30); INSERT INTO sales VALUES (4, 4, 1, 60);", + "sql": "SELECT AVG(products.price) FROM products JOIN vendors ON products.vendor_id \u003d vendors.vendor_id WHERE products.upcycled \u003d true AND vendors.state \u003d \u0027Oregon\u0027;", + "sql_explanation": "First, join the products and vendors tables based on the vendor_id. Then, filter the joined table for upcycled products sold by vendors in Oregon. Lastly, calculate the average price for the filtered rows." 
+}, { + "id": "1523", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by vendors practicing ethical labor in California?", + "sql_context": "CREATE TABLE vendors (vendor_id INT, vendor_name VARCHAR(50), state VARCHAR(50), ethical_labor BOOLEAN); INSERT INTO vendors VALUES (1, \u0027VendorA\u0027, \u0027California\u0027, true); INSERT INTO vendors VALUES (2, \u0027VendorB\u0027, \u0027Texas\u0027, false); CREATE TABLE sales (sale_id INT, product_id INT, vendor_id INT, sale_amount DECIMAL(5,2)); INSERT INTO sales VALUES (1, 1, 1, 50); INSERT INTO sales VALUES (2, 2, 1, 75); INSERT INTO sales VALUES (3, 3, 2, 30); INSERT INTO sales VALUES (4, 4, 1, 60);", + "sql": "SELECT SUM(sale_amount) FROM sales JOIN vendors ON sales.vendor_id \u003d vendors.vendor_id WHERE vendors.ethical_labor \u003d true AND vendors.state \u003d \u0027California\u0027;", + "sql_explanation": "First, join the sales and vendors tables based on the vendor_id. Then, filter the joined table for vendors practicing ethical labor in California. Lastly, calculate the sum of sale_amount for the filtered rows." 
+}, { + "id": "1638", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products in each category are sold per day, on average, for stores in the US?", + "sql_context": "CREATE TABLE sales (id INT, date DATE, product VARCHAR(50), category VARCHAR(50), store VARCHAR(50), quantity INT); CREATE TABLE stores (id INT, name VARCHAR(50), location VARCHAR(50));", + "sql": "SELECT category, AVG(quantity) as avg_sales_per_day FROM sales JOIN stores ON sales.store \u003d stores.name WHERE stores.location \u003d \u0027US\u0027 GROUP BY category;", + "sql_explanation": "The SQL query calculates the average number of products sold per day, for each category, in stores located in the US. It first joins the sales and stores tables on the store column, then filters the records to only include those from stores located in the US. It then groups the data by category using the GROUP BY clause, and calculates the average number of sales for each category using the AVG function." 
+}, { + "id": "1678", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from sales of ethically-sourced products in stores located in the UK?", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, sale_price DECIMAL(5,2), is_ethically_sourced BOOLEAN, store_location VARCHAR(50)); INSERT INTO sales (sale_id, product_id, sale_price, is_ethically_sourced, store_location) VALUES (1, 1, 25.99, TRUE, \u0027UK\u0027), (2, 2, 55.99, FALSE, \u0027USA\u0027), (3, 3, 89.99, FALSE, \u0027Canada\u0027), (4, 4, 54.99, TRUE, \u0027UK\u0027); CREATE TABLE products (product_id INT, product_name VARCHAR(50), price DECIMAL(5,2), is_ethically_sourced BOOLEAN); INSERT INTO products (product_id, product_name, price, is_ethically_sourced) VALUES (1, \u0027T-Shirt\u0027, 20.99, TRUE), (2, \u0027Jeans\u0027, 55.99, FALSE), (3, \u0027Sneakers\u0027, 79.99, FALSE), (4, \u0027Backpack\u0027, 49.99, TRUE);", + "sql": "SELECT SUM(sale_price) FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE is_ethically_sourced \u003d TRUE AND store_location \u003d \u0027UK\u0027;", + "sql_explanation": "This SQL query calculates the total revenue generated from sales of ethically-sourced products in stores located in the UK by using the SUM() function. It joins the sales and products tables on the product_id column and filters the rows to only include those where is_ethically_sourced is TRUE and store_location is \u0027UK\u0027." 
+}, { + "id": "1805", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum consumer education score for products in each country, by brand?", + "sql_context": "CREATE TABLE Brands (id INT, brand VARCHAR(255), country VARCHAR(255)); INSERT INTO Brands (id, brand, country) VALUES (1, \u0027BrandA\u0027, \u0027USA\u0027), (2, \u0027BrandB\u0027, \u0027Canada\u0027), (3, \u0027BrandC\u0027, \u0027Mexico\u0027); CREATE TABLE Products (id INT, product VARCHAR(255), consumer_education_score DECIMAL(3, 2)); INSERT INTO Products (id, product, consumer_education_score) VALUES (1, \u0027Product1\u0027, 4.50), (2, \u0027Product2\u0027, 4.75), (3, \u0027Product3\u0027, 3.25), (4, \u0027Product4\u0027, 3.50), (5, \u0027Product5\u0027, 4.00), (6, \u0027Product6\u0027, 4.25);", + "sql": "SELECT b.country, b.brand, MIN(p.consumer_education_score) AS min_score FROM Products p JOIN Brands b ON p.id \u003d b.id GROUP BY b.country, b.brand;", + "sql_explanation": "Joins the Products and Brands tables on the id column. Calculates the minimum consumer education score for products in each country, grouped by brand using the MIN function and GROUP BY clause." 
+}, { + "id": "2246", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue of products that are sustainably sourced?", + "sql_context": "CREATE TABLE products (product_id INT, name VARCHAR(50), is_sustainable BOOLEAN); INSERT INTO products (product_id, name, is_sustainable) VALUES (1, \u0027Organic Cotton Shirt\u0027, true), (2, \u0027Regular Cotton Shirt\u0027, false); CREATE TABLE sales (sale_id INT, product_id INT, revenue DECIMAL(10,2)); INSERT INTO sales (sale_id, product_id, revenue) VALUES (1, 1, 50), (2, 2, 30), (3, 1, 40), (4, 2, 35);", + "sql": "SELECT SUM(sales.revenue) FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE products.is_sustainable \u003d true;", + "sql_explanation": "The SQL query calculates the total revenue of sustainably sourced products by joining the sales and products tables on the product_id column, and filtering for rows where the is_sustainable column is true. It then calculates the sum of the revenue column." 
+}, { + "id": "2258", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many of each product category are there in the inventory?", + "sql_context": "CREATE TABLE Products (product_id INT, category TEXT); INSERT INTO Products (product_id, category) VALUES (1, \u0027Tops\u0027), (2, \u0027Pants\u0027), (3, \u0027Tops\u0027), (4, \u0027Shoes\u0027); CREATE TABLE Inventory (product_id INT, quantity INT); INSERT INTO Inventory (product_id, quantity) VALUES (1, 10), (2, 20), (3, 30), (4, 40);", + "sql": "SELECT category, SUM(quantity) FROM Inventory INNER JOIN Products ON Inventory.product_id \u003d Products.product_id GROUP BY category;", + "sql_explanation": "The SQL query performs an INNER JOIN between the Products and Inventory tables on the product_id column, groups the results by the category column, and calculates the total quantity of each product category by using the SUM aggregate function." 
+}, { + "id": "2343", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of products sold by suppliers with a diversity rating higher than 3?", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, supplier_id INT, quantity INT); CREATE TABLE supplier_diversity (supplier_id INT, diversity_rating INT); INSERT INTO sales (sale_id, product_id, supplier_id, quantity) VALUES (1, 1, 1, 10), (2, 2, 2, 5), (3, 3, 3, 8); INSERT INTO supplier_diversity (supplier_id, diversity_rating) VALUES (1, 5), (2, 3), (3, 4);", + "sql": "SELECT SUM(s.quantity) FROM sales s JOIN supplier_diversity sd ON s.supplier_id \u003d sd.supplier_id WHERE sd.diversity_rating \u003e 3;", + "sql_explanation": "This SQL query calculates the total quantity of products sold by suppliers with a diversity rating higher than 3 (sd.diversity_rating \u003e 3) by joining the sales and supplier_diversity tables and applying the SUM aggregation function." 
+}, { + "id": "2596", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emission for products sold in Canada?", + "sql_context": "CREATE TABLE vendors (vendor_id INT, vendor_name TEXT, country TEXT);CREATE TABLE products (product_id INT, product_name TEXT, price DECIMAL, CO2_emission INT, vendor_id INT); INSERT INTO vendors (vendor_id, vendor_name, country) VALUES (1, \u0027VendorA\u0027, \u0027Canada\u0027), (2, \u0027VendorB\u0027, \u0027USA\u0027); INSERT INTO products (product_id, product_name, price, CO2_emission, vendor_id) VALUES (1, \u0027ProductX\u0027, 15.99, 500, 1), (2, \u0027ProductY\u0027, 12.49, 300, 1), (3, \u0027ProductZ\u0027, 9.99, 800, 2);", + "sql": "SELECT AVG(CO2_emission) FROM products JOIN vendors ON products.vendor_id \u003d vendors.vendor_id WHERE country \u003d \u0027Canada\u0027;", + "sql_explanation": "Join the vendors and products tables, filter for products sold in Canada, and calculate the average CO2 emission." 
+}, { + "id": "3230", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get products that are present in both \u0027products\u0027 and \u0027products_sustainability\u0027 tables", + "sql_context": "CREATE TABLE products (id INT, name TEXT, category TEXT);CREATE TABLE products_sustainability (id INT, name TEXT, sustainable_label TEXT);", + "sql": "SELECT * FROM products INNER JOIN products_sustainability ON products.id \u003d products_sustainability.id;", + "sql_explanation": "The SQL query uses the INNER JOIN operation to find products that are present in both \u0027products\u0027 and \u0027products_sustainability\u0027 tables." 
+}, { + "id": "11", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of games won by each team in the 2022 NBA playoffs?", + "sql_context": "CREATE TABLE nba_teams (team_id INT, team_name VARCHAR(255)); INSERT INTO nba_teams VALUES (1, \u0027TeamA\u0027), (2, \u0027TeamB\u0027), (3, \u0027TeamC\u0027); CREATE TABLE nba_games (game_id INT, home_team_id INT, away_team_id INT, home_score INT, away_score INT, playoff_round VARCHAR(255)); INSERT INTO nba_games VALUES (1, 1, 2, 90, 85, \u0027First Round\u0027), (2, 1, 3, 80, 85, \u0027First Round\u0027), (3, 2, 1, 95, 90, \u0027First Round\u0027), (4, 2, 3, 88, 82, \u0027First Round\u0027);", + "sql": "SELECT t.team_name, (SUM(CASE WHEN g.home_team_id \u003d t.team_id THEN 1 ELSE 0 END) + SUM(CASE WHEN g.away_team_id \u003d t.team_id THEN 1 ELSE 0 END) - SUM(CASE WHEN (g.home_team_id \u003d t.team_id AND g.home_score \u003c g.away_score) OR (g.away_team_id \u003d t.team_id AND g.home_score \u003e g.away_score) THEN 1 ELSE 0 END)) * 100.0 / COUNT(*) AS win_percentage FROM nba_teams t JOIN nba_games g ON t.team_id IN (g.home_team_id, g.away_team_id) WHERE g.playoff_round \u003d \u0027First Round\u0027 GROUP BY t.team_name;", + "sql_explanation": "Join the nba_teams and nba_games tables, filter for the 2022 NBA playoffs\u0027 First Round, and calculate the percentage of games won by each team." 
+}, { + "id": "862", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of three-point shots made per game by players in the BasketballPlayers and BasketballPlayerStats tables, for players with more than 500 total points scored?", + "sql_context": "CREATE TABLE BasketballPlayers (PlayerID INT, Name VARCHAR(50)); CREATE TABLE BasketballPlayerStats (PlayerID INT, GameID INT, Points INT, ThreePointShots INT);", + "sql": "SELECT AVG(ThreePointShots) FROM BasketballPlayerStats INNER JOIN BasketballPlayers ON BasketballPlayerStats.PlayerID \u003d BasketballPlayers.PlayerID GROUP BY PlayerID HAVING SUM(Points) \u003e 500;", + "sql_explanation": "The query performs a join between the BasketballPlayers and BasketballPlayerStats tables to get the points and three-point shots scored by each player in each game. It then calculates the average number of three-point shots made per game for players with more than 500 total points scored." 
+}, { + "id": "1068", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 5 table tennis players by the number of tournaments won", + "sql_context": "CREATE TABLE players (id INT, name VARCHAR(50), sport VARCHAR(20)); CREATE TABLE tournaments (id INT, player_id INT, name VARCHAR(50), location VARCHAR(50), year INT);", + "sql": "SELECT players.name, COUNT(*) as won_tournaments FROM players JOIN tournaments ON players.id \u003d tournaments.player_id GROUP BY players.name ORDER BY won_tournaments DESC LIMIT 5;", + "sql_explanation": "This query shows the top 5 table tennis players by the number of tournaments won by joining the players and tournaments tables, grouping the results by player name, and ordering the results by the number of tournaments won in descending order." 
+}, { + "id": "1238", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of games played by each team in the FIFA World Cup?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(100)); CREATE TABLE games (game_id INT, home_team INT, away_team INT);", + "sql": "SELECT teams.team_name, COUNT(games.game_id) as total_games FROM teams INNER JOIN games ON teams.team_id IN (games.home_team, games.away_team) GROUP BY teams.team_name;", + "sql_explanation": "This query joins the teams and games tables on the team_id field and calculates the total number of games played by each team in the FIFA World Cup. It does this by counting the number of games.game_id values for each team and groups the results by team_name. It then returns the total number of games played by each team." 
+}, { + "id": "1358", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average ticket prices for each venue in the \u0027stadiums\u0027 table?", + "sql_context": "CREATE TABLE ticket_prices (venue_id INT, avg_ticket_price DECIMAL(5,2));", + "sql": "SELECT s.stadium_name, AVG(t.avg_ticket_price) AS avg_ticket_price FROM stadiums s INNER JOIN ticket_prices t ON s.stadium_id \u003d t.venue_id GROUP BY s.stadium_name;", + "sql_explanation": "This query joins the \u0027stadiums\u0027 and \u0027ticket_prices\u0027 tables to find the average ticket price for each stadium." +}, { + "id": "1555", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of athletes who have won the \u0027Most Valuable Player\u0027 award and their corresponding sports?", + "sql_context": "CREATE TABLE Athletes (AthleteID INT PRIMARY KEY, Name VARCHAR(100), Sport VARCHAR(50), TeamID INT); CREATE TABLE Awards (AwardID INT PRIMARY KEY, AthleteID INT, Award VARCHAR(50), Year INT);", + "sql": "SELECT Athletes.Name, Athletes.Sport FROM Athletes INNER JOIN Awards ON Athletes.AthleteID \u003d Awards.AthleteID WHERE Awards.Award \u003d \u0027Most Valuable Player\u0027;", + "sql_explanation": "Joining the Athletes 
and Awards table on AthleteID, this query retrieves the names and sports of athletes who have won the \u0027Most Valuable Player\u0027 award." +}, { + "id": "1662", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of wins for each team?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name TEXT, city TEXT); CREATE TABLE games (game_id INT, team_id INT, won BOOLEAN);", + "sql": "SELECT t.team_name, (SUM(g.won) * 100.0 / COUNT(g.game_id)) as win_percentage FROM games g JOIN teams t ON g.team_id \u003d t.team_id GROUP BY t.team_name;", + "sql_explanation": "We are joining the teams and games tables on the team_id column, calculating the percentage of wins for each team." 
+}, { + "id": "1823", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of points scored by a cricket player from India in a single match?", + "sql_context": "CREATE TABLE Players (PlayerID INT PRIMARY KEY, Name VARCHAR(100), Age INT, Sport VARCHAR(50), Country VARCHAR(50)); CREATE TABLE Players_Matches (PlayerID INT, MatchID INT, Points INT, FOREIGN KEY (PlayerID) REFERENCES Players(PlayerID)); INSERT INTO Players_Matches (PlayerID, MatchID, Points) VALUES (1, 1, 25); INSERT INTO Players_Matches (PlayerID, MatchID, Points) VALUES (2, 2, 30);", + "sql": "SELECT MIN(Points) as MinPoints FROM Players_Matches JOIN Players ON Players.PlayerID \u003d Players_Matches.PlayerID WHERE Players.Country \u003d \u0027India\u0027;", + "sql_explanation": "Find the minimum number of points scored by a cricket player from India in a single match." 
+}, { + "id": "2046", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of games played by each team in the teams table", + "sql_context": "CREATE TABLE teams (team_id INT, name VARCHAR(50), city VARCHAR(50)); CREATE TABLE games (game_id INT, team_id INT, home_team BOOLEAN);", + "sql": "SELECT teams.name, COUNT(games.game_id) AS games_played FROM teams LEFT JOIN games ON teams.team_id \u003d games.team_id GROUP BY teams.name;", + "sql_explanation": "This query determines the number of games played by each team in the teams table. It uses a LEFT JOIN to combine rows from the teams and games tables, where the team_id matches in both tables. The LEFT JOIN returns all rows from the teams table and the matched rows from the games table. Then, it groups the results by team name and calculates the count of games for each team." 
+}, { + "id": "2155", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of goals scored by the team \u0027Barcelona\u0027 in the \u0027La Liga\u0027 league.", + "sql_context": "CREATE TABLE teams (team_id INT, name TEXT, league TEXT); INSERT INTO teams (team_id, name, league) VALUES (1, \u0027Barcelona\u0027, \u0027La Liga\u0027), (2, \u0027Real Madrid\u0027, \u0027La Liga\u0027); CREATE TABLE goals (goal_id INT, team_id INT, goals INT); INSERT INTO goals (goal_id, team_id, goals) VALUES (1, 1, 3), (2, 1, 2), (3, 2, 1);", + "sql": "SELECT SUM(goals) FROM goals JOIN teams ON goals.team_id \u003d teams.team_id WHERE teams.name \u003d \u0027Barcelona\u0027 AND teams.league \u003d \u0027La Liga\u0027;", + "sql_explanation": "This query calculates the total number of goals scored by the \u0027Barcelona\u0027 team in the \u0027La Liga\u0027 league. It does this by joining the \u0027goals\u0027 table with the \u0027teams\u0027 table based on the \u0027team_id\u0027 and filtering for rows where the \u0027name\u0027 is \u0027Barcelona\u0027 and the \u0027league\u0027 is \u0027La Liga\u0027. Then, it calculates the sum of the \u0027goals\u0027 column for those filtered rows." 
+}, { + "id": "2253", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the football teams and their respective stadium capacities.", + "sql_context": "CREATE TABLE teams (id INT, name VARCHAR(50), sport VARCHAR(20)); CREATE TABLE stadiums (id INT, name VARCHAR(50), capacity INT, team_id INT); INSERT INTO teams (id, name, sport) VALUES (1, \u0027New York Giants\u0027, \u0027Football\u0027); INSERT INTO teams (id, name, sport) VALUES (2, \u0027New York Jets\u0027, \u0027Football\u0027); INSERT INTO stadiums (id, name, capacity, team_id) VALUES (1, \u0027MetLife Stadium\u0027, 82500, 1); INSERT INTO stadiums (id, name, capacity, team_id) VALUES (2, \u0027MetLife Stadium\u0027, 82500, 2);", + "sql": "SELECT teams.name, stadiums.capacity FROM teams INNER JOIN stadiums ON teams.id \u003d stadiums.team_id WHERE teams.sport \u003d \u0027Football\u0027;", + "sql_explanation": "This SQL query retrieves the names of all football teams and their respective stadium capacities from the \u0027teams\u0027 and \u0027stadiums\u0027 tables by using an INNER JOIN on the \u0027team_id\u0027 column, and filtering for the sport \u0027Football\u0027 in the WHERE clause." 
+}, { + "id": "4952", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which athletes in the \u0027Athletes\u0027 table have the same name as a team in the \u0027Teams\u0027 table?", + "sql_context": "CREATE TABLE athletes (athlete_id INT, name VARCHAR(50)); INSERT INTO athletes (athlete_id, name) VALUES (1, \u0027John Doe\u0027); CREATE TABLE teams (team_id INT, name VARCHAR(50)); INSERT INTO teams (team_id, name) VALUES (1, \u0027Los Angeles Lakers\u0027);", + "sql": "SELECT a.name FROM athletes a INNER JOIN teams t ON a.name \u003d t.name;", + "sql_explanation": "This SQL query finds athletes in the \u0027Athletes\u0027 table who have the same name as a team in the \u0027Teams\u0027 table. It uses an INNER JOIN to combine the \u0027Athletes\u0027 and \u0027Teams\u0027 tables based on the name column, and returns the names that appear in both tables." 
+}, { + "id": "349", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have been involved in the most defense diplomacy events in the last 7 years, along with the number of events?", + "sql_context": "CREATE TABLE DefenseDiplomacy (ID INT, EventName TEXT, EventDate DATE, Country TEXT, ParticipatingCountries TEXT); INSERT INTO DefenseDiplomacy VALUES (1, \u0027Event 1\u0027, \u00272016-01-01\u0027, \u0027USA\u0027, \u0027Canada, Mexico\u0027); CREATE VIEW DiplomacyCountries AS SELECT Country FROM DefenseDiplomacy WHERE Country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027Mexico\u0027, \u0027Brazil\u0027, \u0027Argentina\u0027);", + "sql": "SELECT dc.Country, COUNT(*) as DiplomacyCount FROM DefenseDiplomacy d JOIN DiplomacyCountries dc ON d.Country \u003d dc.Country WHERE d.EventDate BETWEEN DATEADD(year, -7, GETDATE()) AND GETDATE() GROUP BY dc.Country ORDER BY DiplomacyCount DESC;", + "sql_explanation": "This query first creates a table for DefenseDiplomacy and inserts data into it. A view for DiplomacyCountries is also created. The SQL query then joins the DefenseDiplomacy table and DiplomacyCountries view based on Country. It filters the records based on the condition that EventDate should be within the last 7 years and groups the records by Country. Lastly, it calculates the count of total defense diplomacy events and orders the records by DiplomacyCount in descending order." 
+}, { + "id": "664", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the peacekeeping operations with the most troops from the \u0027peacekeeping_operations\u0027 and \u0027troop_deployments\u0027 tables?", + "sql_context": "CREATE TABLE peacekeeping_operations (operation_id INT, operation_name VARCHAR(50)); CREATE TABLE troop_deployments (deployment_id INT, operation_id INT, troop_count INT); INSERT INTO peacekeeping_operations VALUES (1, \u0027MINUSTAH\u0027), (2, \u0027UNMIL\u0027), (3, \u0027MONUSCO\u0027); INSERT INTO troop_deployments VALUES (1, 1, 5000), (2, 1, 3000), (3, 2, 4000), (4, 2, 6000), (5, 3, 7000);", + "sql": "SELECT p.operation_name, SUM(t.troop_count) as total_troops FROM peacekeeping_operations p JOIN troop_deployments t ON p.operation_id \u003d t.operation_id GROUP BY p.operation_name ORDER BY total_troops DESC;", + "sql_explanation": "This query joins the peacekeeping_operations and troop_deployments tables on the operation_id and calculates the total troops for each operation. It then orders the result by the total troops in descending order." 
+}, { + "id": "1072", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total military expenditure for each country over the past 5 years?", + "sql_context": "CREATE TABLE military_expenditure (id INT, country VARCHAR(255), year INT, expenditure INT); INSERT INTO military_expenditure (id, country, year, expenditure) VALUES (1, \u0027USA\u0027, 2017, 700000), (2, \u0027China\u0027, 2017, 300000), (3, \u0027Russia\u0027, 2017, 200000), (4, \u0027USA\u0027, 2018, 750000), (5, \u0027China\u0027, 2018, 350000), (6, \u0027Russia\u0027, 2018, 220000); CREATE TABLE countries (id INT, name VARCHAR(255)); INSERT INTO countries (id, name) VALUES (1, \u0027USA\u0027), (2, \u0027China\u0027), (3, \u0027Russia\u0027);", + "sql": "SELECT c.name, SUM(me.expenditure) as total_expenditure FROM military_expenditure me JOIN countries c ON me.country \u003d c.name WHERE me.year BETWEEN 2017 AND 2021 GROUP BY c.name;", + "sql_explanation": "This query joins the military_expenditure and countries tables on the country and name columns, respectively. It then calculates the total military expenditure for each country over the past 5 years by grouping the records by the name column and summing the expenditure column. The query filters the records for the years 2017 to 2021 using the BETWEEN operator." 
+}, { + "id": "1169", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average military expenditure by region over the past 5 years?", + "sql_context": "CREATE TABLE military_expenditure (id INT, region VARCHAR(255), year INT, expenditure INT); INSERT INTO military_expenditure (id, region, year, expenditure) VALUES (1, \u0027Asia\u0027, 2017, 50000), (2, \u0027Europe\u0027, 2017, 30000), (3, \u0027Africa\u0027, 2017, 25000), (4, \u0027Asia\u0027, 2018, 55000), (5, \u0027Europe\u0027, 2018, 35000), (6, \u0027Africa\u0027, 2018, 27000); CREATE TABLE regions (id INT, name VARCHAR(255)); INSERT INTO regions (id, name) VALUES (1, \u0027Asia\u0027), (2, \u0027Europe\u0027), (3, \u0027Africa\u0027);", + "sql": "SELECT r.name, AVG(me.expenditure) as avg_expenditure FROM military_expenditure me JOIN regions r ON me.region \u003d r.name WHERE me.year BETWEEN 2017 AND 2021 GROUP BY r.name;", + "sql_explanation": "This query joins the military_expenditure and regions tables on the region and name columns, respectively. It then calculates the average military expenditure over the past 5 years for each region by grouping the records by the name column and averaging the expenditure column. The query filters the records for the years 2017 to 2021 using the BETWEEN operator." 
+}, { + "id": "1708", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have participated in defense diplomacy events but not in peacekeeping operations?", + "sql_context": "CREATE TABLE peacekeeping_operations (id INT, country VARCHAR(255), operation VARCHAR(255)); CREATE TABLE defense_diplomacy (id INT, country VARCHAR(255), event VARCHAR(255));", + "sql": "SELECT ddip.country FROM defense_diplomacy ddip LEFT JOIN peacekeeping_operations pkops ON ddip.country \u003d pkops.country WHERE pkops.country IS NULL;", + "sql_explanation": "This SQL query performs a left join on the defense_diplomacy and peacekeeping_operations tables, based on their country columns. It then filters for rows where the peacekeeping_operations country is null, indicating that the country has participated in defense diplomacy events but not in peacekeeping operations. It returns a list of such countries." 
+}, { + "id": "2083", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the military innovation spending for each country in descending order", + "sql_context": "CREATE TABLE military_innovation (country VARCHAR(255), spending DECIMAL(10, 2)); CREATE VIEW top_military_spenders AS SELECT country, SUM(spending) as total_spending FROM military_innovation GROUP BY country ORDER BY total_spending DESC;", + "sql": "SELECT m.country, m.spending FROM military_innovation m JOIN top_military_spenders t ON m.country \u003d t.country ORDER BY m.spending DESC;", + "sql_explanation": "This query joins the military_innovation table with the top_military_spenders view on the country column. It returns the military innovation spending for each country and orders the results in descending order of spending." 
+}, { + "id": "342", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary for non-sustainable departments, and filters the results to only show departments with more than 1 employee?", + "sql_context": "CREATE TABLE employee (id INT PRIMARY KEY, name VARCHAR(100), department VARCHAR(50), salary DECIMAL(5,2), sustainable BOOLEAN); CREATE TABLE department (id INT PRIMARY KEY, name VARCHAR(100), manager_id INT, sustainable BOOLEAN); CREATE TABLE manufacturer (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50), sustainable BOOLEAN);", + "sql": "SELECT department.name as department_name, AVG(employee.salary) as average_salary FROM employee INNER JOIN department ON employee.department_id \u003d department.id WHERE department.sustainable \u003d FALSE GROUP BY department.name HAVING COUNT(*) \u003e 1;", + "sql_explanation": "This query calculates the average salary for non-sustainable departments, and filters the results to only show departments with more than 1 employee. It uses the employee, department, and manufacturer tables." 
+}, { + "id": "356", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average quantity of \u0027Sustainable Shoes\u0027 sold in \u0027Canada\u0027 for the \u0027Autumn 2023\u0027 season?", + "sql_context": "CREATE TABLE Sales (SaleID INT, ProductID INT, QuantitySold INT, Country VARCHAR(50), SaleDate DATE); INSERT INTO Sales (SaleID, ProductID, QuantitySold, Country, SaleDate) VALUES (1, 6, 10, \u0027Canada\u0027, \u00272023-10-21\u0027), (2, 7, 15, \u0027Canada\u0027, \u00272023-10-03\u0027), (3, 6, 12, \u0027Canada\u0027, \u00272023-11-15\u0027); CREATE TABLE Products (ProductID INT, ProductType VARCHAR(20), Sustainable BOOLEAN); INSERT INTO Products (ProductID, ProductType, Sustainable) VALUES (6, \u0027Sustainable Shoes\u0027, TRUE), (7, \u0027Regular Shoes\u0027, FALSE);", + "sql": "SELECT AVG(QuantitySold) as AvgQuantity FROM Sales S JOIN Products P ON S.ProductID \u003d P.ProductID WHERE P.ProductType \u003d \u0027Sustainable Shoes\u0027 AND Country \u003d \u0027Canada\u0027 AND SaleDate BETWEEN \u00272023-10-01\u0027 AND \u00272023-12-31\u0027 AND Country IN (\u0027Canada\u0027);", + "sql_explanation": "The SQL query performs an INNER JOIN between the Sales and Products tables, on the ProductID column. It then filters the records based on the \u0027Autumn 2023\u0027 season and the \u0027Canada\u0027 region, and calculates the average quantity of \u0027Sustainable Shoes\u0027 sold by taking the average of the QuantitySold field. The query also restricts the countries to \u0027Canada\u0027 to ensure accurate representation." 
+}, { + "id": "371", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of units sold per category, for each store in the city of New York.", + "sql_context": "CREATE TABLE store_location (store_id INT, city VARCHAR(255)); CREATE TABLE sales_data (store_id INT, category VARCHAR(255), sale_date DATE, units_sold INT);", + "sql": "SELECT store_location.store_id, store_location.city, category, SUM(units_sold) FROM sales_data INNER JOIN store_location ON sales_data.store_id \u003d store_location.store_id WHERE city \u003d \u0027New York\u0027 GROUP BY store_location.store_id, category;", + "sql_explanation": "The query lists the number of units sold per category, for each store in the city of New York. It performs an inner join between sales_data and store_location on store_id, filters the data for New York stores, groups the data by store_id and category, and sums the units_sold for each combination." 
+}, { + "id": "521", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average quantity of each fabric type used in 2021?", + "sql_context": "CREATE TABLE Production (production_id INT, garment_id INT, fabric_id INT, quantity INT); INSERT INTO Production (production_id, garment_id, fabric_id, quantity) VALUES (1, 101, 1, 5), (2, 102, 2, 3);", + "sql": "SELECT Fabrics.fabric_type, AVG(Production.quantity) as avg_quantity FROM Fabrics JOIN Production ON Fabrics.fabric_id \u003d Production.fabric_id WHERE YEAR(Production.production_date) \u003d 2021 GROUP BY Fabrics.fabric_type;", + "sql_explanation": "The SQL query joins the Fabrics and Production tables on fabric_id, filters for productions in 2021, groups by fabric_type, and calculates the average quantity of each fabric type used." 
+}, { + "id": "764", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many units of sustainable fabrics were sold in the last quarter?", + "sql_context": "CREATE TABLE fabrics (fabric_id INT, fabric_type VARCHAR(255), is_sustainable BOOLEAN);CREATE TABLE garments (garment_id INT, garment_name VARCHAR(255), fabric_id INT, quantity INT);CREATE TABLE sales (sale_id INT, sale_date DATE);CREATE VIEW sales_garments AS SELECT s.sale_id, g.garment_id, g.fabric_id, g.quantity FROM sales s JOIN garments g ON TRUE;", + "sql": "SELECT SUM(g.quantity) AS units_sold FROM sales_garments sg JOIN fabrics f ON sg.fabric_id \u003d f.fabric_id WHERE f.is_sustainable \u003d TRUE AND sg.sale_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH);", + "sql_explanation": "This query first creates a view called sales_garments that combines the sales and garments tables. It then joins the sales_garments view with the fabrics table on the fabric_id and filters for sustainable fabrics. Finally, it calculates the total number of units sold in the last quarter by grouping by the garment_id and summing the quantity." 
+}, { + "id": "923", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average CO2 emissions for \u0027Shirt\u0027 production across factories in \u0027Bangladesh\u0027 and \u0027Vietnam\u0027?", + "sql_context": "CREATE TABLE factories (factory_id INT, factory_name VARCHAR(20), country VARCHAR(20)); CREATE TABLE production (production_id INT, product_type VARCHAR(20), factory_id INT, co2_emissions FLOAT); INSERT INTO factories (factory_id, factory_name, country) VALUES (1, \u0027Factory1\u0027, \u0027Bangladesh\u0027), (2, \u0027Factory2\u0027, \u0027Vietnam\u0027), (3, \u0027Factory3\u0027, \u0027Bangladesh\u0027); INSERT INTO production (production_id, product_type, factory_id, co2_emissions) VALUES (1, \u0027Shirt\u0027, 1, 5.5), (2, \u0027Trousers\u0027, 1, 4.5), (3, \u0027Shirt\u0027, 2, 3.5), (4, \u0027Dress\u0027, 2, 7.5), (5, \u0027Shirt\u0027, 3, 4.8), (6, \u0027Trousers\u0027, 3, 3.8);", + "sql": "SELECT AVG(co2_emissions) FROM production JOIN factories ON production.factory_id \u003d factories.factory_id WHERE factories.country IN (\u0027Bangladesh\u0027, \u0027Vietnam\u0027) AND product_type \u003d \u0027Shirt\u0027;", + "sql_explanation": "This SQL query calculates the average CO2 emissions for \u0027Shirt\u0027 production across factories in \u0027Bangladesh\u0027 and \u0027Vietnam\u0027 by averaging the \u0027co2_emissions\u0027 column from the \u0027production\u0027 table where the \u0027country\u0027 column in the \u0027factories\u0027 table is \u0027Bangladesh\u0027 or \u0027Vietnam\u0027 and the \u0027product_type\u0027 is \u0027Shirt\u0027." 
+}, { + "id": "1093", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales revenue per garment category?", + "sql_context": "CREATE TABLE garment_sales (sales_id INT PRIMARY KEY, garment_id INT, store_id INT, quantity INT, price DECIMAL(5,2), date DATE); CREATE TABLE garments (garment_id INT PRIMARY KEY, garment_name TEXT, garment_category TEXT, sustainability_score INT); INSERT INTO garments (garment_id, garment_name, garment_category, sustainability_score) VALUES (1, \u0027Cotton Shirt\u0027, \u0027Tops\u0027, 80), (2, \u0027Denim Jeans\u0027, \u0027Bottoms\u0027, 60), (3, \u0027Silk Scarf\u0027, \u0027Accessories\u0027, 90); INSERT INTO garment_sales (sales_id, garment_id, store_id, quantity, price, date) VALUES (1, 1, 1, 2, 50, \u00272022-01-01\u0027), (2, 2, 1, 1, 100, \u00272022-01-01\u0027), (3, 3, 2, 3, 30, \u00272022-01-01\u0027);", + "sql": "SELECT g.garment_category, SUM(gs.quantity * gs.price) as total_sales_revenue FROM garment_sales gs JOIN garments g ON gs.garment_id \u003d g.garment_id GROUP BY g.garment_category;", + "sql_explanation": "This query joins the garment_sales and garments tables on garment_id and calculates the total sales revenue per garment category." 
+}, { + "id": "1156", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average waste generated per garment for each manufacturer?", + "sql_context": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(100)); INSERT INTO Manufacturers (ManufacturerID, ManufacturerName) VALUES (1, \u0027ABC Garments\u0027), (2, \u0027XYZ Textiles\u0027); CREATE TABLE Waste (WasteID INT, ManufacturerID INT, WastePerGarment DECIMAL(5,2)); INSERT INTO Waste (WasteID, ManufacturerID, WastePerGarment) VALUES (1, 1, 5.3), (2, 1, 4.8), (3, 2, 6.1), (4, 2, 5.9);", + "sql": "SELECT m.ManufacturerName, AVG(w.WastePerGarment) AS AvgWastePerGarment FROM Manufacturers m JOIN Waste w ON m.ManufacturerID \u003d w.ManufacturerID GROUP BY m.ManufacturerName;", + "sql_explanation": "This query calculates the average waste generated per garment for each manufacturer. It does this by joining the Manufacturers table with the Waste table on the ManufacturerID field, then grouping the results by the ManufacturerName and calculating the average waste per garment." 
+}, { + "id": "1801", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total revenue of sustainable fabric sales in Japan", + "sql_context": "CREATE TABLE sales (id INT, garment_id INT, price DECIMAL(5,2), country VARCHAR(255)); CREATE TABLE garments (id INT, garment_type VARCHAR(255), material VARCHAR(255), sustainable BOOLEAN);", + "sql": "SELECT SUM(sales.price) FROM sales JOIN garments ON sales.garment_id \u003d garments.id WHERE garments.sustainable \u003d TRUE AND sales.country \u003d \u0027Japan\u0027;", + "sql_explanation": "This query calculates the total revenue of sustainable fabric sales in Japan by joining the sales and garments tables and applying the SUM function to the price column. It only considers sales that involve sustainable garments and are made in Japan." 
+}, { + "id": "1949", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of \u0027Dress\u0027 items sold by stores located in \u0027Paris\u0027?", + "sql_context": "CREATE TABLE stores (store_id INT, store_name VARCHAR(20), city VARCHAR(20)); INSERT INTO stores (store_id, store_name, city) VALUES (1, \u0027Store1\u0027, \u0027Paris\u0027), (2, \u0027Store2\u0027, \u0027London\u0027); CREATE TABLE sales (sale_id INT, product_type VARCHAR(20), store_id INT, quantity_sold INT); INSERT INTO sales (sale_id, product_type, store_id, quantity_sold) VALUES (1, \u0027Dress\u0027, 1, 50), (2, \u0027Trousers\u0027, 1, 30), (3, \u0027Shirts\u0027, 2, 40), (4, \u0027Dress\u0027, 2, 60);", + "sql": "SELECT SUM(quantity_sold) FROM sales JOIN stores ON sales.store_id \u003d stores.store_id WHERE stores.city \u003d \u0027Paris\u0027 AND product_type \u003d \u0027Dress\u0027;", + "sql_explanation": "This SQL query calculates the total quantity of \u0027Dress\u0027 items sold by stores located in \u0027Paris\u0027 by summing the \u0027quantity_sold\u0027 column from the \u0027sales\u0027 table where the \u0027city\u0027 column in the \u0027stores\u0027 table is \u0027Paris\u0027 and the \u0027product_type\u0027 is \u0027Dress\u0027." 
+}, { + "id": "2470", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all garments made from sustainable fabrics with a sustainability score greater than 60?", + "sql_context": "CREATE TABLE fabrics (id INT, name VARCHAR(50), type VARCHAR(50), sustainability_score INT); INSERT INTO fabrics (id, name, type, sustainability_score) VALUES (1, \u0027Organic Cotton\u0027, \u0027Natural\u0027, 85); INSERT INTO fabrics (id, name, type, sustainability_score) VALUES (2, \u0027Recycled Polyester\u0027, \u0027Synthetic\u0027, 65);", + "sql": "SELECT garments.name FROM garments JOIN fabrics ON garments.fabric_id \u003d fabrics.id WHERE fabrics.sustainability_score \u003e 60;", + "sql_explanation": "Join the garments and fabrics tables, selecting the names of garments made from fabrics with a sustainability score greater than 60." 
+}, { + "id": "439", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average income and loan amount for customers who have participated in socially responsible lending?", + "sql_context": "CREATE TABLE customer_demographics (id INT PRIMARY KEY, customer_id INT, age INT, gender VARCHAR(10), income DECIMAL(10, 2), created_at TIMESTAMP); CREATE TABLE socially_responsible_lending (id INT PRIMARY KEY, loan_id INT, customer_id INT, loan_amount DECIMAL(10, 2), created_at TIMESTAMP);", + "sql": "SELECT cd.customer_id, AVG(cd.income) as avg_income, AVG(srl.loan_amount) as avg_loan_amount FROM customer_demographics cd INNER JOIN socially_responsible_lending srl ON cd.customer_id \u003d srl.customer_id GROUP BY cd.customer_id;", + "sql_explanation": "Calculate the average income and average loan amount for customers who have participated in socially responsible lending by joining the customer_demographics table and the socially_responsible_lending table and grouping by customer_id." 
+}, { + "id": "887", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of socially responsible loans issued by each bank in a specific region?", + "sql_context": "CREATE TABLE banks (bank_id INT, bank_name VARCHAR(50), total_assets FLOAT, region_id INT);CREATE TABLE loans (loan_id INT, bank_id INT, loan_amount FLOAT, socially_responsible BOOLEAN);", + "sql": "SELECT b.bank_name, SUM(l.loan_amount) as total_loans FROM banks b INNER JOIN loans l ON b.bank_id \u003d l.bank_id WHERE b.region_id \u003d 1 AND l.socially_responsible \u003d TRUE GROUP BY b.bank_name;", + "sql_explanation": "This SQL query uses an INNER JOIN to combine the banks and loans tables based on the bank_id field. It then calculates the total sum of loan_amount for each bank_name where socially_responsible is true and the region_id is 1 using the SUM function and groups the result by bank_name." 
+}, { + "id": "963", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average financial capability score for customers in each city, for cities with at least 100 customers.", + "sql_context": "CREATE TABLE customers (id INT, first_name VARCHAR(20), last_name VARCHAR(20), city VARCHAR(20), financial_capability_score DECIMAL(3, 2)); INSERT INTO customers (id, first_name, last_name, city, financial_capability_score) VALUES (1, \u0027Ahmad\u0027, \u0027Ali\u0027, \u0027San Francisco\u0027, 85); CREATE TABLE cities (id INT, name VARCHAR(20), description VARCHAR(50)); INSERT INTO cities (id, name, description) VALUES (1, \u0027San Francisco\u0027, \u0027City in California\u0027);", + "sql": "SELECT cities.name, AVG(customers.financial_capability_score) FROM customers INNER JOIN cities ON customers.city \u003d cities.name GROUP BY cities.name HAVING COUNT(customers.id) \u003e\u003d 100;", + "sql_explanation": "This SQL query performs an inner join between the customers and cities tables on the city and name columns respectively. It then groups the results by city and calculates the average financial capability score for customers in each city. Finally, it filters the results to only include cities with at least 100 customers." 
+}, { + "id": "2005", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers have taken out socially responsible loans?", + "sql_context": "CREATE TABLE customers (id INT, name TEXT); CREATE TABLE loans (id INT, customer_id INT, amount REAL, socially_responsible BOOLEAN);", + "sql": "SELECT COUNT(DISTINCT customers.id) FROM customers JOIN loans ON customers.id \u003d loans.customer_id WHERE loans.socially_responsible \u003d TRUE;", + "sql_explanation": "This SQL query performs an inner join on the \u0027customers\u0027 and \u0027loans\u0027 tables, filtering the \u0027loans\u0027 table to only show rows where \u0027socially_responsible\u0027 is true, and counting the number of unique \u0027customer_id\u0027 values in the \u0027customers\u0027 table." 
+}, { + "id": "2091", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 banks with the most Shariah-compliant loans?", + "sql_context": "CREATE TABLE bank (id INT, name VARCHAR(50)); INSERT INTO bank (id, name) VALUES (1, \u0027ABC Islamic Bank\u0027), (2, \u0027Islamic Finance Co.\u0027), (3, \u0027Shariah Compliant Ltd\u0027), (4, \u0027Standard Bank\u0027); CREATE TABLE loans (id INT, bank_id INT); INSERT INTO loans (id, bank_id) VALUES (1, 1), (2, 2), (3, 3), (4, 3), (5, 1), (6, 4);", + "sql": "SELECT b.name, COUNT(l.id) as loan_count FROM bank b JOIN loans l ON b.id \u003d l.bank_id GROUP BY b.name ORDER BY loan_count DESC LIMIT 3;", + "sql_explanation": "Join the \u0027bank\u0027 and \u0027loans\u0027 tables based on bank_id, then group the results by bank name and calculate the number of Shariah-compliant loans for each bank, returning the top 3 banks with the most loans." 
+}, { + "id": "2404", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of Shariah-compliant loans issued to clients in each country?", + "sql_context": "CREATE TABLE shariah_compliant_loans(id INT, client_id INT, amount INT);CREATE TABLE clients(id INT, name TEXT, country TEXT);", + "sql": "SELECT c.country, SUM(s.amount) FROM shariah_compliant_loans s INNER JOIN clients c ON s.client_id \u003d c.id GROUP BY c.country;", + "sql_explanation": "This SQL query calculates the total amount of Shariah-compliant loans issued to clients in each country by joining the shariah_compliant_loans table with the clients table based on the client_id column and then grouping the joined table based on the country column from the clients table, and then calculating the sum of the amount column from the shariah_compliant_loans table for each group." 
+}, { + "id": "1334", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many access to justice initiatives were implemented per country?", + "sql_context": "CREATE TABLE countries (country_id INT, country VARCHAR(50)); INSERT INTO countries (country_id, country) VALUES (1, \u0027USA\u0027), (2, \u0027Canada\u0027), (3, \u0027Mexico\u0027), (4, \u0027Brazil\u0027), (5, \u0027Argentina\u0027); CREATE TABLE initiatives (id INT, country_id INT, implemented BOOLEAN); INSERT INTO initiatives (id, country_id, implemented) VALUES (1, 1, TRUE), (2, 1, TRUE), (3, 2, FALSE), (4, 3, TRUE), (5, 4, FALSE), (6, 5, TRUE);", + "sql": "SELECT c.country, COUNT(i.id) AS total_initiatives FROM initiatives i JOIN countries c ON i.country_id \u003d c.country_id WHERE i.implemented \u003d TRUE GROUP BY c.country;", + "sql_explanation": "This SQL query calculates the total number of access to justice initiatives implemented per country by filtering the records where implemented is TRUE, joining the countries and initiatives tables based on the country_id column and grouping the records based on the country column." 
+}, { + "id": "2122", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who were the mediators involved in cases resolved through restorative justice in 2020?", + "sql_context": "CREATE TABLE mediators (id INT, name VARCHAR(255), cases_mediated INT, year INT); CREATE TABLE cases (id INT, case_name VARCHAR(255), resolution_type VARCHAR(255), mediator_id INT, year INT); INSERT INTO mediators (id, name, cases_mediated, year) VALUES (1, \u0027Alex\u0027, 22, 2020), (2, \u0027Taylor\u0027, 28, 2020); INSERT INTO cases (id, case_name, resolution_type, mediator_id, year) VALUES (1, \u0027Case A\u0027, \u0027Restorative Justice\u0027, 1, 2020), (2, \u0027Case B\u0027, \u0027Restorative Justice\u0027, 2, 2020);", + "sql": "SELECT m.name FROM mediators m JOIN cases c ON m.id \u003d c.mediator_id WHERE c.resolution_type \u003d \u0027Restorative Justice\u0027 AND c.year \u003d 2020;", + "sql_explanation": "This query retrieves the names of the mediators involved in cases resolved through restorative justice in the year 2020, by joining the mediators and cases tables on the mediator_id column and filtering the results based on the resolution_type and year columns." 
+}, { + "id": "2247", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average waiting time for legal aid in rural areas?", + "sql_context": "CREATE TABLE legal_aid_waiting_times (id INT, clinic_name VARCHAR(50), location VARCHAR(10), waiting_time INT); CREATE TABLE rural_areas (id INT, location VARCHAR(10));", + "sql": "SELECT AVG(waiting_time) FROM legal_aid_waiting_times lat INNER JOIN rural_areas ra ON lat.location \u003d ra.location WHERE ra.id \u003d 1;", + "sql_explanation": "The SQL query performs an inner join on the legal_aid_waiting_times and rural_areas tables based on the location. It then calculates the average waiting time for legal aid in the rural area specified in the rural_areas table." 
+}, { + "id": "2397", + "domain": "justice", + "domain_description": "Justice data on restorative justice, access to justice, criminal justice reform, and legal technology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the restorative measures taken for incidents of assault?", + "sql_context": "CREATE TABLE incidents (id INT PRIMARY KEY, type VARCHAR(255), location VARCHAR(255)); INSERT INTO incidents (id, type, location) VALUES (1, \u0027Assault\u0027, \u0027Park\u0027);", + "sql": "SELECT rj.restorative_measures FROM restorative_justice rj JOIN incidents i ON rj.incident_id \u003d i.id WHERE i.type \u003d \u0027Assault\u0027;", + "sql_explanation": "Select the restorative measures from restorative_justice table, join incidents table on incident_id and where type is Assault." 
+}, { + "id": "9", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the exhibitions with the highest number of visitors from outside the EU.", + "sql_context": "CREATE TABLE Exhibition (id INT, name VARCHAR(100), Visitor_id INT); CREATE TABLE Visitor (id INT, name VARCHAR(100), country VARCHAR(50)); INSERT INTO Exhibition (id, name, Visitor_id) VALUES (1, \u0027Ancient Civilizations\u0027, 1), (2, \u0027Modern Art\u0027, 2); INSERT INTO Visitor (id, name, country) VALUES (1, \u0027James Bond\u0027, \u0027UK\u0027), (2, \u0027Maria Garcia\u0027, \u0027Mexico\u0027);", + "sql": "SELECT Exhibition.name FROM Exhibition JOIN Visitor ON Exhibition.Visitor_id \u003d Visitor.id WHERE Visitor.country NOT IN (\u0027Austria\u0027, \u0027Belgium\u0027, \u0027Bulgaria\u0027, \u0027Croatia\u0027, \u0027Cyprus\u0027, \u0027Czech Republic\u0027, \u0027Denmark\u0027, \u0027Estonia\u0027, \u0027Finland\u0027, \u0027France\u0027, \u0027Germany\u0027, \u0027Greece\u0027, \u0027Hungary\u0027, \u0027Ireland\u0027, \u0027Italy\u0027, \u0027Latvia\u0027, \u0027Lithuania\u0027, \u0027Luxembourg\u0027, \u0027Malta\u0027, \u0027Netherlands\u0027, \u0027Poland\u0027, \u0027Portugal\u0027, \u0027Romania\u0027, \u0027Slovakia\u0027, \u0027Slovenia\u0027, \u0027Spain\u0027, \u0027Sweden\u0027) GROUP BY Exhibition.name ORDER BY COUNT(DISTINCT Exhibition.Visitor_id) DESC LIMIT 1;", + "sql_explanation": "This query joins the \u0027Exhibition\u0027 and \u0027Visitor\u0027 tables and filters the countries outside the EU. 
It then calculates the count of unique visitors for each exhibition, groups them, orders in descending order and returns the exhibition name with the highest number of visitors." +}, { + "id": "99", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which exhibitions have had no visitors from underrepresented communities in the last year?", + "sql_context": "CREATE TABLE Exhibition (id INT, name VARCHAR(100), Visitor_id INT); CREATE TABLE Visitor (id INT, name VARCHAR(100), country VARCHAR(50), interaction_date DATE); INSERT INTO Exhibition (id, name, Visitor_id) VALUES (1, \u0027Ancient Civilizations\u0027, 1), (2, \u0027Modern Art\u0027, 2), (3, \u0027Nature Photography\u0027, 3); INSERT INTO Visitor (id, name, country, interaction_date) VALUES (1, \u0027James Bond\u0027, \u0027Singapore\u0027, \u00272022-03-01\u0027), (2, \u0027Maria Garcia\u0027, \u0027Australia\u0027, \u00272022-05-15\u0027), (3, \u0027Anna Kim\u0027, \u0027South Korea\u0027, \u00272022-04-01\u0027);", + "sql": "SELECT Exhibition.name FROM Exhibition LEFT JOIN Visitor ON Exhibition.Visitor_id \u003d Visitor.id WHERE Visitor.country NOT IN (\u0027Africa\u0027, \u0027Latin America\u0027, \u0027South East Asia\u0027) OR Visitor.interaction_date \u003c CURDATE() - INTERVAL 1 YEAR GROUP BY Exhibition.name HAVING COUNT(DISTINCT Exhibition.Visitor_id) \u003d 0;", + "sql_explanation": "This query joins the \u0027Exhibition\u0027 and \u0027Visitor\u0027 tables, filters visitors from underrepresented communities and calculates the count of unique visitors for each exhibition in the last year. 
It then groups them, filters the exhibitions with no visitors and returns the exhibition names." +}, { + "id": "121", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of visitors who identified as Indigenous that attended in-person exhibitions in Sydney, Australia in 2025 and their average rating?", + "sql_context": "CREATE TABLE Visitors (ID INT, Age INT, Gender VARCHAR(10), Rating INT, City VARCHAR(20), Country VARCHAR(20), Ethnicity VARCHAR(20)); INSERT INTO Visitors (ID, Age, Gender, Rating, City, Country, Ethnicity) VALUES (1, 35, \u0027Female\u0027, 8, \u0027Sydney\u0027, \u0027Australia\u0027, \u0027Indigenous\u0027); CREATE TABLE Exhibitions (ID INT, Title VARCHAR(50), City VARCHAR(20), Country VARCHAR(20), Date DATE, InPerson BOOLEAN); INSERT INTO Exhibitions (ID, Title, City, Country, Date, InPerson) VALUES (1, \u0027The Art of the Dreamtime\u0027, \u0027Sydney\u0027, \u0027Australia\u0027, \u00272025-03-01\u0027, TRUE);", + "sql": "SELECT AVG(Visitors.Rating), COUNT(Visitors.ID) FROM Visitors INNER JOIN Exhibitions ON Visitors.City \u003d Exhibitions.City AND Visitors.Country \u003d Exhibitions.Country WHERE Exhibitions.InPerson \u003d TRUE AND Visitors.Ethnicity \u003d \u0027Indigenous\u0027 AND Exhibitions.Date BETWEEN \u00272025-01-01\u0027 AND \u00272025-12-31\u0027;", + "sql_explanation": "This query calculates the number of visitors who identified as Indigenous and attended in-person exhibitions in Sydney, Australia in 2025 and their average rating. It first performs an inner join on the Visitors and Exhibitions tables based on the city and country. 
Then, it filters the records for in-person exhibitions that took place in Sydney, Australia in 2025, and visitors who identified as Indigenous, and finally calculates the average rating of those visitors and counts the number of those visitors." +}, { + "id": "339", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 most visited exhibitions by visitors from the Asia-Pacific region.", + "sql_context": "CREATE TABLE Exhibition (id INT, name VARCHAR(100), Visitor_id INT); CREATE TABLE Visitor (id INT, name VARCHAR(100), country VARCHAR(50)); INSERT INTO Exhibition (id, name, Visitor_id) VALUES (1, \u0027Ancient Civilizations\u0027, 1), (2, \u0027Modern Art\u0027, 2), (3, \u0027Nature Photography\u0027, 3); INSERT INTO Visitor (id, name, country) VALUES (1, \u0027James Bond\u0027, \u0027Singapore\u0027), (2, \u0027Maria Garcia\u0027, \u0027Australia\u0027), (3, \u0027Anna Kim\u0027, \u0027South Korea\u0027);", + "sql": "SELECT Exhibition.name FROM Exhibition JOIN Visitor ON Exhibition.Visitor_id \u003d Visitor.id WHERE Visitor.country IN (\u0027Singapore\u0027, \u0027Australia\u0027, \u0027South Korea\u0027) GROUP BY Exhibition.name ORDER BY COUNT(DISTINCT Exhibition.Visitor_id) DESC LIMIT 3;", + "sql_explanation": "This query joins the \u0027Exhibition\u0027 and \u0027Visitor\u0027 tables, filters visitors from the Asia-Pacific region and calculates the count of unique visitors for each exhibition. It then groups them, orders in descending order and returns the top 3 exhibitions." 
+}, { + "id": "454", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of visitors who have interacted with digital exhibits, broken down by age group?", + "sql_context": "CREATE TABLE Visitor_Demographics (Visitor_ID INT, Age INT, Gender VARCHAR(255)); INSERT INTO Visitor_Demographics (Visitor_ID, Age, Gender) VALUES (1001, 25, \u0027Female\u0027), (1002, 35, \u0027Male\u0027), (1003, 45, \u0027Female\u0027), (1004, 55, \u0027Male\u0027); CREATE TABLE Digital_Exhibit_Interactions (Visitor_ID INT, Interaction_Date DATE); INSERT INTO Digital_Exhibit_Interactions (Visitor_ID, Interaction_Date) VALUES (1001, \u00272022-01-01\u0027), (1002, \u00272022-01-02\u0027), (1003, \u00272022-01-03\u0027), (1004, \u00272022-01-04\u0027), (1001, \u00272022-01-05\u0027); CREATE VIEW Interacted_Digital_Exhibits AS SELECT Visitor_ID FROM Digital_Exhibit_Interactions GROUP BY Visitor_ID HAVING COUNT(DISTINCT Interaction_Date) \u003e 0;", + "sql": "SELECT FLOOR(Age/10)*10 AS Age_Group, COUNT(DISTINCT Visitor_ID) FROM Visitor_Demographics INNER JOIN Interacted_Digital_Exhibits ON Visitor_Demographics.Visitor_ID \u003d Interacted_Digital_Exhibits.Visitor_ID GROUP BY Age_Group;", + "sql_explanation": "This query joins the Visitor_Demographics and Interacted_Digital_Exhibits tables on the Visitor_ID column, groups by the Age column with the age group calculated as the floor value divided by 10 and multiplied by 10 (Age_Group), and then counts the number of unique visitors (COUNT(DISTINCT Visitor_ID)) for each age group." 
+}, { + "id": "1340", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of visitors that attended exhibitions in Berlin and have a membership.", + "sql_context": "CREATE TABLE Members (id INT, membership BOOLEAN, city VARCHAR(50)); INSERT INTO Members (id, membership, city) VALUES (1, TRUE, \u0027Berlin\u0027); CREATE TABLE Exhibitions (id INT, city VARCHAR(50), visitors INT); INSERT INTO Exhibitions (id, city, visitors) VALUES (1, \u0027Berlin\u0027, 3500);", + "sql": "SELECT SUM(Exhibitions.visitors) FROM Exhibitions INNER JOIN Members ON Exhibitions.city \u003d Members.city WHERE Members.city \u003d \u0027Berlin\u0027 AND Members.membership \u003d TRUE;", + "sql_explanation": "Summarize the total number of visitors that attended exhibitions in Berlin and have a membership." 
+}, { + "id": "1401", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the number of visitors and art-related exhibitions per country.", + "sql_context": "CREATE TABLE visitors_by_country (id INT, country VARCHAR(50), num_visitors INT); INSERT INTO visitors_by_country (id, country, num_visitors) VALUES (1, \u0027USA\u0027, 1000), (2, \u0027Mexico\u0027, 800); CREATE TABLE art_exhibitions_by_country (id INT, country VARCHAR(50), num_exhibitions INT); INSERT INTO art_exhibitions_by_country (id, country, num_exhibitions) VALUES (1, \u0027USA\u0027, 5), (2, \u0027Mexico\u0027, 3);", + "sql": "SELECT vbc.country, vbc.num_visitors, aebc.num_exhibitions FROM visitors_by_country vbc INNER JOIN art_exhibitions_by_country aebc ON vbc.country \u003d aebc.country;", + "sql_explanation": "The query uses an INNER JOIN to combine the visitors_by_country and art_exhibitions_by_country tables based on the country column. This provides the number of visitors and art-related exhibitions per country." 
+}, { + "id": "1481", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of days between visits for each visitor?", + "sql_context": "CREATE TABLE Visitors (VisitorID INT, FirstVisit DATE); CREATE TABLE Visits (VisitID INT, VisitorID INT, VisitDate DATE); INSERT INTO Visitors (VisitorID, FirstVisit) VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-01-05\u0027), (3, \u00272022-01-10\u0027); INSERT INTO Visits (VisitID, VisitorID, VisitDate) VALUES (1, 1, \u00272022-01-02\u0027), (2, 1, \u00272022-01-05\u0027), (3, 2, \u00272022-01-06\u0027), (4, 2, \u00272022-01-08\u0027), (5, 3, \u00272022-01-15\u0027);", + "sql": "SELECT VisitorID, AVG(DATEDIFF(VisitDate, FirstVisit)) AS AvgDaysBetweenVisits FROM Visitors A JOIN Visits B ON A.VisitorID \u003d B.VisitorID GROUP BY VisitorID;", + "sql_explanation": "The SQL query performs an inner join between the Visitors and Visits tables based on the VisitorID. It then calculates the average number of days between visits for each visitor using the AVG() and DATEDIFF() functions." 
+}, { + "id": "1699", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of visitors for each exhibition type", + "sql_context": "CREATE TABLE Exhibitions (id INT, name VARCHAR(255), type VARCHAR(255)); CREATE TABLE Tickets (id INT, visitor_id INT, exhibition_id INT);", + "sql": "SELECT Exhibitions.type, COUNT(Tickets.visitor_id) FROM Exhibitions JOIN Tickets ON Exhibitions.id \u003d Tickets.exhibition_id GROUP BY Exhibitions.type;", + "sql_explanation": "This query joins the Exhibitions and Tickets tables, groups by exhibition type, and counts the number of visitors for each type." +}, { + "id": "27", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average travel advisory level for each country in Europe?", + "sql_context": "CREATE TABLE if not exists countries (id INT, name VARCHAR(20)); CREATE TABLE if not exists advisories (id INT, country_id INT, level INT);", + "sql": "SELECT c.name, AVG(a.level) FROM advisories a JOIN countries c ON a.country_id \u003d c.id WHERE c.name IN (\u0027Austria\u0027, \u0027Belgium\u0027, \u0027Czech Republic\u0027, \u0027Denmark\u0027, \u0027Estonia\u0027, \u0027Finland\u0027, \u0027France\u0027, \u0027Germany\u0027, \u0027Greece\u0027, 
\u0027Hungary\u0027, \u0027Ireland\u0027, \u0027Italy\u0027, \u0027Latvia\u0027, \u0027Lithuania\u0027, \u0027Luxembourg\u0027, \u0027Malta\u0027, \u0027Netherlands\u0027, \u0027Norway\u0027, \u0027Poland\u0027, \u0027Portugal\u0027, \u0027Slovakia\u0027, \u0027Slovenia\u0027, \u0027Spain\u0027, \u0027Sweden\u0027, \u0027Switzerland\u0027, \u0027United Kingdom\u0027) GROUP BY c.name;", + "sql_explanation": "This SQL query joins the advisories table and the countries table on the country_id column. It then groups the results by the name column in the countries table and calculates the average level for each country in Europe. This represents the average travel advisory level for each country in Europe." +}, { + "id": "218", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 destinations with the highest increase in eco-rating from 2021 to 2022.", + "sql_context": "CREATE TABLE destinations_2021_2022 (destination VARCHAR(50), avg_eco_rating FLOAT, year INT); INSERT INTO destinations_2021_2022 (destination, avg_eco_rating, year) VALUES (\u0027Bali\u0027, 8.5, 2021), (\u0027Bali\u0027, 8.7, 2022), (\u0027Maldives\u0027, 8.7, 2021), (\u0027Maldives\u0027, 8.9, 2022), (\u0027New Zealand\u0027, 9.2, 2021), (\u0027New Zealand\u0027, 9.4, 2022), (\u0027Costa Rica\u0027, 9.0, 2021), (\u0027Costa Rica\u0027, 9.2, 2022), (\u0027Nepal\u0027, 8.8, 2021), (\u0027Nepal\u0027, 9.0, 2022);", + "sql": "SELECT d.destination, (d.avg_eco_rating - d_prev.avg_eco_rating) AS eco_rating_change FROM destinations_2021_2022 d JOIN destinations_2021_2022 d_prev ON d.destination \u003d 
d_prev.destination AND d_prev.year \u003d (YEAR(d.year) - 1) ORDER BY eco_rating_change DESC LIMIT 5;", + "sql_explanation": "Join the destinations_2021_2022 table to itself on the destination column and the previous year, calculate the change in eco-rating, and limit the results to the top 5 destinations." +}, { + "id": "384", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of destinations without any sustainable tourism practices data.", + "sql_context": "CREATE TABLE SustainableTourism (id INT, destination VARCHAR(50), PRIMARY KEY(id)); INSERT INTO SustainableTourism (id, destination) VALUES (1, \u0027CountryA\u0027), (2, \u0027CountryB\u0027), (3, \u0027CountryC\u0027), (4, \u0027CountryD\u0027), (5, \u0027CountryE\u0027); CREATE TABLE Destinations (id INT, name VARCHAR(50), PRIMARY KEY(id)); INSERT INTO Destinations (id, name) VALUES (1, \u0027CountryA\u0027), (2, \u0027CountryB\u0027), (3, \u0027CountryC\u0027), (6, \u0027CountryF\u0027);", + "sql": "SELECT COUNT(DISTINCT Destinations.name) - COUNT(DISTINCT SustainableTourism.destination) FROM Destinations LEFT JOIN SustainableTourism ON Destinations.name \u003d SustainableTourism.destination WHERE SustainableTourism.destination IS NULL;", + "sql_explanation": "This query identifies the number of destinations without any sustainable tourism practices data by finding the difference between the number of distinct destinations and the number of distinct sustainable tourism practices, then filtering the result to only include the rows where the sustainable tourism data is null." 
+}, { + "id": "678", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the year-over-year change in international visitor arrivals for each country between 2018 and 2019.", + "sql_context": "CREATE TABLE visitor_arrivals (country VARCHAR(50), year INT, arrivals INT); INSERT INTO visitor_arrivals (country, year, arrivals) VALUES (\u0027Japan\u0027, 2018, 31_191_000), (\u0027Japan\u0027, 2019, 31_890_000), (\u0027France\u0027, 2018, 89_300_000), (\u0027France\u0027, 2019, 90_100_000);", + "sql": "SELECT a.country, ROUND(100.0 * (b.arrivals - a.arrivals) / a.arrivals, 2) as yoy_change FROM visitor_arrivals a JOIN visitor_arrivals b ON a.country \u003d b.country WHERE a.year \u003d 2018 AND b.year \u003d 2019;", + "sql_explanation": "The query uses a self-join to compare international visitor arrivals for each country between 2018 and 2019. The ROUND() function is used to round the year-over-year change to two decimal places." 
+}, { + "id": "1319", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of tourists visiting South American countries in 2017?", + "sql_context": "CREATE TABLE Tourist_Arrivals ( id INT, country_id INT, year INT, visitors INT, FOREIGN KEY (country_id) REFERENCES Countries(id) ); INSERT INTO Tourist_Arrivals (id, country_id, year, visitors) VALUES (1, 2, 2017, 7000000); INSERT INTO Tourist_Arrivals (id, country_id, year, visitors) VALUES (2, 9, 2017, 4000000);", + "sql": "SELECT SUM(t.visitors) as total_visitors FROM Tourist_Arrivals t INNER JOIN Countries c ON t.country_id \u003d c.id WHERE c.continent \u003d \u0027South America\u0027 AND t.year \u003d 2017;", + "sql_explanation": "This query calculates the total number of tourists visiting South American countries in 2017 by joining the Tourist_Arrivals table and the Countries table, filtering on continent \u003d \u0027South America\u0027 and year \u003d 2017, and summing the visitors." 
+}, { + "id": "1492", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of tourists visiting Asian countries in 2018?", + "sql_context": "CREATE TABLE Tourist_Arrivals ( id INT, country_id INT, year INT, visitors INT, FOREIGN KEY (country_id) REFERENCES Countries(id) ); INSERT INTO Tourist_Arrivals (id, country_id, year, visitors) VALUES (1, 3, 2018, 8000000); INSERT INTO Tourist_Arrivals (id, country_id, year, visitors) VALUES (2, 4, 2018, 5000000);", + "sql": "SELECT SUM(t.visitors) as total_visitors FROM Tourist_Arrivals t INNER JOIN Countries c ON t.country_id \u003d c.id WHERE c.continent \u003d \u0027Asia\u0027 AND t.year \u003d 2018;", + "sql_explanation": "This query calculates the total number of tourists visiting Asian countries in 2018 by joining the Tourist_Arrivals table and the Countries table, filtering on continent \u003d \u0027Asia\u0027 and year \u003d 2018, and summing the visitors." 
+}, { + "id": "1918", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of eco-certified accommodations in each country and the average eco-rating.", + "sql_context": "CREATE TABLE countries (id INT, country VARCHAR(50), num_eco_accommodations INT, total_eco_rating INT); INSERT INTO countries (id, country, num_eco_accommodations, total_eco_rating) VALUES (1, \u0027Costa Rica\u0027, 500, 4500), (2, \u0027New Zealand\u0027, 300, 2700), (3, \u0027Norway\u0027, 450, 3600); CREATE TABLE eco_accommodations (id INT, country VARCHAR(50), eco_rating INT); INSERT INTO eco_accommodations (id, country, eco_rating) VALUES (1, \u0027Costa Rica\u0027, 9), (2, \u0027Costa Rica\u0027, 8), (3, \u0027New Zealand\u0027, 8), (4, \u0027New Zealand\u0027, 9), (5, \u0027Norway\u0027, 7), (6, \u0027Norway\u0027, 8);", + "sql": "SELECT c.country, AVG(e.eco_rating) AS avg_eco_rating FROM countries c JOIN eco_accommodations e ON c.country \u003d e.country GROUP BY c.country;", + "sql_explanation": "Join the countries and eco_accommodations tables on the country column, calculate the average eco-rating, and group by country." 
+}, { + "id": "306", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of timber sold by each salesperson, broken down by month?", + "sql_context": "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027), (2, \u0027Jane Smith\u0027, \u0027South\u0027); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, \u00272021-01-01\u0027), (2, 1, 150, \u00272021-02-01\u0027), (3, 2, 180, \u00272021-01-01\u0027);", + "sql": "SELECT salesperson_id, DATE_PART(\u0027month\u0027, sale_date) as month, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id \u003d salesperson.salesperson_id GROUP BY salesperson_id, month ORDER BY salesperson_id, month;", + "sql_explanation": "Joins timber_sales and salesperson tables, groups sales by salesperson and month, calculates total volume sold by each salesperson in each month, and orders the results by salesperson and month." 
+}, { + "id": "648", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which salesperson has sold the most timber in total?", + "sql_context": "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027), (2, \u0027Jane Smith\u0027, \u0027South\u0027); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, \u00272021-01-01\u0027), (2, 1, 150, \u00272021-02-01\u0027), (3, 2, 180, \u00272021-01-01\u0027);", + "sql": "SELECT salesperson_id, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id \u003d salesperson.salesperson_id GROUP BY salesperson_id ORDER BY total_volume DESC LIMIT 1;", + "sql_explanation": "Joins timber_sales and salesperson tables, groups sales by salesperson, calculates total volume sold by each salesperson, and returns the salesperson with the highest volume." 
+}, { + "id": "723", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which regions have the most wildlife habitats?", + "sql_context": "CREATE TABLE Regions (RegionID INT, RegionName TEXT); INSERT INTO Regions (RegionID, RegionName) VALUES (1, \u0027Northeast\u0027), (2, \u0027Southeast\u0027); CREATE TABLE Habitats (HabitatID INT, RegionID INT, Wildlife TEXT); INSERT INTO Habitats (HabitatID, RegionID, Wildlife) VALUES (1, 1, \u0027Deer\u0027), (2, 1, \u0027Bear\u0027), (3, 2, \u0027Raccoon\u0027);", + "sql": "SELECT Regions.RegionName, COUNT(Habitats.HabitatID) as TotalHabitats FROM Regions INNER JOIN Habitats ON Regions.RegionID \u003d Habitats.RegionID GROUP BY Regions.RegionName ORDER BY TotalHabitats DESC;", + "sql_explanation": "This SQL query finds the regions with the most wildlife habitats. It does this by joining the Regions table with the Habitats table on the RegionID column, and then grouping the results by the RegionName. For each unique RegionName, it calculates the count of the HabitatID column. It then orders the results in descending order based on the total number of habitats." 
+}, { + "id": "1356", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wildlife species are present in each forest type?", + "sql_context": "CREATE TABLE ForestTypes (id INT, name VARCHAR(255)); INSERT INTO ForestTypes (id, name) VALUES (1, \u0027Coniferous\u0027), (2, \u0027Deciduous\u0027), (3, \u0027Mixed\u0027); CREATE TABLE Wildlife (id INT, forest_type_id INT, species VARCHAR(255)); INSERT INTO Wildlife (id, forest_type_id, species) VALUES (1, 1, \u0027Squirrel\u0027), (2, 1, \u0027Deer\u0027), (3, 2, \u0027Raccoon\u0027), (4, 2, \u0027Bear\u0027), (5, 3, \u0027Fox\u0027), (6, 3, \u0027Owl\u0027);", + "sql": "SELECT f.forest_type_id, f.name AS forest_type_name, COUNT(w.id) AS species_count FROM ForestTypes f LEFT JOIN Wildlife w ON f.id \u003d w.forest_type_id GROUP BY f.id;", + "sql_explanation": "This SQL query performs a left join between the ForestTypes and Wildlife tables on the forest_type_id column. Then, it groups the results by forest_type_id (or forest_type_name) and calculates the number of wildlife species present in each forest type." 
+}, { + "id": "1606", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon sequestration rate for each tree species in the year 2005?", + "sql_context": "CREATE TABLE TreeSpecies (id INT, name VARCHAR(255)); INSERT INTO TreeSpecies (id, name) VALUES (1, \u0027Pine\u0027), (2, \u0027Oak\u0027), (3, \u0027Maple\u0027), (4, \u0027Birch\u0027); CREATE TABLE CarbonSeq (id INT, tree_species_id INT, year INT, rate FLOAT); INSERT INTO CarbonSeq (id, tree_species_id, year, rate) VALUES (1, 1, 2000, 2.5), (2, 1, 2005, 3.0), (3, 2, 2000, 4.0), (4, 2, 2005, 4.5), (5, 3, 2000, 3.5), (6, 3, 2005, 4.0), (7, 4, 2000, 4.5), (8, 4, 2005, 5.0);", + "sql": "SELECT ts.id, ts.name, AVG(cs.rate) AS avg_rate FROM TreeSpecies ts JOIN CarbonSeq cs ON ts.id \u003d cs.tree_species_id WHERE cs.year \u003d 2005 GROUP BY ts.id;", + "sql_explanation": "This SQL query joins the TreeSpecies and CarbonSeq tables on the tree_species_id column, then filters for the year 2005 and groups the results by tree species to calculate the average carbon sequestration rate for each." 
+}, { + "id": "1770", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of trees in the boreal forest that belong to the Pinus family?", + "sql_context": "CREATE TABLE biomes (biome_id INT PRIMARY KEY, name VARCHAR(50), area_km2 FLOAT); INSERT INTO biomes (biome_id, name, area_km2) VALUES (1, \u0027Tropical Rainforest\u0027, 15000000.0), (2, \u0027Temperate Rainforest\u0027, 250000.0), (3, \u0027Boreal Forest\u0027, 12000000.0); CREATE TABLE trees (tree_id INT PRIMARY KEY, species VARCHAR(50), biome_id INT, family VARCHAR(50), volume FLOAT, FOREIGN KEY (biome_id) REFERENCES biomes(biome_id)); INSERT INTO trees (tree_id, species, biome_id, family, volume) VALUES (1, \u0027White Pine\u0027, 3, \u0027Pinus\u0027, 400.0), (2, \u0027Red Pine\u0027, 3, \u0027Pinus\u0027, 500.0), (3, \u0027Jack Pine\u0027, 3, \u0027Pinus\u0027, 300.0);", + "sql": "SELECT SUM(trees.volume) FROM trees JOIN biomes ON trees.biome_id \u003d biomes.biome_id WHERE trees.family \u003d \u0027Pinus\u0027 AND biomes.name \u003d \u0027Boreal Forest\u0027;", + "sql_explanation": "First, the query selects the total volume of rows from the trees table where the family column is \u0027Pinus\u0027. Then, it filters the rows based on the biome_id foreign key that corresponds to the \u0027Boreal Forest\u0027 biome." 
+}, { + "id": "1785", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total volume of timber produced by region in 2020?", + "sql_context": "CREATE TABLE region (id INT, name VARCHAR(255)); INSERT INTO region (id, name) VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027); CREATE TABLE timber_production (region_id INT, year INT, volume INT); INSERT INTO timber_production (region_id, year, volume) VALUES (1, 2020, 12000), (1, 2019, 11000), (2, 2020, 15000), (2, 2019, 14000), (3, 2020, 9000), (3, 2019, 8000), (4, 2020, 13000), (4, 2019, 12000);", + "sql": "SELECT r.name, SUM(tp.volume) as total_volume FROM timber_production tp JOIN region r ON tp.region_id \u003d r.id WHERE tp.year \u003d 2020 GROUP BY r.name;", + "sql_explanation": "This SQL query joins the timber_production table with the region table on the region_id column. It then filters the data to only include rows where the year column is equal to 2020. Next, it groups the data by the name column from the region table. Finally, it calculates the sum of the volume column for each group, giving the total volume of timber produced by region in 2020." 
+}, { + "id": "2001", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which species have a population smaller than 1000 in protected areas?", + "sql_context": "CREATE TABLE protected_areas (id INT, name VARCHAR(50)); INSERT INTO protected_areas (id, name) VALUES (1, \u0027Area 1\u0027), (2, \u0027Area 2\u0027); CREATE TABLE species (id INT, name VARCHAR(50), population DECIMAL(5,2), protected_area_id INT); INSERT INTO species (id, name, population, protected_area_id) VALUES (1, \u0027Species 1\u0027, 500.00, 1), (2, \u0027Species 2\u0027, 800.00, 1), (3, \u0027Species 3\u0027, 1200.00, 2), (4, \u0027Species 4\u0027, 700.00, 2);", + "sql": "SELECT s.name FROM species s INNER JOIN protected_areas pa ON s.protected_area_id \u003d pa.id GROUP BY s.name HAVING SUM(s.population) \u003c 1000;", + "sql_explanation": "This query retrieves the names of species with a population smaller than 1000 in protected areas by joining the species and protected_areas tables on the protected_area_id column. It then groups the results by species name and filters for a total population of less than 1000 using the HAVING clause." 
+}, { + "id": "169", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the total cargo weight transported by each vessel in the Mediterranean Sea in the last year, excluding liquefied gas carriers.", + "sql_context": "CREATE TABLE Vessels (id INT, name VARCHAR(50), type VARCHAR(50)); CREATE TABLE CargoTransports (id INT, vessel_id INT, weight INT, transport_time TIMESTAMP);", + "sql": "SELECT V.name, SUM(C.weight) as total_weight FROM Vessels V JOIN CargoTransports C ON V.id \u003d C.vessel_id WHERE V.type !\u003d \u0027Liquefied Gas Carrier\u0027 AND C.transport_time \u003e NOW() - INTERVAL \u00271 year\u0027 AND C.latitude BETWEEN 30 AND 45 AND C.longitude BETWEEN -20 AND 45 GROUP BY V.name;", + "sql_explanation": "The query joins the Vessels and CargoTransports tables, filters out liquefied gas carriers, and selects the total weight of cargo transports in the Mediterranean Sea in the last year for each vessel." 
+}, { + "id": "391", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the vessel with the longest continuous journey", + "sql_context": "CREATE TABLE VesselPositions (vessel_id INT, timestamp TIMESTAMP, latitude DECIMAL(9,6), longitude DECIMAL(9,6));", + "sql": "SELECT t1.vessel_id, MAX(t2.timestamp) - MIN(t1.timestamp) AS duration FROM VesselPositions t1 JOIN VesselPositions t2 ON t1.vessel_id \u003d t2.vessel_id AND t2.timestamp \u003e t1.timestamp GROUP BY t1.vessel_id ORDER BY duration DESC LIMIT 1;", + "sql_explanation": "This query finds the vessel with the longest continuous journey. It calculates the duration between each position update for each vessel, groups the data by vessel_id, and orders the results by duration in descending order, returning the top 1 row." 
+}, { + "id": "732", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total tonnage of non-hazardous cargo shipped by each vessel in the last year?", + "sql_context": "CREATE TABLE Vessels (VesselID int, Name varchar(50), Type varchar(50), AverageSpeed float); CREATE TABLE Cargo (CargoID int, VesselID int, MaterialType varchar(50), Tonnage int, TransportDate date); INSERT INTO Vessels VALUES (1, \u0027Vessel1\u0027, \u0027Transport\u0027, 15); INSERT INTO Cargo VALUES (1, 1, \u0027Non-Hazardous\u0027, 100, \u00272022-01-01\u0027);", + "sql": "SELECT V.Name, SUM(C.Tonnage) FROM Vessels V INNER JOIN Cargo C ON V.VesselID \u003d C.VesselID WHERE C.TransportDate \u003e\u003d DATEADD(year, -1, GETDATE()) AND C.MaterialType \u003d \u0027Non-Hazardous\u0027 GROUP BY V.Name;", + "sql_explanation": "The SQL query calculates the total tonnage of non-hazardous cargo shipped by each vessel in the last year. It first performs an inner join between the Vessels and Cargo tables based on the VesselID. It then filters the records where the TransportDate is within the last year and MaterialType is \u0027Non-Hazardous\u0027. Finally, it groups the records by the Name of the vessels and calculates the sum of the Tonnage for each group." 
+}, { + "id": "907", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify vessels that have not had any safety incidents in the last year", + "sql_context": "CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(50)); CREATE TABLE Incidents (IncidentID INT, IncidentType VARCHAR(50), VesselID INT, IncidentDate DATE); INSERT INTO Vessels (VesselID, VesselName) VALUES (1, \u0027SeaLion\u0027), (2, \u0027OceanRunner\u0027); INSERT INTO Incidents (IncidentID, IncidentType, VesselID, IncidentDate) VALUES (1, \u0027Collision\u0027, 1, \u00272021-06-05\u0027), (2, \u0027Grounding\u0027, 2, \u00272022-01-10\u0027);", + "sql": "SELECT VesselName FROM Vessels LEFT JOIN Incidents ON Vessels.VesselID \u003d Incidents.VesselID WHERE Incidents.IncidentDate IS NULL OR Incidents.IncidentDate \u003c DATEADD(year, -1, GETDATE());", + "sql_explanation": "This query identifies vessels that have not had any safety incidents in the last year. It does this by performing a left join between the Vessels and Incidents tables on the VesselID, then filtering for rows where the IncidentDate is either null (indicating no incidents) or is more than one year old." 
+}, { + "id": "994", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum distance from shore for vessels in the Pacific Ocean, grouped by vessel type?", + "sql_context": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, gps_position TEXT); CREATE TABLE gps_positions (id INT, latitude FLOAT, longitude FLOAT, country TEXT, distance_from_shore FLOAT);", + "sql": "SELECT v.type, MIN(g.distance_from_shore), MAX(g.distance_from_shore) FROM vessels v JOIN gps_positions g ON v.gps_position \u003d g.id WHERE g.country \u003d \u0027Pacific Ocean\u0027 GROUP BY v.type;", + "sql_explanation": "This query calculates the minimum and maximum distance from shore for vessels in the Pacific Ocean, grouped by vessel type. It first joins the \u0027vessels\u0027 table and the \u0027gps_positions\u0027 table based on the \u0027gps_position\u0027 and \u0027id\u0027 fields respectively. It then filters the results to only include rows where the \u0027country\u0027 field in the \u0027gps_positions\u0027 table is \u0027Pacific Ocean\u0027. Finally, it groups the results by the \u0027type\u0027 field in the \u0027vessels\u0027 table and calculates the minimum and maximum \u0027distance_from_shore\u0027 for each group." 
+}, { + "id": "1036", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display vessels with a length greater than 250 meters that have not been inspected in 2023.", + "sql_context": "CREATE TABLE vessels (vessel_id INT, vessel_name VARCHAR(255), length INT, year_built INT); CREATE TABLE vessel_inspections (vessel_id INT, inspection_date DATE, inspection_type VARCHAR(255), inspection_results VARCHAR(255));", + "sql": "SELECT v.vessel_id, v.vessel_name, v.length FROM vessels v LEFT JOIN vessel_inspections vi ON v.vessel_id \u003d vi.vessel_id WHERE v.length \u003e 250 AND YEAR(vi.inspection_date) IS NULL;", + "sql_explanation": "The SQL query uses a LEFT JOIN to combine the \"vessels\" table with the \"vessel_inspections\" table on the \"vessel_id\" column. The WHERE clause filters out any vessels with a length less than or equal to 250 meters and those with an entry in the \"vessel_inspections\" table for 2023, leaving only vessels with a length greater than 250 meters that have not been inspected that year." 
+}, { + "id": "1150", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the vessel with the highest total cargo weight", + "sql_context": "CREATE TABLE Vessels (Id INT, Name VARCHAR(50)); CREATE TABLE Cargo (VesselId INT, Weight FLOAT); INSERT INTO Vessels (Id, Name) VALUES (1, \u0027Vessel1\u0027), (2, \u0027Vessel2\u0027), (3, \u0027Vessel3\u0027); INSERT INTO Cargo (VesselId, Weight) VALUES (1, 15000), (1, 12000), (2, 18000), (3, 20000), (3, 25000);", + "sql": "SELECT Vessels.Name, SUM(Cargo.Weight) AS TotalCargoWeight FROM Vessels JOIN Cargo ON Vessels.Id \u003d Cargo.VesselId GROUP BY Vessels.Id ORDER BY TotalCargoWeight DESC LIMIT 1;", + "sql_explanation": "Join vessels and cargo tables, group by vessel, calculate the total cargo weight for each vessel, and find the vessel with the highest total cargo weight." 
+}, { + "id": "1172", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total tonnage of cargo transported by vessels in the \u0027Fishing\u0027 category in the last month?", + "sql_context": "CREATE TABLE vessels (id INT, name TEXT, type TEXT);CREATE TABLE cargoes (id INT, vessel_id INT, tonnage INT, cargo_type TEXT); INSERT INTO vessels (id, name, type) VALUES (1, \u0027Fishing Vessel A\u0027, \u0027Fishing\u0027), (2, \u0027Fishing Vessel B\u0027, \u0027Fishing\u0027); INSERT INTO cargoes (id, vessel_id, tonnage, cargo_type) VALUES (1, 1, 1000, \u0027Fish\u0027), (2, 1, 2000, \u0027Crustaceans\u0027), (3, 2, 3000, \u0027Mollusks\u0027);", + "sql": "SELECT SUM(cargoes.tonnage) FROM cargoes JOIN vessels ON cargoes.vessel_id \u003d vessels.id WHERE vessels.type \u003d \u0027Fishing\u0027 AND cargoes.id \u003e\u003d DATEADD(\u0027month\u0027, -1, CURRENT_DATE);", + "sql_explanation": "Calculates the total tonnage of cargo transported by vessels in the \u0027Fishing\u0027 category in the last month." 
+}, { + "id": "2485", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List ports with safety incidents in 2020", + "sql_context": "CREATE TABLE ports (id VARCHAR(20), name VARCHAR(20)); CREATE TABLE safety_incidents (port_id VARCHAR(20), incident_type VARCHAR(50), incident_date DATE); INSERT INTO ports (id, name) VALUES (\u0027POR013\u0027, \u0027PORT013\u0027), (\u0027POR015\u0027, \u0027PORT015\u0027), (\u0027POR016\u0027, \u0027PORT016\u0027); INSERT INTO safety_incidents (port_id, incident_type, incident_date) VALUES (\u0027POR013\u0027, \u0027Collision\u0027, \u00272020-03-15\u0027), (\u0027POR015\u0027, \u0027Fire\u0027, \u00272019-12-10\u0027), (\u0027POR015\u0027, \u0027Grounding\u0027, \u00272020-06-22\u0027), (\u0027POR016\u0027, \u0027Collision\u0027, \u00272020-11-28\u0027);", + "sql": "SELECT ports.name FROM ports JOIN safety_incidents ON ports.id \u003d safety_incidents.port_id WHERE YEAR(incident_date) \u003d 2020;", + "sql_explanation": "This query lists ports with safety incidents in 2020 by joining the ports and safety_incidents tables based on their relationships and filtering for the incident date year." 
+}, { + "id": "2564", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo weight transported by vessels with an average speed above 15?", + "sql_context": "CREATE TABLE vessels(id INT, name VARCHAR(50), country VARCHAR(50), average_speed DECIMAL(5,2)); CREATE TABLE cargo_data(id INT, vessel_name VARCHAR(50), destination VARCHAR(50), cargo_weight DECIMAL(5,2)); INSERT INTO vessels(id, name, country, average_speed) VALUES (1, \u0027Vessel A\u0027, \u0027Philippines\u0027, 14.5), (2, \u0027Vessel B\u0027, \u0027Philippines\u0027, 16.3); INSERT INTO cargo_data(id, vessel_name, destination, cargo_weight) VALUES (1, \u0027Vessel A\u0027, \u0027Port C\u0027, 200.0), (2, \u0027Vessel B\u0027, \u0027Port D\u0027, 250.0);", + "sql": "SELECT SUM(cargo_weight) FROM cargo_data JOIN vessels ON cargo_data.vessel_name \u003d vessels.name WHERE average_speed \u003e 15;", + "sql_explanation": "This query calculates the total cargo weight transported by vessels with an average speed above 15 by using a JOIN between the vessels and cargo_data tables, and filtering on the average_speed column. The total cargo weight is calculated using the SUM function on the cargo_weight column." 
+}, { + "id": "3423", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of cargoes for each vessel type?", + "sql_context": "CREATE TABLE vessels (id VARCHAR(20), name VARCHAR(20), type VARCHAR(20)); CREATE TABLE cargoes (id INT, vessel_id VARCHAR(20), weight INT); INSERT INTO vessels (id, name, type) VALUES (\u00271\u0027, \u0027VesselA\u0027, \u0027Cargo\u0027), (\u00272\u0027, \u0027VesselB\u0027, \u0027Tanker\u0027), (\u00273\u0027, \u0027VesselC\u0027, \u0027Cargo\u0027), (\u00274\u0027, \u0027VesselD\u0027, \u0027Passenger\u0027); INSERT INTO cargoes (id, vessel_id, weight) VALUES (1, \u00271\u0027, 50000), (2, \u00271\u0027, 75000), (3, \u00272\u0027, 100000), (4, \u00273\u0027, 60000), (5, \u00273\u0027, 65000), (6, \u00274\u0027, 0);", + "sql": "SELECT v.type, AVG(c.weight) FROM vessels v JOIN cargoes c ON v.id \u003d c.vessel_id GROUP BY v.type;", + "sql_explanation": "Join vessels and cargoes tables on id and vessel_id columns. Group vessels table by type and calculate the average weight of cargoes for each type." 
+}, { + "id": "280", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of employees and total funding for each company in the e-commerce sector, sorted by funding amount in descending order.", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, employees INT); INSERT INTO companies (id, name, industry, employees) VALUES (1, \u0027ShopEase\u0027, \u0027E-commerce\u0027, 50); CREATE TABLE funding (company_id INT, amount INT); INSERT INTO funding (company_id, amount) VALUES (1, 750000);", + "sql": "SELECT companies.name, companies.employees, SUM(funding.amount) FROM companies INNER JOIN funding ON companies.id \u003d funding.company_id WHERE companies.industry \u003d \u0027E-commerce\u0027 GROUP BY companies.name, companies.employees ORDER BY SUM(funding.amount) DESC;", + "sql_explanation": "The SQL query performs an inner join between the companies and funding tables, filters for companies in the e-commerce industry, calculates the sum of the funding amounts for each company, groups the results by company name and number of employees, and sorts the results by funding amount in descending order." 
+}, { + "id": "368", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of startups founded by individuals from historically marginalized communities in the augmented reality industry that have received funding, but only for startups founded after 2015?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founding_date DATE, founder_marginalized TEXT); INSERT INTO company (id, name, industry, founding_date, founder_marginalized) VALUES (1, \u0027AugmentedRealityCo\u0027, \u0027Augmented Reality\u0027, \u00272016-01-01\u0027, \u0027Yes\u0027);", + "sql": "SELECT COUNT(DISTINCT company.id) FROM company JOIN funding_records ON company.id \u003d funding_records.company_id WHERE company.industry \u003d \u0027Augmented Reality\u0027 AND company.founder_marginalized \u003d \u0027Yes\u0027 AND company.founding_date \u003e \u00272015-01-01\u0027;", + "sql_explanation": "The SQL query identifies the number of startups founded by individuals from historically marginalized communities in the augmented reality industry that have received funding, but only for startups founded after 2015. It does so by joining the company and funding_records tables on the company id and filtering for the augmented reality industry, founders from historically marginalized communities, and a founding date after 2015. It then counts the distinct number of company ids." 
+}, { + "id": "517", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total funding received by companies with female founders in the last 5 years", + "sql_context": "CREATE TABLE funds (company_id INT, funding_amount DECIMAL(10, 2), funding_date DATE); INSERT INTO funds VALUES (1, 500000, \u00272022-01-01\u0027); INSERT INTO funds VALUES (2, 300000, \u00272021-06-15\u0027);", + "sql": "SELECT SUM(funding_amount) FROM funds INNER JOIN companies ON funds.company_id \u003d companies.company_id WHERE companies.founder_gender \u003d \u0027Female\u0027 AND funds.funding_date BETWEEN DATEADD(year, -5, GETDATE()) AND GETDATE();", + "sql_explanation": "This SQL query joins the \u0027funds\u0027 and \u0027companies\u0027 tables on company_id. It filters for companies with female founders and funding dates within the last 5 years, then calculates the total funding amount." 
+}, { + "id": "795", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of employees for companies founded by BIPOC (Black, Indigenous, and People of Color) in the e-commerce industry?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founding_year INT, founder_identity TEXT); INSERT INTO companies (id, name, industry, founding_year, founder_identity) VALUES (1, \u0027EcomStore\u0027, \u0027E-commerce\u0027, 2018, \u0027BIPOC\u0027); INSERT INTO companies (id, name, industry, founding_year, founder_identity) VALUES (2, \u0027ShopNet\u0027, \u0027Retail\u0027, 2019, \u0027White\u0027);", + "sql": "SELECT MAX(num_employees) FROM company_details INNER JOIN companies ON company_details.company_id \u003d companies.id WHERE companies.founder_identity \u003d \u0027BIPOC\u0027 AND companies.industry \u003d \u0027E-commerce\u0027;", + "sql_explanation": "The SQL query calculates the maximum number of employees for companies founded by BIPOC in the e-commerce industry by joining the company_details and companies tables on the company_id column. It then filters for rows where the founder_identity is \u0027BIPOC\u0027 and the industry is \u0027E-commerce\u0027, and finally calculates the maximum number of employees using the MAX aggregation function." 
+}, { + "id": "829", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum amount of funding raised by companies founded by immigrants in the fintech industry?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founding_year INT, founder_immigrant TEXT); INSERT INTO companies (id, name, industry, founding_year, founder_immigrant) VALUES (1, \u0027FinTechInnovations\u0027, \u0027Fintech\u0027, 2015, \u0027Yes\u0027); INSERT INTO companies (id, name, industry, founding_year, founder_immigrant) VALUES (2, \u0027FinanceServices\u0027, \u0027Finance\u0027, 2017, \u0027No\u0027);", + "sql": "SELECT MIN(funding_amount) FROM funding_records INNER JOIN companies ON funding_records.company_id \u003d companies.id WHERE companies.founder_immigrant \u003d \u0027Yes\u0027 AND companies.industry \u003d \u0027Fintech\u0027;", + "sql_explanation": "The SQL query calculates the minimum amount of funding raised by companies founded by immigrants in the fintech industry by joining the funding_records and companies tables on the company_id column. It then filters for rows where the founder_immigrant is \u0027Yes\u0027 and the industry is \u0027Fintech\u0027, and finally calculates the minimum funding amount using the MIN aggregation function." 
+}, { + "id": "859", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of funding records for companies with male founders in the EdTech industry?", + "sql_context": "CREATE TABLE Companies (id INT, name VARCHAR(50), industry VARCHAR(50), country VARCHAR(50), founding_year INT, founder_gender VARCHAR(10)); CREATE TABLE Funding (id INT, company_name VARCHAR(50), funding_amount INT); INSERT INTO Companies (id, name, industry, country, founding_year, founder_gender) VALUES (1, \u0027EduMale\u0027, \u0027EdTech\u0027, \u0027USA\u0027, 2016, \u0027Male\u0027); INSERT INTO Funding (id, company_name, funding_amount) VALUES (1, \u0027EduMale\u0027, 3000000);", + "sql": "SELECT COUNT(*) as funding_records_count FROM Funding INNER JOIN Companies ON Funding.company_name \u003d Companies.name WHERE Companies.industry \u003d \u0027EdTech\u0027 AND Companies.founder_gender \u003d \u0027Male\u0027;", + "sql_explanation": "The SQL query first creates a table for funding information and inserts a record for an EdTech company with a male founder. Then, it calculates the total number of funding records for companies with male founders in the EdTech industry by joining the two tables and counting the number of records for the filtered records." 
+}, { + "id": "942", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of companies that received funding after being founded for more than 5 years.", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, founding_date DATE, founder_gender TEXT); INSERT INTO companies (id, name, founding_date, founder_gender) VALUES (1, \u0027TechCo\u0027, \u00272012-01-01\u0027, \u0027Male\u0027); INSERT INTO companies (id, name, founding_date, founder_gender) VALUES (2, \u0027GreenEnergy\u0027, \u00272015-01-01\u0027, \u0027Female\u0027); CREATE TABLE funding_records (id INT, company_id INT, funding_amount INT, funding_date DATE); INSERT INTO funding_records (id, company_id, funding_amount, funding_date) VALUES (1, 1, 1000000, \u00272018-01-01\u0027);", + "sql": "SELECT companies.name FROM companies JOIN funding_records ON companies.id \u003d funding_records.company_id WHERE DATEDIFF(funding_records.funding_date, companies.founding_date) \u003e (5 * 365)", + "sql_explanation": "This query joins the companies table with the funding_records table, then filters the results to include only those companies where the difference between the funding_date and founding_date is more than 5 years." 
+}, { + "id": "1137", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the founders of the startups that have received funding of over 3000000 and are from underrepresented communities?", + "sql_context": "CREATE TABLE company (name VARCHAR(255), founder_underrepresented BOOLEAN, founder_name VARCHAR(100)); INSERT INTO company (name, founder_underrepresented, founder_name) VALUES (\u0027CompanyA\u0027, FALSE, \u0027John Smith\u0027), (\u0027CompanyB\u0027, TRUE, \u0027Jane Doe\u0027), (\u0027CompanyC\u0027, TRUE, \u0027Michael Brown\u0027), (\u0027CompanyD\u0027, FALSE, \u0027Sarah Johnson\u0027), (\u0027CompanyE\u0027, TRUE, \u0027David Kim\u0027), (\u0027CompanyF\u0027, FALSE, \u0027Emily Wong\u0027); CREATE TABLE funding (company_name VARCHAR(255), amount INT); INSERT INTO funding (company_name, amount) VALUES (\u0027CompanyA\u0027, 2000000), (\u0027CompanyB\u0027, 3500000), (\u0027CompanyC\u0027, 4000000), (\u0027CompanyD\u0027, 1000000), (\u0027CompanyE\u0027, 3000000), (\u0027CompanyF\u0027, 5000000);", + "sql": "SELECT company.founder_name FROM company INNER JOIN funding ON company.name \u003d funding.company_name WHERE funding.amount \u003e 3000000 AND company.founder_underrepresented \u003d TRUE;", + "sql_explanation": "The SQL query joins the company and funding tables on the name and company_name columns respectively. It then filters the records where the amount is greater than 3000000 and the founder_underrepresented is TRUE and returns the founder_name column for the filtered records." 
+}, { + "id": "1291", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for companies in the healthcare industry?", + "sql_context": "CREATE TABLE funding (funding_id INT, company_id INT, amount DECIMAL(10,2)); CREATE TABLE companies (company_id INT, industry VARCHAR(255), founding_year INT); INSERT INTO funding (funding_id, company_id, amount) VALUES (1, 1, 150000.00), (2, 2, 200000.00), (3, 3, 250000.00); INSERT INTO companies (company_id, industry, founding_year) VALUES (1, \u0027Tech\u0027, 2018), (2, \u0027Healthcare\u0027, 2017), (3, \u0027Tech\u0027, 2019);", + "sql": "SELECT AVG(funding.amount) as avg_funding_healthcare FROM funding JOIN companies ON funding.company_id \u003d companies.company_id WHERE companies.industry \u003d \u0027Healthcare\u0027;", + "sql_explanation": "This query calculates the average funding amount for companies in the healthcare industry. It joins the funding and companies tables on the company_id column and filters the result to only include rows where the industry column is \u0027Healthcare\u0027. It then calculates the average of the amount column for the filtered result." 
+}, { + "id": "1339", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of funding received by a company founded by a veteran in the defense sector?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founder_veteran BOOLEAN, founding_date DATE);CREATE TABLE funding (id INT, company_id INT, amount INT); INSERT INTO company (id, name, industry, founder_veteran, founding_date) VALUES (1, \u0027SecureTech\u0027, \u0027Defense\u0027, true, \u00272018-02-22\u0027); INSERT INTO funding (id, company_id, amount) VALUES (1, 1, 1500000);", + "sql": "SELECT MAX(funding.amount) FROM funding INNER JOIN company ON funding.company_id \u003d company.id WHERE company.founder_veteran \u003d true AND company.industry \u003d \u0027Defense\u0027;", + "sql_explanation": "This query calculates the maximum amount of funding received by a company founded by a veteran in the defense sector. It first joins the funding and company tables on the company_id field. Then, it filters the results to only include records where the founder_veteran field is true and the industry is \u0027Defense\u0027. Finally, it calculates the maximum funding amount using the MAX function." 
+}, { + "id": "2216", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many startups were founded by women in the food sector?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founder_gender TEXT); INSERT INTO companies (id, name, industry, founder_gender) VALUES (1, \u0027LatAmFood\u0027, \u0027Food\u0027, \u0027Female\u0027); INSERT INTO companies (id, name, industry, founder_gender) VALUES (2, \u0027CleanEnergyMale\u0027, \u0027CleanEnergy\u0027, \u0027Male\u0027); CREATE TABLE founders (id INT, name TEXT, gender TEXT); INSERT INTO founders (id, name, gender) VALUES (1, \u0027Alex\u0027, \u0027Female\u0027); INSERT INTO founders (id, name, gender) VALUES (2, \u0027Jordan\u0027, \u0027Male\u0027);", + "sql": "SELECT COUNT(*) FROM companies INNER JOIN founders ON companies.founder_gender \u003d founders.gender WHERE companies.industry \u003d \u0027Food\u0027;", + "sql_explanation": "This SQL query counts the number of startups founded by women in the food sector. It first performs an inner join between the companies and founders tables, based on the founder_gender column. Then, it filters the results to only include rows where the industry is \u0027Food\u0027. Finally, it counts the number of rows in the resulting table." 
+}, { + "id": "2281", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the names of companies that have not received any funding", + "sql_context": "CREATE TABLE funds (company_id INT, funding_amount DECIMAL(10, 2), funding_date DATE); INSERT INTO funds VALUES (1, 500000, \u00272022-01-01\u0027); INSERT INTO funds VALUES (2, 300000, \u00272021-06-15\u0027);", + "sql": "SELECT company_name FROM companies LEFT JOIN funds ON companies.company_id \u003d funds.company_id WHERE funds.funding_amount IS NULL;", + "sql_explanation": "This SQL query performs a left join between the \u0027companies\u0027 and \u0027funds\u0027 tables on company_id. It then filters for companies with NULL funding_amount, indicating companies that have not received any funding." 
+}, { + "id": "2383", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum funding amount received by a startup in the biotech sector?", + "sql_context": "CREATE TABLE funding(startup_id INT, funding_amount DECIMAL(10, 2)); INSERT INTO funding(startup_id, funding_amount) VALUES (1, 1000000.00); CREATE TABLE startups(id INT, name TEXT, industry TEXT); INSERT INTO startups(id, name, industry) VALUES (1, \u0027BiotechMax\u0027, \u0027Biotech\u0027);", + "sql": "SELECT MAX(funding_amount) FROM funding JOIN startups ON startups.id \u003d funding.startup_id WHERE startups.industry \u003d \u0027Biotech\u0027;", + "sql_explanation": "The SQL query joins the funding and startups tables on the startup_id column, filters for rows where the industry is \u0027Biotech\u0027, and calculates the maximum value in the funding_amount column for these rows." 
+}, { + "id": "2473", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which company launched the most recent innovation?", + "sql_context": "CREATE TABLE Company (id INT, name VARCHAR(50)); INSERT INTO Company (id, name) VALUES (1, \u0027Acme Inc\u0027); INSERT INTO Company (id, name) VALUES (2, \u0027Beta Corp\u0027); INSERT INTO Company (id, name) VALUES (3, \u0027Gamma Startup\u0027); CREATE TABLE Innovation (company_id INT, innovation_type VARCHAR(50), launch_date DATE); INSERT INTO Innovation (company_id, innovation_type, launch_date) VALUES (1, \u0027AI\u0027, \u00272014-01-01\u0027); INSERT INTO Innovation (company_id, innovation_type, launch_date) VALUES (1, \u0027AR\u0027, \u00272017-01-01\u0027); INSERT INTO Innovation (company_id, innovation_type, launch_date) VALUES (2, \u0027Blockchain\u0027, \u00272016-05-10\u0027); INSERT INTO Innovation (company_id, innovation_type, launch_date) VALUES (3, \u0027Biotech\u0027, \u00272019-02-14\u0027);", + "sql": "SELECT c.name, MAX(launch_date) as max_launch_date FROM Company c JOIN Innovation i ON c.id \u003d i.company_id GROUP BY c.name;", + "sql_explanation": "This query joins the Company and Innovation tables, and calculates the most recent innovation launch date for each company." 
+}, { + "id": "741", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many satellites were launched by American companies in the 1990s?", + "sql_context": "CREATE TABLE Satellite (id INT, name VARCHAR(50), type VARCHAR(50), manufacturer VARCHAR(50), launch_date DATE); INSERT INTO Satellite (id, name, type, manufacturer, launch_date) VALUES (1, \u0027Landsat 1\u0027, \u0027Earth Observation\u0027, \u0027Boeing\u0027, \u00271972-07-23\u0027); INSERT INTO Satellite (id, name, type, manufacturer, launch_date) VALUES (2, \u0027Envisat\u0027, \u0027Earth Observation\u0027, \u0027Astrium\u0027, \u00272002-03-01\u0027); INSERT INTO Satellite (id, name, type, manufacturer, launch_date) VALUES (3, \u0027ResourceSat 1\u0027, \u0027Earth Observation\u0027, \u0027ISRO\u0027, \u00272003-10-17\u0027); INSERT INTO Satellite (id, name, type, manufacturer, launch_date) VALUES (4, \u0027Starlink 1\u0027, \u0027Communications\u0027, \u0027SpaceX\u0027, \u00271990-12-21\u0027);", + "sql": "SELECT COUNT(s.id) as satellite_count FROM Satellite s INNER JOIN Manufacturer m ON s.manufacturer \u003d m.name WHERE m.country \u003d \u0027United States\u0027 AND s.launch_date BETWEEN \u00271990-01-01\u0027 AND \u00271999-12-31\u0027;", + "sql_explanation": "This SQL query retrieves the number of satellites in the Satellite table that are manufactured by companies in the United States, as defined in the Manufacturer table, and were launched between the years 1990 and 1999. It uses an INNER JOIN to only include satellites with manufacturers in the Manufacturer table." 
+}, { + "id": "884", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the satellites deployed by Blue Origin and the countries from which they were deployed?", + "sql_context": "CREATE TABLE BlueOriginSatellites (SatelliteID INT, SatelliteName VARCHAR(50), Manufacturer VARCHAR(50));CREATE TABLE BlueOriginDeployment (ID INT, SatelliteID INT, Country VARCHAR(50));INSERT INTO BlueOriginSatellites (SatelliteID, SatelliteName, Manufacturer) VALUES (1, \u0027New Glenn 1\u0027, \u0027Blue Origin\u0027), (2, \u0027New Glenn 2\u0027, \u0027Blue Origin\u0027);INSERT INTO BlueOriginDeployment (ID, SatelliteID, Country) VALUES (1, 1, \u0027USA\u0027), (2, 2, \u0027Brazil\u0027);", + "sql": "SELECT SatelliteName, Country FROM BlueOriginSatellites JOIN BlueOriginDeployment ON BlueOriginSatellites.SatelliteID \u003d BlueOriginDeployment.SatelliteID WHERE Manufacturer \u003d \u0027Blue Origin\u0027;", + "sql_explanation": "This query lists all the names of the satellites deployed by Blue Origin and the countries from which they were deployed." 
+}, { + "id": "1236", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many spacecraft were launched by Chinese companies before 2000?", + "sql_context": "CREATE TABLE Spacecraft (id INT, name VARCHAR(50), manufacturer VARCHAR(50), launch_date DATE); INSERT INTO Spacecraft (id, name, manufacturer, launch_date) VALUES (1, \u0027Shenzhou 1\u0027, \u0027CNSA\u0027, \u00271999-11-20\u0027); INSERT INTO Spacecraft (id, name, manufacturer, launch_date) VALUES (2, \u0027Tanwen 1\u0027, \u0027CNSA\u0027, \u00271990-04-26\u0027);", + "sql": "SELECT COUNT(s.id) as spacecraft_count FROM Spacecraft s INNER JOIN Manufacturer m ON s.manufacturer \u003d m.name WHERE m.country \u003d \u0027China\u0027 AND s.launch_date \u003c \u00272000-01-01\u0027;", + "sql_explanation": "This SQL query retrieves the number of spacecraft in the Spacecraft table that are manufactured by companies in China, as defined in the Manufacturer table, and were launched before the year 2000. It uses an INNER JOIN to only include spacecraft with manufacturers in the Manufacturer table." 
+}, { + "id": "2931", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which aircraft have never been involved in an accident?", + "sql_context": "CREATE TABLE Aircraft (AircraftID INT, Name VARCHAR(50), Manufacturer VARCHAR(50)); CREATE TABLE Accidents (AccidentID INT, AircraftID INT, Date DATETIME, Description VARCHAR(50)); INSERT INTO Aircraft (AircraftID, Name, Manufacturer) VALUES (1, \u0027A320\u0027, \u0027Airbus\u0027), (2, \u0027737\u0027, \u0027Boeing\u0027); INSERT INTO Accidents (AccidentID, AircraftID, Date, Description) VALUES (1, 1, \u00272018-02-20\u0027, \u0027Landing gear failure\u0027);", + "sql": "SELECT A.Name FROM Aircraft A LEFT JOIN Accidents B ON A.AircraftID \u003d B.AircraftID WHERE B.AccidentID IS NULL;", + "sql_explanation": "This query performs a left join between the Aircraft and Accidents tables on the AircraftID column and filters the result set based on the AccidentID column, returning only those aircraft with no matching records in the Accidents table." 
+}, { + "id": "34", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average waste generation rate (kg/day) for the \u0027Nitrogen Fertilizer Production\u0027 process in the past quarter?", + "sql_context": "CREATE TABLE manufacturing_processes (process_id INT, name TEXT); CREATE TABLE waste_generation (process_id INT, waste_amount INT, generation_date DATE);", + "sql": "SELECT manufacturing_processes.name, AVG(waste_generation.waste_amount/1000.0/days_in_period) FROM manufacturing_processes INNER JOIN waste_generation ON manufacturing_processes.process_id \u003d waste_generation.process_id WHERE manufacturing_processes.name \u003d \u0027Nitrogen Fertilizer Production\u0027 AND waste_generation.generation_date \u003e DATEADD(quarter, -1, GETDATE()) GROUP BY manufacturing_processes.name;", + "sql_explanation": "The SQL query performs an inner join between the manufacturing_processes and waste_generation tables on the process_id column. It filters the results to only include rows where the name is \u0027Nitrogen Fertilizer Production\u0027 and the generation_date is within the past quarter. The query then calculates the average waste amount (converted to kilograms) for each group, dividing it by the number of days in the period. This returns the average waste generation rate (kg/day) for the \u0027Nitrogen Fertilizer Production\u0027 process in the past quarter." 
+}, { + "id": "309", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view \u0027safety_protocols\u0027 that includes \u0027chemical_id\u0027, \u0027chemical_name\u0027, and \u0027safety_rating\u0027 columns from \u0027chemical_inventory\u0027 table and \u0027safety_ratings\u0027 table", + "sql_context": "CREATE TABLE safety_ratings (chemical_id INT, safety_rating INT);", + "sql": "CREATE VIEW safety_protocols AS SELECT chemical_inventory.chemical_id, chemical_inventory.chemical_name, safety_ratings.safety_rating FROM chemical_inventory INNER JOIN safety_ratings ON chemical_inventory.chemical_id \u003d safety_ratings.chemical_id;", + "sql_explanation": "Creating a view named \u0027safety_protocols\u0027 that includes \u0027chemical_id\u0027, \u0027chemical_name\u0027, and \u0027safety_rating\u0027 columns from \u0027chemical_inventory\u0027 and \u0027safety_ratings\u0027 tables by performing an INNER JOIN on the \u0027chemical_id\u0027 column" +}, { + "id": "746", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total greenhouse gas emissions of chemical manufacturers in the US and Canada?", + "sql_context": "CREATE TABLE chemical_manufacturers (manufacturer_id INT, name VARCHAR(255), 
country VARCHAR(255)); INSERT INTO chemical_manufacturers (manufacturer_id, name, country) VALUES (1, \u0027ManufacturerA\u0027, \u0027USA\u0027), (2, \u0027ManufacturerB\u0027, \u0027Canada\u0027), (3, \u0027ManufacturerC\u0027, \u0027USA\u0027); CREATE TABLE emissions (emission_id INT, manufacturer_id INT, gas_type VARCHAR(255), amount INT); INSERT INTO emissions (emission_id, manufacturer_id, gas_type, amount) VALUES (1, 1, \u0027CO2\u0027, 1000), (2, 1, \u0027CH4\u0027, 200), (3, 2, \u0027CO2\u0027, 1500), (4, 3, \u0027CO2\u0027, 1200), (5, 3, \u0027CH4\u0027, 300)", + "sql": "SELECT cm.name, SUM(e.amount) FROM chemical_manufacturers cm JOIN emissions e ON cm.manufacturer_id \u003d e.manufacturer_id WHERE cm.country IN (\u0027USA\u0027, \u0027Canada\u0027) AND e.gas_type \u003d \u0027CO2\u0027 GROUP BY cm.name", + "sql_explanation": "This query first joins the chemical_manufacturers and emissions tables on the manufacturer_id column. It then filters the rows for manufacturers in the USA and Canada and for CO2 emissions. Finally, it calculates the total CO2 emissions for each manufacturer using the SUM function and groups the results by manufacturer name." 
+}, { + "id": "929", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum safe temperature for each chemical used in a specific product?", + "sql_context": "CREATE TABLE Chemicals (id INT, name VARCHAR(255), max_safe_temp FLOAT); CREATE TABLE Products (id INT, product_name VARCHAR(255), chemical_id INT);", + "sql": "SELECT Products.product_name, Chemicals.name, Chemicals.max_safe_temp FROM Products INNER JOIN Chemicals ON Products.chemical_id \u003d Chemicals.id WHERE Products.product_name \u003d \u0027Solvents\u0027;", + "sql_explanation": "This query returns the maximum safe temperature for each chemical used in a specific product. It does this by joining the Products and Chemicals tables on the chemical_id column and filtering the results to only include records where the product_name is \u0027Solvents\u0027. This returns the name and maximum safe temperature for each chemical used in the specified product." 
+}, { + "id": "1016", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of waste generated by each manufacturing plant in the month of January 2022?", + "sql_context": "CREATE TABLE manufacturing_plants (id INT, name VARCHAR(50));CREATE TABLE waste_generation (plant_id INT, date DATE, amount INT); INSERT INTO manufacturing_plants (id, name) VALUES (1, \u0027Plant A\u0027), (2, \u0027Plant B\u0027); INSERT INTO waste_generation (plant_id, date, amount) VALUES (1, \u00272022-01-01\u0027, 100), (1, \u00272022-01-15\u0027, 150), (2, \u00272022-01-05\u0027, 200), (2, \u00272022-01-20\u0027, 250);", + "sql": "SELECT m.name, SUM(w.amount) FROM manufacturing_plants m INNER JOIN waste_generation w ON m.id \u003d w.plant_id WHERE w.date \u003e\u003d \u00272022-01-01\u0027 AND w.date \u003c\u003d \u00272022-01-31\u0027 GROUP BY m.name;", + "sql_explanation": "This query performs an inner join between the manufacturing_plants and waste_generation tables, based on the plant_id foreign key in the waste_generation table. It then filters the results to only include records from January 2022, calculates the total waste amount for each plant, and groups the results by plant name." 
+}, { + "id": "1056", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many safety protocol violations have been recorded for each facility in 2022, and what is the total number of violations?", + "sql_context": "CREATE TABLE facility (id INT, name VARCHAR(255)); CREATE TABLE safety_record (id INT, facility_id INT, record_date DATE, violation_count INT); INSERT INTO facility (id, name) VALUES (1, \u0027Facility A\u0027), (2, \u0027Facility B\u0027); INSERT INTO safety_record (id, facility_id, record_date, violation_count) VALUES (1, 1, \u00272022-01-01\u0027, 3), (2, 1, \u00272022-02-01\u0027, 2), (3, 2, \u00272022-01-01\u0027, 1), (4, 2, \u00272022-02-01\u0027, 4);", + "sql": "SELECT f.name, SUM(sr.violation_count) as total_violations FROM facility f INNER JOIN safety_record sr ON f.id \u003d sr.facility_id WHERE YEAR(sr.record_date) \u003d 2022 GROUP BY f.name;", + "sql_explanation": "This SQL query calculates how many safety protocol violations have been recorded for each facility in 2022, and what is the total number of violations. It performs an inner join between the facility and safety_record tables on the facility_id column, filters records for 2022, and groups results by facility name to calculate the total number of violations." 
+}, { + "id": "1285", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which manufacturers in the Asian region have a safety score above 4.5 for their chemical products?", + "sql_context": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName TEXT, Region TEXT); INSERT INTO Manufacturers (ManufacturerID, ManufacturerName, Region) VALUES (1, \u0027ABC Chemicals\u0027, \u0027Asia\u0027), (2, \u0027XYZ Chemicals\u0027, \u0027North America\u0027), (3, \u0027 DEF Chemicals\u0027, \u0027Asia\u0027); CREATE TABLE ChemicalProducts (ProductID INT, Chemical TEXT, ManufacturerID INT, SafetyScore DECIMAL(3,2)); INSERT INTO ChemicalProducts (ProductID, Chemical, ManufacturerID, SafetyScore) VALUES (1, \u0027Acetone\u0027, 1, 4.2), (2, \u0027Ethanol\u0027, 1, 4.8), (3, \u0027Methanol\u0027, 2, 5.0), (4, \u0027Propanol\u0027, 3, 4.7), (5, \u0027Butanol\u0027, 3, 4.9);", + "sql": "SELECT M.ManufacturerName FROM ChemicalProducts CP INNER JOIN Manufacturers M ON CP.ManufacturerID \u003d M.ManufacturerID WHERE M.Region \u003d \u0027Asia\u0027 AND CP.SafetyScore \u003e 4.5;", + "sql_explanation": "The SQL query performs an inner join between the ChemicalProducts and Manufacturers tables, based on the ManufacturerID, and filters for the Asian region and safety scores above 4.5. It then retrieves the names of manufacturers in the Asian region with a safety score above 4.5 for their chemical products." 
+}, { + "id": "1321", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which suppliers provide raw materials for the production of products \u0027A101\u0027 and \u0027C567\u0027?", + "sql_context": "CREATE TABLE Supplies (supplier_id INTEGER, raw_material_code TEXT); INSERT INTO Supplies (supplier_id, raw_material_code) VALUES (123, \u0027M123\u0027), (456, \u0027M234\u0027), (789, \u0027M345\u0027), (111, \u0027M456\u0027), (123, \u0027M567\u0027); CREATE TABLE Products (product_code TEXT, raw_material_code TEXT); INSERT INTO Products (product_code, raw_material_code) VALUES (\u0027A101\u0027, \u0027M123\u0027), (\u0027B203\u0027, \u0027M234\u0027), (\u0027C405\u0027, \u0027M345\u0027), (\u0027A101\u0027, \u0027M567\u0027), (\u0027C567\u0027, \u0027M456\u0027);", + "sql": "SELECT DISTINCT s.supplier_id FROM Supplies s JOIN Products p ON s.raw_material_code \u003d p.raw_material_code WHERE p.product_code \u003d \u0027A101\u0027 OR p.product_code \u003d \u0027C567\u0027;", + "sql_explanation": "This query first identifies the raw materials used in the production of products \u0027A101\u0027 and \u0027C567\u0027. It then finds the suppliers for these raw materials. The result set includes the supplier IDs for these suppliers." 
+}, { + "id": "1525", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum safety score for chemical products manufactured in the Asian region?", + "sql_context": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName TEXT, Region TEXT); INSERT INTO Manufacturers (ManufacturerID, ManufacturerName, Region) VALUES (1, \u0027ABC Chemicals\u0027, \u0027Asia\u0027), (2, \u0027XYZ Chemicals\u0027, \u0027North America\u0027), (3, \u0027 DEF Chemicals\u0027, \u0027Asia\u0027); CREATE TABLE ChemicalProducts (ProductID INT, Chemical TEXT, ManufacturerID INT, SafetyScore DECIMAL(3,2)); INSERT INTO ChemicalProducts (ProductID, Chemical, ManufacturerID, SafetyScore) VALUES (1, \u0027Acetone\u0027, 1, 3.2), (2, \u0027Ethanol\u0027, 1, 4.5), (3, \u0027Methanol\u0027, 2, 5.0), (4, \u0027Propanol\u0027, 3, 4.8), (5, \u0027Butanol\u0027, 3, 5.0);", + "sql": "SELECT MAX(CP.SafetyScore) AS MaxScore FROM ChemicalProducts CP INNER JOIN Manufacturers M ON CP.ManufacturerID \u003d M.ManufacturerID WHERE M.Region \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query performs an inner join between the ChemicalProducts and Manufacturers tables, based on the ManufacturerID, and filters for the Asian region. It then calculates the maximum safety score for chemical products manufactured in the Asian region." 
+}, { + "id": "1696", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum safe operating temperature for each chemical stored at a specific facility?", + "sql_context": "CREATE TABLE Chemicals (id INT, name VARCHAR(255), max_safe_temp FLOAT); CREATE TABLE Storage (id INT, chemical_id INT, facility_id INT, storage_date DATE);", + "sql": "SELECT Chemicals.name, Chemicals.max_safe_temp FROM Chemicals INNER JOIN Storage ON Chemicals.id \u003d Storage.chemical_id WHERE Storage.facility_id \u003d 1;", + "sql_explanation": "This query returns the maximum safe operating temperature for each chemical stored at a specific facility. It does this by joining the Chemicals and Storage tables on the chemical_id column and filtering the results to only include records where the facility_id is 1. This returns the name and maximum safe temperature for each chemical stored at the specified facility." 
+}, { + "id": "2240", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and quantities of chemicals produced by factories located in California?", + "sql_context": "CREATE TABLE factories (id INT, name TEXT, location TEXT); INSERT INTO factories (id, name, location) VALUES (1, \u0027Factory A\u0027, \u0027California\u0027), (2, \u0027Factory B\u0027, \u0027Texas\u0027); CREATE TABLE chemical_produced (factory_id INT, chemical_name TEXT, quantity INT); INSERT INTO chemical_produced (factory_id, chemical_name, quantity) VALUES (1, \u0027Chemical X\u0027, 100), (1, \u0027Chemical Y\u0027, 200), (2, \u0027Chemical Z\u0027, 300);", + "sql": "SELECT chemical_name, quantity FROM chemical_produced CP JOIN factories F ON CP.factory_id \u003d F.id WHERE F.location \u003d \u0027California\u0027;", + "sql_explanation": "The SQL query first joins the chemical_produced table with the factories table on the factory_id column, then filters the results to only include rows where the location column of the factories table is \u0027California\u0027, and finally selects the chemical_name and quantity columns of the resulting dataset." 
+}, { + "id": "480", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many products have been recalled in each country for safety reasons, in the last year?", + "sql_context": "CREATE TABLE Product_Safety_Records (id INT, product_id INT, inspection_date DATE, result VARCHAR(255)); CREATE TABLE Recalls (id INT, product_id INT, recall_date DATE, reason VARCHAR(255), country VARCHAR(255));", + "sql": "SELECT r.country, COUNT(DISTINCT r.id) as number_of_recalls FROM Recalls r JOIN Product_Safety_Records psr ON r.product_id \u003d psr.product_id WHERE r.recall_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY r.country;", + "sql_explanation": "This query counts the number of product recalls in each country for safety reasons, in the last year. It does so by joining the Product_Safety_Records and Recalls tables together based on the product_id. Then, it filters the data by recall date and calculates the number of distinct recalls for each country, and groups the results by country." 
+}, { + "id": "767", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 2 cosmetic brands with the most product recalls in Canada?", + "sql_context": "CREATE TABLE ProductRecalls (BrandID INT, ProductID INT, RecallDate DATE); CREATE TABLE Brands (BrandID INT, BrandName VARCHAR(50)); INSERT INTO ProductRecalls (BrandID, ProductID, RecallDate) VALUES (2001, 200, \u00272022-01-01\u0027), (2002, 201, \u00272022-02-01\u0027), (2003, 202, \u00272022-03-01\u0027), (2001, 203, \u00272022-04-01\u0027), (2004, 204, \u00272022-05-01\u0027); INSERT INTO Brands (BrandID, BrandName) VALUES (2001, \u0027BrandA\u0027), (2002, \u0027BrandB\u0027), (2003, \u0027BrandC\u0027), (2004, \u0027BrandD\u0027);", + "sql": "SELECT B.BrandName, COUNT(*) AS RecallCount FROM ProductRecalls PR INNER JOIN Brands B ON PR.BrandID \u003d B.BrandID WHERE PR.Country \u003d \u0027Canada\u0027 GROUP BY B.BrandName ORDER BY RecallCount DESC LIMIT 2;", + "sql_explanation": "This query first creates and inserts data into the ProductRecalls and Brands tables. It then performs an inner join between the ProductRecalls and Brands tables, filtered by the country Canada, groups them by BrandName, and orders the result set in descending order by RecallCount. The query then limits the result set to the top 2 records." 
+}, { + "id": "768", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which ingredients are used in products that have had a safety recall in the last year?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, ingredient_name TEXT); CREATE TABLE recalls (recall_id INT, product_id INT, recall_date DATE);", + "sql": "SELECT DISTINCT ingredients.ingredient_name FROM ingredients INNER JOIN recalls ON ingredients.product_id \u003d recalls.product_id WHERE recalls.recall_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "The SQL query joins the ingredients and recalls tables on the product_id column. It filters for rows where the recall_date is within the last year. Then it selects distinct ingredient_name values. The result is a list of ingredients used in products that have had a safety recall in the last year." 
+}, { + "id": "946", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of ingredients sourced from India, for each product?", + "sql_context": "CREATE TABLE product_ingredients (ingredient_id INT, product_id INT, ingredient VARCHAR(255), source_country VARCHAR(255), weight DECIMAL(5,2)); CREATE TABLE products (product_id INT, product VARCHAR(255)); INSERT INTO product_ingredients (ingredient_id, product_id, ingredient, source_country, weight) VALUES (1, 1, \u0027Vitamin C\u0027, \u0027US\u0027, 2.00), (2, 1, \u0027Shea Butter\u0027, \u0027Ghana\u0027, 5.00), (3, 2, \u0027Jojoba Oil\u0027, \u0027India\u0027, 3.00), (4, 2, \u0027Aloe Vera\u0027, \u0027Mexico\u0027, 4.00), (5, 3, \u0027Coconut Oil\u0027, \u0027India\u0027, 6.00), (6, 3, \u0027Rose Water\u0027, \u0027India\u0027, 2.00); INSERT INTO products (product_id, product) VALUES (1, \u0027Face Cream\u0027), (2, \u0027Hair Serum\u0027), (3, \u0027Body Lotion\u0027);", + "sql": "SELECT p.product, SUM(pi.weight) as total_india_weight FROM product_ingredients pi JOIN products p ON pi.product_id \u003d p.product_id WHERE pi.source_country \u003d \u0027India\u0027 GROUP BY p.product;", + "sql_explanation": "We join the products and product_ingredients table on product_id and filter for ingredients sourced from India to calculate the total weight of ingredients sourced from India for each product." 
+}, { + "id": "978", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 most popular ingredients in cosmetics products?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, ingredient_name VARCHAR(50), product_id INT); INSERT INTO ingredients (ingredient_id, ingredient_name, product_id) VALUES (1, \u0027Water\u0027, 1), (2, \u0027Glycerin\u0027, 1), (3, \u0027Shea Butter\u0027, 2), (4, \u0027Coconut Oil\u0027, 3), (5, \u0027Jojoba Oil\u0027, 3), (6, \u0027Aloe Vera\u0027, 4), (7, \u0027Beeswax\u0027, 4), (8, \u0027Rosehip Oil\u0027, 5); CREATE TABLE products (product_id INT, brand_id INT, product_name VARCHAR(50), certified_vegan BOOLEAN); INSERT INTO products (product_id, brand_id, product_name, certified_vegan) VALUES (1, 1, \u0027Soap\u0027, true), (2, 1, \u0027Lotion\u0027, false), (3, 2, \u0027Shower Gel\u0027, true), (4, 2, \u0027Body Butter\u0027, true), (5, 3, \u0027Foundation\u0027, false);", + "sql": "SELECT i.ingredient_name, COUNT(*) as product_count FROM ingredients i JOIN products p ON i.product_id \u003d p.product_id GROUP BY i.ingredient_name ORDER BY product_count DESC LIMIT 5;", + "sql_explanation": "This query joins the ingredients and products tables on the product_id column. It then groups the results by ingredient_name and uses the COUNT aggregate function to count the number of products that contain each ingredient. The results are then ordered in descending order by product count and limited to the top 5. This will return the top 5 most popular ingredients in cosmetics products." 
+}, { + "id": "1058", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries are the suppliers of ingredients for a specific product located in?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, supplier_id INT, ingredient_name TEXT); CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT, supplier_location TEXT); CREATE TABLE products (product_id INT, product_name TEXT);", + "sql": "SELECT DISTINCT suppliers.supplier_location FROM ingredients INNER JOIN suppliers ON ingredients.supplier_id \u003d suppliers.supplier_id WHERE ingredients.product_id \u003d [product_id];", + "sql_explanation": "The SQL query joins the ingredients and suppliers tables on the supplier_id column. It filters for rows where the product_id is a specific value. Then it selects distinct supplier_location values. The result is a list of countries where the suppliers of ingredients for a specific product are located." 
+}, { + "id": "1433", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total sales of cruelty-free cosmetics in Canada and the USA?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), sales INT, certification VARCHAR(20)); INSERT INTO products VALUES (1, \u0027Mascara\u0027, 5000, \u0027cruelty-free\u0027), (2, \u0027Lipstick\u0027, 7000, \u0027not_certified\u0027), (3, \u0027Foundation\u0027, 6000, \u0027cruelty-free\u0027); CREATE TABLE regions (region_id INT, region_name VARCHAR(50)); INSERT INTO regions VALUES (1, \u0027Canada\u0027), (2, \u0027USA\u0027);", + "sql": "SELECT region_name, SUM(sales) FROM products JOIN regions ON products.product_id \u003d regions.region_id WHERE certification \u003d \u0027cruelty-free\u0027 GROUP BY region_name;", + "sql_explanation": "The query joins the products and regions tables on the appropriate columns. It filters the results to only include rows with a certification of \u0027cruelty-free\u0027. The results are then grouped by the region_name and the total sales for each group are calculated." 
+}, { + "id": "1466", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from products that contain SPF and were sold in Australia?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), price DECIMAL(5,2), contains_spf BOOLEAN); CREATE TABLE sales (sale_id INT, product_id INT, sale_quantity INT, sale_price DECIMAL(5,2), country VARCHAR(255));", + "sql": "SELECT SUM(sale_price * sale_quantity) FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE contains_spf \u003d TRUE AND country \u003d \u0027Australia\u0027;", + "sql_explanation": "The SQL query joins the sales and products tables, filters for products that contain SPF and were sold in Australia, and then calculates the total revenue generated by summing the product of sale_price and sale_quantity." 
+}, { + "id": "1620", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which products contain both natural and organic ingredients?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, ingredient_name TEXT, is_natural BOOLEAN, is_organic BOOLEAN, product_id INT); CREATE TABLE products (product_id INT, product_name TEXT); INSERT INTO ingredients VALUES (1, \u0027Water\u0027, true, false, 1), (2, \u0027Aloe Vera\u0027, true, true, 1), (3, \u0027Fragrance\u0027, false, false, 2), (4, \u0027Water\u0027, true, false, 3), (5, \u0027Glycerin\u0027, true, true, 3); INSERT INTO products VALUES (1, \u0027Shampoo\u0027), (2, \u0027Conditioner\u0027), (3, \u0027Lotion\u0027);", + "sql": "SELECT i.product_id, p.product_name FROM ingredients i JOIN products p ON i.product_id \u003d p.product_id WHERE i.is_natural \u003d true AND i.is_organic \u003d true;", + "sql_explanation": "The query joins the ingredients and products tables on the product_id column. It filters for rows where the is_natural and is_organic columns are both true. It then selects the product_id and product_name columns to get the names of the products that contain both natural and organic ingredients." 
+}, { + "id": "2330", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the ingredient sources for all vegan cosmetic products.", + "sql_context": "CREATE TABLE ingredients (product_id INT, ingredient VARCHAR(50), source_country VARCHAR(50)); INSERT INTO ingredients (product_id, ingredient, source_country) VALUES (1, \u0027Vitamin E\u0027, \u0027Brazil\u0027), (2, \u0027Beeswax\u0027, \u0027France\u0027), (3, \u0027Mica\u0027, \u0027India\u0027); CREATE TABLE products (product_id INT, is_vegan BOOLEAN); INSERT INTO products (product_id, is_vegan) VALUES (1, true), (2, false), (3, true);", + "sql": "SELECT i.ingredient, i.source_country FROM ingredients i JOIN products p ON i.product_id \u003d p.product_id WHERE p.is_vegan \u003d true;", + "sql_explanation": "The query performs an inner join between the ingredients and products tables, filters for vegan products, and selects the ingredient and source_country columns." 
+}, { + "id": "264", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which students have the highest attendance in open pedagogy courses?", + "sql_context": "CREATE TABLE attendance (student_id INT, course_id INT, attendance INT); INSERT INTO attendance (student_id, course_id, attendance) VALUES (1, 101, 90), (1, 102, 95), (2, 101, 85), (2, 102, 80), (3, 101, 92), (3, 102, 98); CREATE TABLE open_pedagogy_courses (course_id INT, course_name VARCHAR(20)); INSERT INTO open_pedagogy_courses (course_id, course_name) VALUES (101, \u0027Open Science\u0027), (102, \u0027Open Art\u0027);", + "sql": "SELECT student_id, SUM(attendance) as total_attendance FROM attendance JOIN open_pedagogy_courses ON attendance.course_id \u003d open_pedagogy_courses.course_id WHERE course_name IN (\u0027Open Science\u0027, \u0027Open Art\u0027) GROUP BY student_id ORDER BY total_attendance DESC;", + "sql_explanation": "Calculate the total attendance for each student in open pedagogy courses by joining the attendance and open_pedagogy_courses tables, filtering for rows with course names \u0027Open Science\u0027 and \u0027Open Art\u0027, grouping by the student_id column, and ordering in descending order to get the students with the highest attendance." 
+}, { + "id": "586", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students in each school district, grouped by district and displayed in alphabetical order?", + "sql_context": "CREATE TABLE school_districts (district_id INT, district_name VARCHAR(255)); CREATE TABLE student_mental_health (student_id INT, district_id INT, mental_health_score INT); INSERT INTO school_districts (district_id, district_name) VALUES (1, \u0027Downtown\u0027), (2, \u0027Uptown\u0027); INSERT INTO student_mental_health (student_id, district_id, mental_health_score) VALUES (1, 1, 80), (2, 1, 70), (3, 2, 90), (4, 2, 60);", + "sql": "SELECT sd.district_name, AVG(smh.mental_health_score) as avg_score FROM school_districts sd JOIN student_mental_health smh ON sd.district_id \u003d smh.district_id GROUP BY sd.district_name ORDER BY sd.district_name;", + "sql_explanation": "Join the school_districts table and student_mental_health table on the district_id column. Group the results by district_name and calculate the average mental health score. Order the results alphabetically by district name." 
+}, { + "id": "691", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of students and teachers in each school?", + "sql_context": "CREATE TABLE students (student_id INT, student_name VARCHAR(50), school_id INT); INSERT INTO students (student_id, student_name, school_id) VALUES (1, \u0027John Doe\u0027, 1001), (2, \u0027Jane Smith\u0027, 1001), (3, \u0027Mike Johnson\u0027, 1002); CREATE TABLE teachers (teacher_id INT, teacher_name VARCHAR(50), school_id INT); INSERT INTO teachers (teacher_id, teacher_name, school_id) VALUES (1, \u0027Alice Brown\u0027, 1001), (2, \u0027David Lee\u0027, 1001), (3, \u0027Emily White\u0027, 1002);", + "sql": "SELECT school_id, COUNT(DISTINCT s.student_id) as student_count, COUNT(DISTINCT t.teacher_id) as teacher_count FROM students s FULL OUTER JOIN teachers t ON s.school_id \u003d t.school_id GROUP BY school_id;", + "sql_explanation": "Count the number of unique students and teachers in each school using a FULL OUTER JOIN." 
+}, { + "id": "733", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health support session duration for students of different age groups?", + "sql_context": "CREATE TABLE students (student_id INT, student_name TEXT, student_age INT); CREATE TABLE sessions (session_id INT, student_id INT, session_date DATE, support_type TEXT, hours_spent INT); INSERT INTO students VALUES (1, \u0027Alex\u0027, 15), (2, \u0027Bella\u0027, 17), (3, \u0027Charlie\u0027, 20), (4, \u0027Daniel\u0027, 22); INSERT INTO sessions VALUES (1, 1, \u00272022-01-01\u0027, \u0027mental health\u0027, 2), (2, 2, \u00272022-01-02\u0027, \u0027mental health\u0027, 3), (3, 3, \u00272022-01-03\u0027, \u0027mental health\u0027, 4), (4, 4, \u00272022-01-04\u0027, \u0027mental health\u0027, 5), (5, 1, \u00272022-02-01\u0027, \u0027mental health\u0027, 3), (6, 2, \u00272022-02-02\u0027, \u0027mental health\u0027, 4), (7, 3, \u00272022-02-03\u0027, \u0027mental health\u0027, 5), (8, 4, \u00272022-02-04\u0027, \u0027mental health\u0027, 6);", + "sql": "SELECT FLOOR(s.student_age / 5) * 5 AS age_group, AVG(s.hours_spent) FROM students st INNER JOIN sessions s ON st.student_id \u003d s.student_id WHERE s.support_type \u003d \u0027mental health\u0027 GROUP BY age_group;", + "sql_explanation": "The SQL query groups students into age groups (15-19, 20-24, etc.) by dividing the \u0027student_age\u0027 column by 5 and rounding down to the nearest integer. 
It then calculates the average mental health support session duration for students in each age group using the AVG function, grouped by the \u0027age_group\u0027 column." +}, { + "id": "901", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students have improved their mental health score by more than 10 points since enrolling?", + "sql_context": "CREATE TABLE student_mental_health (student_id INT, mental_health_score INT, date DATE); CREATE TABLE enrollments (student_id INT, enrollment_date DATE);", + "sql": "SELECT COUNT(smh.student_id) as num_improved FROM student_mental_health smh JOIN enrollments e ON smh.student_id \u003d e.student_id WHERE smh.mental_health_score \u003e e.mental_health_score + 10;", + "sql_explanation": "This query identifies the number of students who have improved their mental health score by more than 10 points since enrolling. It does so by joining the student_mental_health table and the enrollments table on student_id, and filtering for students where the mental health score is more than 10 points higher than the enrollment mental health score." 
+}, { + "id": "1017", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many professional development workshops were offered in each region, and what were their respective topics?", + "sql_context": "CREATE TABLE regions (region_id INT, region_name VARCHAR(255)); CREATE TABLE workshops (workshop_id INT, region_id INT, workshop_topic VARCHAR(255)); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027Northeast\u0027), (2, \u0027Southeast\u0027); INSERT INTO workshops (workshop_id, region_id, workshop_topic) VALUES (1, 1, \u0027Python Programming\u0027), (2, 1, \u0027Data Science\u0027), (3, 2, \u0027Open Pedagogy\u0027), (4, 2, \u0027Lifelong Learning\u0027);", + "sql": "SELECT r.region_name, COUNT(w.workshop_id) as num_workshops, w.workshop_topic FROM regions r JOIN workshops w ON r.region_id \u003d w.region_id GROUP BY r.region_name, w.workshop_topic;", + "sql_explanation": "Join the regions table and workshops table on the region_id column. Group the results by region_name and workshop_topic, and calculate the number of workshops offered in each region and their respective topics." 
+}, { + "id": "1409", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average lifelong learning progress for students in each school?", + "sql_context": "CREATE TABLE lifelong_learning (student_id INT, school_id INT, progress INT); INSERT INTO lifelong_learning (student_id, school_id, progress) VALUES (1, 100, 25), (2, 100, 50), (3, 101, 75), (4, 101, 100); CREATE TABLE schools (school_id INT, name VARCHAR(20)); INSERT INTO schools (school_id, name) VALUES (100, \u0027Maple\u0027), (101, \u0027Oak\u0027);", + "sql": "SELECT s.school_id, s.name, AVG(ll.progress) as avg_progress FROM lifelong_learning ll JOIN schools s ON ll.school_id \u003d s.school_id GROUP BY s.school_id, s.name;", + "sql_explanation": "Calculate the average lifelong learning progress for students in each school. The JOIN operation combines the lifelong_learning and schools tables, allowing the AVG function to compute the average progress for each school." 
+}, { + "id": "1758", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many professional development courses are offered in each subject area?", + "sql_context": "CREATE TABLE subject_areas (id INT, name VARCHAR(255)); CREATE TABLE courses (id INT, subject_area_id INT, name VARCHAR(255), provider_id INT); INSERT INTO subject_areas (id, name) VALUES (1, \u0027Mathematics\u0027), (2, \u0027Science\u0027), (3, \u0027Humanities\u0027); INSERT INTO courses (id, subject_area_id, name, provider_id) VALUES (1, 1, \u0027Algebra\u0027, 1), (2, 1, \u0027Geometry\u0027, 1), (3, 2, \u0027Biology\u0027, 2), (4, 2, \u0027Chemistry\u0027, 2), (5, 3, \u0027History\u0027, 3), (6, 3, \u0027Literature\u0027, 3);", + "sql": "SELECT sa.name AS subject_area_name, COUNT(c.id) AS num_courses FROM subject_areas sa JOIN courses c ON sa.id \u003d c.subject_area_id GROUP BY sa.name;", + "sql_explanation": "Join the subject_areas and courses tables based on the subject_area_id. Then, count the number of courses per subject area using the COUNT() function and group by the name of the subject area." 
+}, { + "id": "1868", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score for male teachers?", + "sql_context": "CREATE TABLE teachers (id INT, name VARCHAR(50), gender VARCHAR(10), years_experience INT); INSERT INTO teachers (id, name, gender, years_experience) VALUES (1, \u0027John Doe\u0027, \u0027Male\u0027, 5);", + "sql": "SELECT AVG(m.mental_health_score) as average_score FROM teachers t JOIN teacher_mental_health m ON t.id \u003d m.teacher_id WHERE t.gender \u003d \u0027Male\u0027;", + "sql_explanation": "Calculate the average mental health score for male teachers by joining the teachers and teacher_mental_health tables on teacher_id and filtering by gender." +}, { + "id": "2055", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the teachers that have not attended any professional development programs?", + "sql_context": "CREATE TABLE teachers (teacher_id INT, teacher_name TEXT); INSERT INTO teachers (teacher_id, teacher_name) VALUES (1, \u0027Mrs. Doe\u0027), (2, \u0027Mr. Smith\u0027), (3, \u0027Ms. 
Johnson\u0027); CREATE TABLE professional_development (program_id INT, program_name TEXT, teacher_id INT); INSERT INTO professional_development (program_id, program_name, teacher_id) VALUES (1, \u0027Python for Educators\u0027, 1), (2, \u0027Data Science for Teachers\u0027, 2), (3, \u0027Inclusive Teaching\u0027, 4), (4, \u0027Open Pedagogy\u0027, 5);", + "sql": "SELECT t.teacher_name FROM teachers t LEFT JOIN professional_development pd ON t.teacher_id \u003d pd.teacher_id WHERE pd.teacher_id IS NULL;", + "sql_explanation": "We perform a left join between the teachers and professional_development table on the teacher_id field, then filter for NULL values in the teacher_id field, which indicates that the teacher has not attended any professional development programs." +}, { + "id": "2158", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students in each grade level?", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(255), grade INT); CREATE TABLE student_mental_health (id INT, student_id INT, score INT);", + "sql": "SELECT s.grade, AVG(smh.score) as avg_score FROM student_mental_health smh JOIN students s ON smh.student_id \u003d s.id GROUP BY s.grade;", + "sql_explanation": "This SQL query joins the student_mental_health table with the students table on the student_id column. It then groups the results by the grade level and calculates the average score for each grade level." 
+}, { + "id": "2539", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score of students in \u0027Middle School\u0027 schools?", + "sql_context": "CREATE TABLE Schools (id INT, name VARCHAR(20)); INSERT INTO Schools (id, name) VALUES (1, \u0027Elementary\u0027), (2, \u0027High School\u0027), (3, \u0027Middle School\u0027); CREATE TABLE StudentMentalHealth (student_id INT, school_id INT, score INT); INSERT INTO StudentMentalHealth (student_id, school_id, score) VALUES (1, 1, 80), (2, 1, 90), (3, 2, 70), (4, 3, 85), (5, 3, 75);", + "sql": "SELECT AVG(smh.score) FROM StudentMentalHealth smh JOIN Schools s ON smh.school_id \u003d s.id WHERE s.name \u003d \u0027Middle School\u0027;", + "sql_explanation": "This query calculates the average mental health score of students in \u0027Middle School\u0027 schools by joining StudentMentalHealth and Schools tables, filtering on school name \u0027Middle School\u0027, and then calculating the average score." 
+}, { + "id": "245", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 10 artists with the highest total revenue in the last 3 years?", + "sql_context": "CREATE TABLE ArtWorkSales (artworkID INT, artistID INT, saleDate DATE, revenue DECIMAL(10,2)); CREATE TABLE Artists (artistID INT, artistName VARCHAR(50));", + "sql": "SELECT a.artistName, SUM(aws.revenue) as total_revenue FROM ArtWorkSales aws JOIN Artists a ON aws.artistID \u003d a.artistID WHERE saleDate BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR) AND CURRENT_DATE GROUP BY a.artistName ORDER BY total_revenue DESC LIMIT 10;", + "sql_explanation": "This query calculates the total revenue for each artist in the last 3 years and returns the top 10 artists with the highest total revenue. It does this by joining the ArtWorkSales and Artists tables on the artistID column and grouping the results by artistName. The SUM() function is used to calculate the total revenue for each artist and the results are then ordered by total_revenue in descending order and limited to the top 10. The WHERE clause is used to filter the results to only include sales that occurred in the last 3 years." 
+}, { + "id": "440", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of artworks in the \u0027Cubism\u0027 genre, grouped by the artist\u0027s country of origin and the artwork\u0027s medium.", + "sql_context": "CREATE TABLE Artwork (artwork_id INT, artwork_name VARCHAR(30), genre VARCHAR(20), artist_id INT, medium VARCHAR(20)); CREATE TABLE Artist (artist_id INT, artist_name VARCHAR(30), country_of_origin VARCHAR(30));", + "sql": "SELECT Artist.country_of_origin, Artwork.medium, COUNT(Artwork.artwork_id) FROM Artist INNER JOIN Artwork ON Artist.artist_id \u003d Artwork.artist_id WHERE Artwork.genre \u003d \u0027Cubism\u0027 GROUP BY Artist.country_of_origin, Artwork.medium;", + "sql_explanation": "This query uses an INNER JOIN to combine the \u0027Artwork\u0027 and \u0027Artist\u0027 tables based on the \u0027artist_id\u0027 field. It then filters the results to only include records where the genre is \u0027Cubism\u0027. Finally, it groups the results by the artist\u0027s country of origin and the artwork\u0027s medium and calculates the count of artworks for each group." 
+}, { + "id": "955", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total price of artworks created by Indigenous Australian artists?", + "sql_context": "CREATE TABLE Artists (artist_id INT, artist_name VARCHAR(50), birth_date DATE, country VARCHAR(50)); INSERT INTO Artists (artist_id, artist_name, birth_date, country) VALUES (1, \u0027Albert Namatjira\u0027, \u00271902-07-28\u0027, \u0027Australia\u0027); CREATE TABLE Artworks (artwork_id INT, title VARCHAR(50), year_made INT, artist_id INT, price FLOAT); INSERT INTO Artworks (artwork_id, title, year_made, artist_id, price) VALUES (1, \u0027Mount Hermannsburg\u0027, 1947, 1, 500.0);", + "sql": "SELECT SUM(Artworks.price) FROM Artworks INNER JOIN Artists ON Artworks.artist_id \u003d Artists.artist_id WHERE Artists.country \u003d \u0027Australia\u0027 AND Artists.artist_name \u003d \u0027Albert Namatjira\u0027;", + "sql_explanation": "This query joins the Artworks and Artists tables on the artist_id column and filters for rows where the artist is from Australia and has the name \u0027Albert Namatjira\u0027. It then calculates the total price of the artworks created by Indigenous Australian artist Albert Namatjira." 
+}, { + "id": "1183", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artist has the most works in the modern art category?", + "sql_context": "CREATE TABLE artists (artist_id INT PRIMARY KEY, artist_name TEXT, style TEXT);CREATE TABLE works (work_id INT PRIMARY KEY, work_title TEXT, artist_id INT, category TEXT, FOREIGN KEY (artist_id) REFERENCES artists(artist_id));INSERT INTO artists (artist_id, artist_name, style) VALUES (1, \u0027Pablo Picasso\u0027, \u0027Cubism\u0027); INSERT INTO works (work_id, work_title, artist_id, category) VALUES (1, \u0027Guernica\u0027, 1, \u0027Modern Art\u0027);", + "sql": "SELECT a.artist_name FROM artists a JOIN works w ON a.artist_id \u003d w.artist_id WHERE w.category \u003d \u0027Modern Art\u0027 GROUP BY a.artist_name ORDER BY COUNT(w.work_id) DESC LIMIT 1;", + "sql_explanation": "The SQL query joins the artists and works tables on the artist_id foreign key. It then filters for modern art category works and groups the results by artist_name. Finally, it orders by the count of works in descending order and limits the result to the top 1 artist." 
+}, { + "id": "1325", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artist has the most artwork in the modern art category?", + "sql_context": "CREATE TABLE Artists (ArtistID INT, Name VARCHAR(100), Nationality VARCHAR(50), BirthDate DATE); INSERT INTO Artists VALUES (1, \u0027Pablo Picasso\u0027, \u0027Spanish\u0027, \u00271881-10-25\u0027); INSERT INTO Artists VALUES (2, \u0027Vincent van Gogh\u0027, \u0027Dutch\u0027, \u00271853-03-30\u0027); CREATE TABLE Artwork (ArtworkID INT, Title VARCHAR(100), Category VARCHAR(50), ArtistID INT); INSERT INTO Artwork VALUES (1, \u0027Guernica\u0027, \u0027Modern Art\u0027, 1); INSERT INTO Artwork VALUES (2, \u0027Starry Night\u0027, \u0027Post-Impressionism\u0027, 2);", + "sql": "SELECT A.Name FROM Artists A JOIN Artwork AW ON A.ArtistID \u003d AW.ArtistID WHERE AW.Category \u003d \u0027Modern Art\u0027 GROUP BY A.Name ORDER BY COUNT(AW.ArtworkID) DESC LIMIT 1;", + "sql_explanation": "The SQL query first joins the Artists and Artwork tables based on the ArtistID. It then filters the records to only include modern art pieces. By grouping the records by the artist name and counting the number of artworks for each artist, we can identify the artist with the most modern artworks." 
+}, { + "id": "1627", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artworks have been created by female artists in the last 50 years?", + "sql_context": "CREATE TABLE artists (id INT, name TEXT, gender TEXT, birth_year INT); CREATE TABLE artworks (id INT, title TEXT, artist_id INT, creation_year INT); INSERT INTO artists (id, name, gender, birth_year) VALUES (1, \u0027Claude Monet\u0027, \u0027Male\u0027, 1840), (2, \u0027Camille Pissarro\u0027, \u0027Male\u0027, 1830), (3, \u0027Marie Bracquemond\u0027, \u0027Female\u0027, 1840); INSERT INTO artworks (id, title, artist_id, creation_year) VALUES (1, \u0027Water Lilies\u0027, 1, 1905), (2, \u0027The Boulevard Montmartre at Night\u0027, 2, 1897), (3, \u0027The Garden\u0027, 3, 1888);", + "sql": "SELECT COUNT(*) FROM artworks a INNER JOIN artists ar ON a.artist_id \u003d ar.id WHERE ar.gender \u003d \u0027Female\u0027 AND a.creation_year \u003e\u003d YEAR(CURRENT_DATE) - 50;", + "sql_explanation": "The query performs an inner join between artworks and artists tables, filtering the records where the gender is \u0027Female\u0027 and the creation year is within the last 50 years. The result is the number of artworks created by female artists in the last 50 years." 
+}, { + "id": "1756", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of all artworks in our Asian art collection?", + "sql_context": "CREATE TABLE Artworks (id INT, title VARCHAR(50), value DECIMAL(10,2), collection VARCHAR(50)); CREATE TABLE Collections (id INT, name VARCHAR(50), category VARCHAR(50));", + "sql": "SELECT SUM(Artworks.value) FROM Artworks INNER JOIN Collections ON Artworks.collection \u003d Collections.name WHERE Collections.category \u003d \u0027Asian Art\u0027;", + "sql_explanation": "The SQL query joins the \u0027Artworks\u0027 and \u0027Collections\u0027 tables on the \u0027collection\u0027 and \u0027name\u0027 columns respectively. It then filters for artworks in the Asian art collection. Lastly, it calculates the total value of these artworks." 
+}, { + "id": "2242", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the minimum sale price for a Picasso artwork.", + "sql_context": "CREATE TABLE artist_data (id INT, artist_name VARCHAR(50), nationality VARCHAR(50)); CREATE TABLE artwork_sales (id INT, artwork_name VARCHAR(50), artist_id INT, sale_price DECIMAL(10, 2));", + "sql": "SELECT MIN(sale_price) as min_price FROM artwork_sales a JOIN artist_data d ON a.artist_id \u003d d.id WHERE d.artist_name \u003d \u0027Picasso\u0027;", + "sql_explanation": "The SQL query finds the minimum sale price for a Picasso artwork. It uses a JOIN clause to combine the \u0027artwork_sales\u0027 and \u0027artist_data\u0027 tables based on their \u0027artist_id\u0027 and \u0027id\u0027 columns. Then, it applies a WHERE clause to filter the results for Picasso artworks. The MIN function is used to determine the minimum sale price in the result set." 
+}, { + "id": "2349", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of artworks created by female artists?", + "sql_context": "CREATE TABLE Artworks (id INT, title TEXT, artist_id INT, price INT); INSERT INTO Artworks (id, title, artist_id, price) VALUES (1, \u0027Water Lilies\u0027, 1, 10000000), (2, \u0027The Card Players\u0027, 2, 20000000); CREATE TABLE Artists (id INT, name TEXT, gender TEXT, birth_year INT, death_year INT); INSERT INTO Artists (id, name, gender, birth_year, death_year) VALUES (1, \u0027Claude Monet\u0027, \u0027Male\u0027, 1840, 1926), (2, \u0027Paul Cezanne\u0027, \u0027Male\u0027, 1839, 1906), (3, \u0027Mary Cassatt\u0027, \u0027Female\u0027, 1844, 1926);", + "sql": "SELECT AVG(Artworks.price) FROM Artworks INNER JOIN Artists ON Artworks.artist_id \u003d Artists.id WHERE Artists.gender \u003d \u0027Female\u0027;", + "sql_explanation": "This query calculates the average price of artworks made by female artists by joining the Artworks and Artists tables based on artist_id, then filtering on the gender column." 
+}, { + "id": "3073", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which are the names of all museums that have hosted exhibitions with more than 50 pieces of art?", + "sql_context": "CREATE TABLE museums (museum_id INT, name VARCHAR(50), city VARCHAR(50), opening_year INT); INSERT INTO museums (museum_id, name, city, opening_year) VALUES (1, \u0027Metropolitan Museum of Art\u0027, \u0027New York\u0027, 1870); CREATE TABLE exhibitions (exhibition_id INT, title VARCHAR(50), year INT, museum_id INT, art_count INT); INSERT INTO exhibitions (exhibition_id, title, year, museum_id, art_count) VALUES (1, \u0027First Exhibition\u0027, 1871, 1, 100);", + "sql": "SELECT m.name FROM museums m INNER JOIN exhibitions e ON m.museum_id \u003d e.museum_id WHERE e.art_count \u003e 50;", + "sql_explanation": "Join museums and exhibitions on museum_id and select museum names with more than 50 pieces of art in exhibitions." 
+}, { + "id": "241", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all policyholders who have filed a claim in the last 30 days, including their policy ID, policyholder name, and policy start date.", + "sql_context": "CREATE TABLE Policyholders (PolicyID INT, PolicyholderName TEXT, PolicyStartDate DATE); INSERT INTO Policyholders (PolicyID, PolicyholderName, PolicyStartDate) VALUES (1, \u0027John Doe\u0027, \u00272022-01-01\u0027), (2, \u0027Jane Smith\u0027, \u00272022-02-01\u0027); CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimDate DATE); INSERT INTO Claims (ClaimID, PolicyID, ClaimDate) VALUES (1, 1, \u00272022-01-15\u0027), (2, 1, \u00272022-02-10\u0027), (3, 2, \u00272022-02-20\u0027);", + "sql": "SELECT Policyholders.PolicyID, Policyholders.PolicyholderName, Policyholders.PolicyStartDate FROM Policyholders INNER JOIN Claims ON Policyholders.PolicyID \u003d Claims.PolicyID WHERE Claims.ClaimDate \u003e\u003d DATEADD(day, -30, GETDATE()) ORDER BY Policyholders.PolicyID;", + "sql_explanation": "This SQL query lists all policyholders who have filed a claim in the last 30 days, including their policy ID, policyholder name, and policy start date. It does this by performing an inner join between the Policyholders and Claims tables on the PolicyID field, filtering for records where the ClaimDate field in the Claims table is within the last 30 days using the DATEADD function, and finally sorting the results by the PolicyID field in the Policyholders table in ascending order." 
+}, { + "id": "249", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average policy duration and number of claims, by policy type, for policyholders in Florida.", + "sql_context": "CREATE TABLE Claim (ClaimId INT, PolicyId INT, ClaimDate DATE); CREATE TABLE Policy (PolicyId INT, PolicyType VARCHAR(50), IssueDate DATE, ExpirationDate DATE, Region VARCHAR(50));", + "sql": "SELECT Policy.PolicyType, AVG(DATEDIFF(day, IssueDate, ExpirationDate)) as AveragePolicyDuration, COUNT(Claim.ClaimId) as NumberOfClaims FROM Policy LEFT JOIN Claim ON Policy.PolicyId \u003d Claim.PolicyId WHERE Policy.Region \u003d \u0027Florida\u0027 GROUP BY Policy.PolicyType;", + "sql_explanation": "The SQL query performs a left join between the Claim and Policy tables, filtering for policyholders in Florida. It then calculates the average policy duration and counts the number of claims for each policy type, providing the desired output." 
+}, { + "id": "320", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total claim amounts, by policy type and region, for policyholders with a policy duration of more than 3 years.", + "sql_context": "CREATE TABLE Claim (ClaimId INT, PolicyId INT, ClaimAmount DECIMAL(10,2), ClaimDate DATE); CREATE TABLE Policy (PolicyId INT, PolicyType VARCHAR(50), IssueDate DATE, ExpirationDate DATE, Region VARCHAR(50));", + "sql": "SELECT Policy.PolicyType, Policy.Region, SUM(Claim.ClaimAmount) as TotalClaimAmounts FROM Policy INNER JOIN Claim ON Policy.PolicyId \u003d Claim.PolicyId WHERE DATEDIFF(day, IssueDate, ExpirationDate) \u003e 1095 GROUP BY Policy.PolicyType, Policy.Region;", + "sql_explanation": "The SQL query performs a join between the Claim and Policy tables, filtering for policyholders with a policy duration of more than 3 years. It then calculates the total claim amounts for each policy type and region, providing the desired output." 
+}, { + "id": "534", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List policyholders in the Midwest region who have not filed a claim.", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Region VARCHAR(20)); CREATE TABLE Claims (ClaimID INT, PolicyholderID INT); INSERT INTO Policyholders (PolicyholderID, Region) VALUES (1, \u0027Midwest\u0027), (2, \u0027Northeast\u0027), (3, \u0027Midwest\u0027); INSERT INTO Claims (ClaimID, PolicyholderID) VALUES (1, 2), (2, 3);", + "sql": "SELECT Policyholders.PolicyholderID, Policyholders.Region FROM Policyholders LEFT JOIN Claims ON Policyholders.PolicyholderID \u003d Claims.PolicyholderID WHERE Claims.ClaimID IS NULL AND Policyholders.Region \u003d \u0027Midwest\u0027;", + "sql_explanation": "This SQL query lists policyholders in the Midwest region who have not filed a claim by performing a left join between the Policyholders and Claims tables on the PolicyholderID column. It then filters the results to only include policyholders from the Midwest region who do not have a corresponding claim record (using the IS NULL condition) and returns the PolicyholderID and Region columns." 
+}, { + "id": "549", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List health insurance policy types and their respective average claim amounts.", + "sql_context": "CREATE TABLE HealthPolicyTypes (PolicyTypeID int, PolicyType varchar(20)); CREATE TABLE HealthClaims (ClaimID int, PolicyTypeID int, ClaimAmount decimal); INSERT INTO HealthPolicyTypes (PolicyTypeID, PolicyType) VALUES (1, \u0027Health Maintenance Organization\u0027); INSERT INTO HealthPolicyTypes (PolicyTypeID, PolicyType) VALUES (2, \u0027Preferred Provider Organization\u0027); INSERT INTO HealthClaims (ClaimID, PolicyTypeID, ClaimAmount) VALUES (1, 1, 1200); INSERT INTO HealthClaims (ClaimID, PolicyTypeID, ClaimAmount) VALUES (2, 2, 1800);", + "sql": "SELECT HealthPolicyTypes.PolicyType, AVG(HealthClaims.ClaimAmount) FROM HealthPolicyTypes INNER JOIN HealthClaims ON HealthPolicyTypes.PolicyTypeID \u003d HealthClaims.PolicyTypeID GROUP BY HealthPolicyTypes.PolicyType;", + "sql_explanation": "The SQL query lists health insurance policy types and their respective average claim amounts by performing an INNER JOIN between the \u0027HealthPolicyTypes\u0027 and \u0027HealthClaims\u0027 tables, based on the matching \u0027PolicyTypeID\u0027, using the GROUP BY function on the \u0027PolicyType\u0027 column and the AVG function on the \u0027ClaimAmount\u0027 column." 
+}, { + "id": "580", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the policy retention rate for male policyholders in New York and Florida?", + "sql_context": "CREATE TABLE Policyholders (PolicyID INT, Gender VARCHAR(10), State VARCHAR(10)); INSERT INTO Policyholders VALUES (1, \u0027Male\u0027, \u0027New York\u0027); INSERT INTO Policyholders VALUES (2, \u0027Female\u0027, \u0027Florida\u0027); CREATE TABLE Policies (PolicyID INT, RetentionRate DECIMAL(3,2)); INSERT INTO Policies VALUES (1, 0.80); INSERT INTO Policies VALUES (2, 0.85);", + "sql": "SELECT p.Gender, AVG(pr.RetentionRate) as RetentionRate FROM Policyholders p INNER JOIN Policies pr ON p.PolicyID \u003d pr.PolicyID WHERE (p.Gender \u003d \u0027Male\u0027 AND p.State IN (\u0027New York\u0027, \u0027Florida\u0027)) GROUP BY p.Gender;", + "sql_explanation": "This SQL query calculates the average retention rate for male policyholders in New York and Florida. It first joins the Policyholders table and Policies table on PolicyID. Then, it filters the data where Gender is \u0027Male\u0027 and State is either \u0027New York\u0027 or \u0027Florida\u0027. Finally, it calculates the average retention rate by grouping the data by Gender." 
+}, { + "id": "714", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the policyholders in \u0027CA\u0027 with the highest claim amount?", + "sql_context": "CREATE TABLE Policyholders (PolicyID INT, PolicyholderName TEXT, State TEXT); INSERT INTO Policyholders (PolicyID, PolicyholderName, State) VALUES (1, \u0027Maria Garcia\u0027, \u0027CA\u0027), (2, \u0027James Lee\u0027, \u0027NY\u0027); CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount INT); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount) VALUES (1, 1, 10000), (2, 1, 7000), (3, 2, 3000);", + "sql": "SELECT PolicyholderName, MAX(ClaimAmount) AS MaxClaimAmount FROM Policyholders INNER JOIN Claims ON Policyholders.PolicyID \u003d Claims.PolicyID WHERE Policyholders.State \u003d \u0027CA\u0027 GROUP BY PolicyholderName;", + "sql_explanation": "The SQL query lists policyholders in \u0027CA\u0027 with the highest claim amount by performing an inner join on the Policyholders and Claims tables. It filters the records where the State column is equal to \u0027CA\u0027, calculates the maximum claim amount (MAX) for each policyholder, and groups them by the PolicyholderName." 
+}, { + "id": "846", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average claim amount by policy type, region, and year?", + "sql_context": "CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount DECIMAL(10, 2), ClaimDate DATE); INSERT INTO Claims VALUES (1, 1, 500, \u00272021-01-05\u0027), (2, 2, 1000, \u00272022-02-10\u0027), (3, 3, 750, \u00272021-03-15\u0027), (4, 4, 1200, \u00272022-01-25\u0027), (5, 5, 300, \u00272021-02-01\u0027), (6, 6, 1500, \u00272022-03-01\u0027);", + "sql": "SELECT PolicyType, Region, EXTRACT(YEAR FROM ClaimDate) AS Year, AVG(ClaimAmount) AS AvgClaimAmount FROM Claims c JOIN Policies p ON c.PolicyID \u003d p.PolicyID GROUP BY PolicyType, Region, Year;", + "sql_explanation": "The SQL query joins the Claims and Policies tables based on PolicyID, extracts the year from ClaimDate, and groups the records by PolicyType, Region, and Year, calculating the average claim amount for each group using the AVG() function." 
+}, { + "id": "912", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view \u0027claims_by_city\u0027 that displays the total claim amount by city", + "sql_context": "CREATE TABLE policyholders (policyholder_id INT PRIMARY KEY, name VARCHAR(100), age INT, gender VARCHAR(10), city VARCHAR(50), state VARCHAR(50)); CREATE TABLE claims (claim_id INT PRIMARY KEY, policyholder_id INT, claim_amount DECIMAL(10, 2), claim_date DATE); INSERT INTO policyholders (policyholder_id, name, age, gender, city, state) VALUES (1, \u0027John Doe\u0027, 34, \u0027Male\u0027, \u0027New York\u0027, \u0027NY\u0027), (2, \u0027Jane Smith\u0027, 28, \u0027Female\u0027, \u0027Los Angeles\u0027, \u0027CA\u0027); INSERT INTO claims (claim_id, policyholder_id, claim_amount, claim_date) VALUES (1, 1, 500, \u00272022-01-01\u0027), (2, 2, 300, \u00272022-02-15\u0027), (3, 3, 700, \u00272022-03-05\u0027);", + "sql": "CREATE VIEW claims_by_city AS SELECT city, SUM(claim_amount) as total_claim_amount FROM claims JOIN policyholders ON claims.policyholder_id \u003d policyholders.policyholder_id GROUP BY city;", + "sql_explanation": "A view \u0027claims_by_city\u0027 is created to display the total claim amount by city. The \u0027JOIN\u0027 clause combines the \u0027claims\u0027 and \u0027policyholders\u0027 tables based on the \u0027policyholder_id\u0027 column. The \u0027GROUP BY\u0027 clause calculates the sum of \u0027claim_amount\u0027 for each city." 
+}, { + "id": "919", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of policy premiums and average claim amount for policyholders from Texas, grouped by policy type?", + "sql_context": "CREATE TABLE policyholders (id INT, age INT, gender VARCHAR(10), policy_type VARCHAR(20), premium FLOAT, state VARCHAR(20)); INSERT INTO policyholders (id, age, gender, policy_type, premium, state) VALUES (1, 32, \u0027Female\u0027, \u0027Comprehensive\u0027, 1200.00, \u0027Texas\u0027), (2, 41, \u0027Male\u0027, \u0027Third-Party\u0027, 800.00, \u0027California\u0027); CREATE TABLE claims (id INT, policyholder_id INT, claim_amount FLOAT, claim_date DATE); INSERT INTO claims (id, policyholder_id, claim_amount, claim_date) VALUES (1, 1, 500.00, \u00272021-01-01\u0027), (2, 2, 1000.00, \u00272021-02-01\u0027), (3, 1, 300.00, \u00272021-03-01\u0027), (4, 3, 200.00, \u00272021-01-01\u0027);", + "sql": "SELECT policy_type, SUM(premium), AVG(claim_amount) FROM policyholders JOIN claims ON policyholders.id \u003d claims.policyholder_id WHERE policyholders.state \u003d \u0027Texas\u0027 GROUP BY policy_type;", + "sql_explanation": "To find the sum of policy premiums and average claim amount for policyholders from Texas, grouped by policy type, we join the policyholders and claims tables, filter for policyholders from Texas, group by policy_type, and apply the SUM function to the premium column and the AVG function to the claim_amount column for each group to determine the sum of policy premiums and average claim amount for each policy type." 
+}, { + "id": "1000", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many policyholders have never filed a claim?", + "sql_context": "CREATE TABLE claims (claim_id INT, policyholder_id INT); INSERT INTO claims (claim_id, policyholder_id) VALUES (1, 1), (2, 3), (3, 2), (4, 1); CREATE TABLE policyholders (policyholder_id INT); INSERT INTO policyholders (policyholder_id) VALUES (1), (2), (3), (4), (5);", + "sql": "SELECT COUNT(DISTINCT ph.policyholder_id) as num_policyholders_no_claims FROM policyholders ph LEFT JOIN claims c ON ph.policyholder_id \u003d c.policyholder_id WHERE c.claim_id IS NULL;", + "sql_explanation": "This query joins the policyholders and claims tables on the policyholder_id foreign key using a left join. It then filters for policyholders with no claims (NULL in the claims table) and calculates the count of these policyholders." 
+}, { + "id": "1168", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount for home insurance policies in the \u0027East\u0027 region?", + "sql_context": "CREATE TABLE HomePolicies (PolicyID int, ClaimAmount decimal); CREATE TABLE Policyholders (PolicyholderID int, Region varchar(10)); INSERT INTO HomePolicies (PolicyID, ClaimAmount) VALUES (1, 1500); INSERT INTO HomePolicies (PolicyID, ClaimAmount) VALUES (2, 2000); INSERT INTO Policyholders (PolicyholderID, Region) VALUES (1, \u0027East\u0027); INSERT INTO Policyholders (PolicyholderID, Region) VALUES (2, \u0027West\u0027);", + "sql": "SELECT SUM(HomePolicies.ClaimAmount) FROM HomePolicies INNER JOIN Policyholders ON HomePolicies.PolicyID \u003d Policyholders.PolicyholderID WHERE Policyholders.Region \u003d \u0027East\u0027;", + "sql_explanation": "The SQL query calculates the total claim amount for home insurance policies in the \u0027East\u0027 region by performing an INNER JOIN between the \u0027HomePolicies\u0027 and \u0027Policyholders\u0027 tables, based on the matching \u0027PolicyID\u0027, and using the SUM function on the \u0027ClaimAmount\u0027 column, filtered by the \u0027Region\u0027 column with the value \u0027East\u0027." 
+}, { + "id": "1246", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and dates of birth of policyholders who have both a home and auto insurance policy with us?", + "sql_context": "CREATE TABLE HomeInsurance (PolicyholderName VARCHAR(50), DOB DATE); INSERT INTO HomeInsurance VALUES (\u0027John Doe\u0027, \u00271980-05-05\u0027); INSERT INTO HomeInsurance VALUES (\u0027Jane Smith\u0027, \u00271990-12-31\u0027); CREATE TABLE AutoInsurance (PolicyholderName VARCHAR(50), DOB DATE); INSERT INTO AutoInsurance VALUES (\u0027John Doe\u0027, \u00271980-05-05\u0027); INSERT INTO AutoInsurance VALUES (\u0027Jim Brown\u0027, \u00271975-08-11\u0027);", + "sql": "SELECT HomeInsurance.PolicyholderName, HomeInsurance.DOB FROM HomeInsurance INNER JOIN AutoInsurance ON HomeInsurance.PolicyholderName \u003d AutoInsurance.PolicyholderName;", + "sql_explanation": "This query performs an inner join on the HomeInsurance and AutoInsurance tables, based on the PolicyholderName. It returns the names and dates of birth of policyholders who have both a home and auto insurance policy with the company." 
+}, { + "id": "1259", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which policyholders have more than one active policy?", + "sql_context": "CREATE TABLE Policyholders (PolicyholderID INT, Name VARCHAR(50)); CREATE TABLE Policy (PolicyID INT, PolicyholderID INT, PolicyType VARCHAR(20), Active BOOLEAN); INSERT INTO Policyholders VALUES (1, \u0027John Smith\u0027), (2, \u0027Jane Doe\u0027); INSERT INTO Policy VALUES (1, 1, \u0027Auto\u0027, TRUE), (2, 1, \u0027Home\u0027, TRUE), (3, 2, \u0027Auto\u0027, TRUE), (4, 2, \u0027Life\u0027, FALSE);", + "sql": "SELECT DISTINCT PH.Name FROM Policyholders PH JOIN Policy P ON PH.PolicyholderID \u003d P.PolicyholderID WHERE P.Active \u003d TRUE GROUP BY PH.Name HAVING COUNT(P.PolicyID) \u003e 1;", + "sql_explanation": "This query identifies policyholders who have more than one active policy by joining the Policyholders and Policy tables on the PolicyholderID column. It then filters for active policies and groups the results by policyholder name. Finally, it uses the HAVING clause to only return policyholders who have more than one active policy." 
+}, { + "id": "1298", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total claim amount for policies issued in each state, in descending order?", + "sql_context": "CREATE TABLE Policies (PolicyID INT, State VARCHAR(20)); CREATE TABLE Claims (PolicyID INT, ClaimAmount DECIMAL(10,2)); INSERT INTO Policies (PolicyID, State) VALUES (1, \u0027California\u0027), (2, \u0027California\u0027), (3, \u0027Texas\u0027), (4, \u0027Texas\u0027), (5, \u0027Florida\u0027); INSERT INTO Claims (PolicyID, ClaimAmount) VALUES (1, 500), (2, 1200), (3, 800), (4, 3000), (5, 1500);", + "sql": "SELECT P.State, SUM(C.ClaimAmount) AS TotalClaimAmount FROM Policies P INNER JOIN Claims C ON P.PolicyID \u003d C.PolicyID GROUP BY P.State ORDER BY TotalClaimAmount DESC;", + "sql_explanation": "The query first joins the Policies and Claims tables on the PolicyID column. It then groups the results by State and calculates the total claim amount for each state using the SUM function. Lastly, it orders the results in descending order by the total claim amount." 
+}, { + "id": "1820", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest claim amount in the \u0027IL\u0027 region?", + "sql_context": "CREATE TABLE Policyholders (PolicyID INT, PolicyholderName TEXT, State TEXT); INSERT INTO Policyholders (PolicyID, PolicyholderName, State) VALUES (1, \u0027John Smith\u0027, \u0027IL\u0027), (2, \u0027Jane Doe\u0027, \u0027NY\u0027); CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount INT); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount) VALUES (1, 1, 9000), (2, 1, 11000), (3, 2, 7000);", + "sql": "SELECT MAX(Claims.ClaimAmount) FROM Claims INNER JOIN Policyholders ON Claims.PolicyID \u003d Policyholders.PolicyID WHERE Policyholders.State \u003d \u0027IL\u0027;", + "sql_explanation": "The SQL query finds the highest claim amount (MAX) in the \u0027IL\u0027 region by performing an inner join on the Policyholders and Claims tables. It filters the records where the State column is equal to \u0027IL\u0027 and calculates the highest claim amount." 
+}, { + "id": "2067", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum claim amount processed in the last 6 months?", + "sql_context": "CREATE TABLE Dates (Date DATE); INSERT INTO Dates (Date) VALUES (\u00272022-01-01\u0027), (\u00272022-01-02\u0027), (\u00272022-01-03\u0027); CREATE TABLE Claims (ClaimID INT, ProcessedDate DATE, Amount DECIMAL(10, 2)); INSERT INTO Claims (ClaimID, ProcessedDate, Amount) VALUES (1, \u00272022-01-01\u0027, 500), (2, \u00272022-01-03\u0027, 750), (3, \u00272022-06-15\u0027, 1000);", + "sql": "SELECT MAX(Claims.Amount) FROM Claims JOIN Dates ON Claims.ProcessedDate \u003d Dates.Date WHERE Dates.Date \u003e\u003d CURDATE() - INTERVAL 6 MONTH;", + "sql_explanation": "The SQL query first joins the Claims and Dates tables on the ProcessedDate. It then filters the records to only include claims processed in the last 6 months using the WHERE clause and the CURDATE() function. Finally, it calculates the maximum claim amount processed in the last 6 months using the MAX function." 
+}, { + "id": "456", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum delivery time for packages shipped to South America?", + "sql_context": "CREATE TABLE delivery_data (delivery_id INT, shipment_id INT, delivery_time INT); INSERT INTO delivery_data (delivery_id, shipment_id, delivery_time) VALUES (1, 1, 10), (2, 2, 15), (3, 3, 12), (4, 4, 18), (5, 5, 20);", + "sql": "SELECT MIN(delivery_time) FROM delivery_data JOIN shipment_data ON delivery_data.shipment_id \u003d shipment_data.shipment_id WHERE shipment_data.destination_country IN (\u0027South America\u0027, \u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Colombia\u0027, \u0027Peru\u0027);", + "sql_explanation": "This query calculates the minimum delivery time for packages shipped to South America by joining the delivery_data and shipment_data tables on the shipment_id and selecting the minimum delivery_time for entries with destination_country in \u0027South America\u0027, \u0027Brazil\u0027, \u0027Argentina\u0027, \u0027Colombia\u0027, or \u0027Peru\u0027." 
+}, { + "id": "660", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the total quantities of items shipped by land to \u0027City of Industry\u0027 between 2021-06-01 and 2021-06-15?", + "sql_context": "CREATE TABLE warehouses (warehouse_id INT, warehouse_name VARCHAR(255), city VARCHAR(255)); INSERT INTO warehouses (warehouse_id, warehouse_name, city) VALUES (1, \u0027Warehouse A\u0027, \u0027City of Industry\u0027), (2, \u0027Warehouse B\u0027, \u0027Los Angeles\u0027); CREATE TABLE shipments (shipment_id INT, warehouse_id INT, shipped_date DATE, shipped_quantity INT); INSERT INTO shipments (shipment_id, warehouse_id, shipped_date, shipped_quantity) VALUES (1, 1, \u00272021-06-03\u0027, 500), (2, 1, \u00272021-06-10\u0027, 800);", + "sql": "SELECT SUM(shipped_quantity) FROM shipments INNER JOIN warehouses ON shipments.warehouse_id \u003d warehouses.warehouse_id WHERE shipped_date BETWEEN \u00272021-06-01\u0027 AND \u00272021-06-15\u0027 AND city \u003d \u0027City of Industry\u0027;", + "sql_explanation": "This SQL query calculates the total quantities of items shipped by land to \u0027City of Industry\u0027 between June 1st, 2021 and June 15th, 2021. It does this by joining the \u0027shipments\u0027 table with the \u0027warehouses\u0027 table on the \u0027warehouse_id\u0027 column and filtering for the relevant date range and city. The SUM() function is then used to add up the shipped quantities." 
+}, { + "id": "671", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from deliveries in the \u0027Europe\u0027 region?", + "sql_context": "CREATE TABLE Warehouse (id INT, name TEXT, region TEXT); INSERT INTO Warehouse (id, name, region) VALUES (1, \u0027London Warehouse\u0027, \u0027Europe\u0027), (2, \u0027Berlin Warehouse\u0027, \u0027Europe\u0027), (3, \u0027Madrid Warehouse\u0027, \u0027Europe\u0027); CREATE TABLE Shipment (id INT, warehouse_id INT, delivery_fee DECIMAL); INSERT INTO Shipment (id, warehouse_id, delivery_fee) VALUES (1, 1, 50.5), (2, 1, 45.3), (3, 2, 60.2), (4, 3, 40.1);", + "sql": "SELECT Warehouse.region, SUM(Shipment.delivery_fee) as total_revenue FROM Warehouse INNER JOIN Shipment ON Warehouse.id \u003d Shipment.warehouse_id WHERE Warehouse.region \u003d \u0027Europe\u0027 GROUP BY Warehouse.region;", + "sql_explanation": "This SQL query performs an inner join on Warehouse and Shipment tables using the warehouse_id. It then calculates the total revenue generated from deliveries in the \u0027Europe\u0027 region by summing up the delivery_fee column." 
+}, { + "id": "1027", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and total weight of packages shipped from warehouses in the \u0027APAC\u0027 region", + "sql_context": "CREATE TABLE warehouses (id INT, name TEXT, region TEXT); INSERT INTO warehouses (id, name, region) VALUES (1, \u0027Warehouse D\u0027, \u0027EMEA\u0027), (2, \u0027Warehouse E\u0027, \u0027APAC\u0027), (3, \u0027Warehouse F\u0027, \u0027Americas\u0027);", + "sql": "SELECT warehouses.name, SUM(packages.weight) AS total_weight FROM packages JOIN warehouses ON packages.warehouse_id \u003d warehouses.id WHERE region \u003d \u0027APAC\u0027 GROUP BY warehouses.name;", + "sql_explanation": "This query joins the packages table with the warehouses table on the warehouse_id and id columns, respectively. It then filters the results to only include packages from warehouses in the \u0027APAC\u0027 region and calculates the total weight of packages shipped from each warehouse." 
+}, { + "id": "1033", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum weight of packages shipped from the Sydney warehouse to each destination state?", + "sql_context": "CREATE TABLE Packages (id INT, warehouse_id INT, destination_state TEXT, weight FLOAT); INSERT INTO Packages (id, warehouse_id, destination_state, weight) VALUES (1, 11, \u0027WA\u0027, 70.2), (2, 11, \u0027QLD\u0027, 85.6), (3, 11, \u0027VIC\u0027, 60.1); CREATE TABLE Warehouses (id INT, name TEXT, city TEXT, state TEXT); INSERT INTO Warehouses (id, name, city, state) VALUES (11, \u0027Sydney Warehouse\u0027, \u0027Sydney\u0027, \u0027NSW\u0027);", + "sql": "SELECT destination_state, MAX(weight) FROM Packages JOIN Warehouses ON Packages.warehouse_id \u003d Warehouses.id WHERE Warehouses.name \u003d \u0027Sydney Warehouse\u0027 GROUP BY destination_state;", + "sql_explanation": "This query finds the maximum weight (MAX(weight)) of packages shipped from the Sydney warehouse (Warehouses.name \u003d \u0027Sydney Warehouse\u0027) to each destination state (GROUP BY destination_state)." 
+}, { + "id": "1085", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which freight forwarders have handled shipments with a total weight greater than 200 kg to countries in South America?", + "sql_context": "CREATE TABLE freight_forwarders (id INT, name VARCHAR(255));CREATE TABLE shipments (id INT, forwarder_id INT, weight FLOAT, destination_country VARCHAR(255));INSERT INTO freight_forwarders (id, name) VALUES (1, \u0027ABC Freight\u0027), (2, \u0027XYZ Logistics\u0027), (3, \u0027DEF Freight\u0027);INSERT INTO shipments (id, forwarder_id, weight, destination_country) VALUES (1, 1, 120.5, \u0027Brazil\u0027), (2, 1, 75.2, \u0027Colombia\u0027), (3, 2, 50.0, \u0027USA\u0027), (4, 3, 150.0, \u0027Argentina\u0027);", + "sql": "SELECT f.name FROM freight_forwarders f INNER JOIN shipments s ON f.id \u003d s.forwarder_id WHERE s.weight \u003e 200.0 AND s.destination_country IN (\u0027Brazil\u0027, \u0027Colombia\u0027, \u0027Argentina\u0027);", + "sql_explanation": "This query first joins the freight_forwarders and shipments tables on the forwarder_id and id columns, respectively. Then, it filters the results to only include records where the weight column is greater than 200.0 kg and the destination_country is in South America (represented by the countries \u0027Brazil\u0027, \u0027Colombia\u0027, and \u0027Argentina\u0027 in the query). Finally, it selects the name column from the freight_forwarders table." 
+}, { + "id": "1151", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of items shipped to each country in Africa?", + "sql_context": "CREATE TABLE Warehouse (id INT, city VARCHAR(50), country VARCHAR(50)); INSERT INTO Warehouse (id, city, country) VALUES (1, \u0027Johannesburg\u0027, \u0027South Africa\u0027), (2, \u0027Cairo\u0027, \u0027Egypt\u0027), (3, \u0027Nairobi\u0027, \u0027Kenya\u0027), (4, \u0027Accra\u0027, \u0027Ghana\u0027); CREATE TABLE Shipment (id INT, quantity INT, warehouse_id INT, destination_country VARCHAR(50)); INSERT INTO Shipment (id, quantity, warehouse_id, destination_country) VALUES (1, 500, 1, \u0027South Africa\u0027), (2, 300, 2, \u0027Egypt\u0027), (3, 200, 3, \u0027Kenya\u0027), (4, 150, 4, \u0027Ghana\u0027);", + "sql": "SELECT Shipment.destination_country, SUM(Shipment.quantity) FROM Shipment INNER JOIN Warehouse ON Shipment.warehouse_id \u003d Warehouse.id GROUP BY Shipment.destination_country;", + "sql_explanation": "This SQL query calculates the total quantity of items shipped to each country in Africa. It first joins the Shipment and Warehouse tables on the warehouse_id and id columns, respectively. Then, it groups the records by the destination_country column of the Shipment table. Finally, it sums up the quantity column to find the total quantity of items shipped to each country in Africa." 
+}, { + "id": "1184", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of items shipped from Hong Kong to the USA?", + "sql_context": "CREATE TABLE Warehouse (id INT, city VARCHAR(50), country VARCHAR(50)); INSERT INTO Warehouse (id, city, country) VALUES (1, \u0027Hong Kong\u0027, \u0027China\u0027); CREATE TABLE Shipment (id INT, quantity INT, warehouse_id INT, destination_country VARCHAR(50)); INSERT INTO Shipment (id, quantity, warehouse_id, destination_country) VALUES (1, 500, 1, \u0027USA\u0027);", + "sql": "SELECT SUM(quantity) FROM Shipment INNER JOIN Warehouse ON Shipment.warehouse_id \u003d Warehouse.id WHERE Warehouse.city \u003d \u0027Hong Kong\u0027 AND Shipment.destination_country \u003d \u0027USA\u0027;", + "sql_explanation": "This SQL query calculates the total quantity of items shipped from Hong Kong to the USA. It first joins the Shipment and Warehouse tables on the warehouse_id and id columns, respectively. Then, it filters the records where the city is Hong Kong and the destination country is the USA. Finally, it sums up the quantity column to find the total quantity of items shipped." 
+}, { + "id": "1247", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the freight forwarders who have handled shipments to \u0027London\u0027 from any origin?", + "sql_context": "CREATE TABLE FreightForwarders (ID INT, Name VARCHAR(50), Country VARCHAR(50)); INSERT INTO FreightForwarders (ID, Name, Country) VALUES (1, \u0027ABC Logistics\u0027, \u0027USA\u0027), (2, \u0027XYZ Shipping\u0027, \u0027Canada\u0027); CREATE TABLE Shipments (ID INT, FreightForwarderID INT, Origin VARCHAR(50), Destination VARCHAR(50)); INSERT INTO Shipments (ID, FreightForwarderID, Origin, Destination) VALUES (1, 1, \u0027Tokyo\u0027, \u0027New York\u0027), (2, 2, \u0027Paris\u0027, \u0027London\u0027);", + "sql": "SELECT FreightForwarders.Name FROM FreightForwarders INNER JOIN Shipments ON FreightForwarders.ID \u003d Shipments.FreightForwarderID WHERE Shipments.Destination \u003d \u0027London\u0027;", + "sql_explanation": "This query joins the FreightForwarders table with the Shipments table on the FreightForwarderID column. It then filters for rows where the Destination is \u0027London\u0027, and returns the Name from the FreightForwarders table for those rows." 
+}, { + "id": "1493", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of returned goods for each reason, grouped by warehouse?", + "sql_context": "CREATE TABLE Warehouse (id INT, location VARCHAR(255)); INSERT INTO Warehouse (id, location) VALUES (1, \u0027New York\u0027), (2, \u0027Los Angeles\u0027), (3, \u0027Chicago\u0027); CREATE TABLE Returned_Goods (id INT, warehouse_id INT, reason VARCHAR(255), returned_date DATE, weight INT); INSERT INTO Returned_Goods (id, warehouse_id, reason, returned_date, weight) VALUES (1, 1, \u0027Damaged\u0027, \u00272021-01-15\u0027, 50), (2, 2, \u0027Wrong product\u0027, \u00272021-01-20\u0027, 75), (3, 3, \u0027Missing parts\u0027, \u00272021-01-25\u0027, 60);", + "sql": "SELECT rg.reason, w.location, AVG(rg.weight) as avg_weight FROM Returned_Goods rg JOIN Warehouse w ON rg.warehouse_id \u003d w.id GROUP BY rg.reason, w.location;", + "sql_explanation": "The SQL query joins the Returned_Goods and Warehouse tables to get the warehouse location for each returned good. It then groups the records by reason and warehouse location and calculates the average weight of the returned goods for each reason and warehouse location." 
+}, { + "id": "1924", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of pallets stored in warehouses located in Asia?", + "sql_context": "CREATE TABLE warehouses (id INT, country VARCHAR(255));CREATE TABLE inventory (id INT, warehouse_id INT, pallets INT);INSERT INTO warehouses (id, country) VALUES (1, \u0027China\u0027), (2, \u0027Japan\u0027), (3, \u0027USA\u0027), (4, \u0027Mexico\u0027);INSERT INTO inventory (id, warehouse_id, pallets) VALUES (1, 1, 500), (2, 1, 300), (3, 2, 250), (4, 3, 100);", + "sql": "SELECT SUM(pallets) as total_pallets FROM inventory i INNER JOIN warehouses w ON i.warehouse_id \u003d w.id WHERE w.country IN (\u0027China\u0027, \u0027Japan\u0027);", + "sql_explanation": "This query first joins the inventory and warehouses tables on the warehouse_id and id columns, respectively. Then, it filters the results to only include records where the country column is either \u0027China\u0027 or \u0027Japan\u0027. Finally, it uses the SUM function to calculate the total number of pallets stored in warehouses located in these two countries." 
+}, { + "id": "3108", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of packages shipped daily from each warehouse?", + "sql_context": "CREATE TABLE Shipments (id INT, warehouse_id INT, shipped_date DATE, packages INT); INSERT INTO Shipments (id, warehouse_id, shipped_date, packages) VALUES (1, 1, \u00272022-01-01\u0027, 50), (2, 1, \u00272022-01-02\u0027, 75), (3, 2, \u00272022-01-03\u0027, 100); CREATE TABLE Warehouses (id INT, name VARCHAR(50), city VARCHAR(50), country VARCHAR(50)); INSERT INTO Warehouses (id, name, city, country) VALUES (1, \u0027Warehouse A\u0027, \u0027City A\u0027, \u0027Country A\u0027), (2, \u0027Warehouse B\u0027, \u0027City B\u0027, \u0027Country B\u0027);", + "sql": "SELECT w.name, AVG(s.packages) FROM Shipments s JOIN Warehouses w ON s.warehouse_id \u003d w.id GROUP BY w.id;", + "sql_explanation": "This query joins the Shipments and Warehouses tables on warehouse_id and calculates the average number of packages shipped daily for each warehouse." 
+}, { + "id": "253", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the average capacity building expenditure per nonprofit in \u0027region_EU\u0027 over the last year.", + "sql_context": "CREATE TABLE nonprofit (nonprofit_id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO nonprofit (nonprofit_id, name, region) VALUES (1, \u0027Greenpeace EU\u0027, \u0027region_EU\u0027); INSERT INTO nonprofit (nonprofit_id, name, region) VALUES (2, \u0027Amnesty International EU\u0027, \u0027region_EU\u0027); CREATE TABLE expenditure (expenditure_id INT, nonprofit_id INT, category VARCHAR(50), amount DECIMAL(10, 2), expenditure_date DATE); INSERT INTO expenditure (expenditure_id, nonprofit_id, category, amount, expenditure_date) VALUES (1, 1, \u0027fundraising\u0027, 2000, \u00272021-01-01\u0027); INSERT INTO expenditure (expenditure_id, nonprofit_id, category, amount, expenditure_date) VALUES (2, 1, \u0027capacity building\u0027, 5000, \u00272021-02-15\u0027);", + "sql": "SELECT AVG(e.amount) as avg_capacity_expenditure FROM expenditure e INNER JOIN nonprofit n ON e.nonprofit_id \u003d n.nonprofit_id WHERE n.region \u003d \u0027region_EU\u0027 AND e.category \u003d \u0027capacity building\u0027 AND e.expenditure_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "This query calculates the average capacity building expenditure per nonprofit in \u0027region_EU\u0027 over the last year. It first joins the \u0027expenditure\u0027 and \u0027nonprofit\u0027 tables on the \u0027nonprofit_id\u0027 field. 
Then, it filters the records where the \u0027region\u0027 is \u0027region_EU\u0027 and the \u0027category\u0027 is \u0027capacity building\u0027 and the \u0027expenditure_date\u0027 is within the last year. Lastly, it calculates the average of the \u0027amount\u0027 field for the selected records." +}, { + "id": "512", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all volunteers who have volunteered for more than one program in the \u0027VolunteerPrograms\u0027 table.", + "sql_context": "CREATE TABLE VolunteerPrograms (ProgramID INT, ProgramName VARCHAR(50), VolunteerID INT);", + "sql": "SELECT V.FirstName, V.LastName, COUNT(DISTINCT ProgramID) AS ProgramCount FROM VolunteerPrograms VP INNER JOIN Volunteers V ON VP.VolunteerID \u003d V.VolunteerID GROUP BY V.VolunteerID HAVING COUNT(DISTINCT ProgramID) \u003e 1;", + "sql_explanation": "This SQL query lists all volunteers who have volunteered for more than one program in the \u0027VolunteerPrograms\u0027 table. It uses the INNER JOIN clause to combine the \u0027VolunteerPrograms\u0027 table with the \u0027Volunteers\u0027 table. The COUNT() function is used to count the number of unique programs each volunteer has volunteered for, and the HAVING clause filters the results to only include volunteers who have volunteered for more than one program. The GROUP BY clause groups the results by volunteer ID." 
+}, { + "id": "562", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers signed up for each program in Q2 2022?", + "sql_context": "CREATE TABLE programs (program_id INT, program_name TEXT); INSERT INTO programs (program_id, program_name) VALUES (1, \u0027Education\u0027), (2, \u0027Healthcare\u0027), (3, \u0027Environment\u0027); CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, program_id INT, volunteer_date DATE); INSERT INTO volunteers (volunteer_id, volunteer_name, program_id, volunteer_date) VALUES (1, \u0027Alex\u0027, 1, \u00272022-04-05\u0027), (2, \u0027Jamie\u0027, 2, \u00272022-05-10\u0027), (3, \u0027Michael\u0027, 1, \u00272022-06-15\u0027), (4, \u0027Oliver\u0027, 3, \u00272022-04-20\u0027), (5, \u0027Samantha\u0027, 2, \u00272022-05-25\u0027);", + "sql": "SELECT p.program_name, COUNT(v.volunteer_id) as num_volunteers FROM volunteers v JOIN programs p ON v.program_id \u003d p.program_id WHERE v.volunteer_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 GROUP BY p.program_name;", + "sql_explanation": "The SQL query joins the programs and volunteers tables based on the program_id foreign key. It then filters volunteers who signed up in Q2 2022 and groups them by program_name. Finally, it calculates the number of volunteers for each program." 
+}, { + "id": "689", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated to each non-profit in the last quarter?", + "sql_context": "CREATE TABLE non_profit (id INT, name TEXT); INSERT INTO non_profit (id, name) VALUES (1, \u0027Habitat for Humanity\u0027), (2, \u0027American Red Cross\u0027), (3, \u0027Doctors Without Borders\u0027); CREATE TABLE donations (non_profit_id INT, donation_amount INT, donation_date DATE); INSERT INTO donations (non_profit_id, donation_amount, donation_date) VALUES (1, 500, \u00272022-01-02\u0027), (2, 1000, \u00272022-03-15\u0027), (3, 750, \u00272022-02-28\u0027), (1, 250, \u00272021-12-28\u0027), (2, 800, \u00272021-11-10\u0027);", + "sql": "SELECT n.name, SUM(d.donation_amount) as total_donated FROM non_profit n INNER JOIN donations d ON n.id \u003d d.non_profit_id WHERE d.donation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) GROUP BY n.id;", + "sql_explanation": "This query performs an inner join between the non_profit table and the donations table, joining on the non_profit_id. It then filters for records where the donation_date is within the last quarter using the WHERE clause and the DATE_SUB function. Finally, it uses the SUM function to calculate the total amount donated to each non-profit in the last quarter and groups the results by non_profit_id." 
+}, { + "id": "700", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount for non-profit organizations focused on \u0027Environment\u0027 in \u0027California\u0027?", + "sql_context": "CREATE TABLE organization_info (name VARCHAR(50), focus VARCHAR(30), location VARCHAR(30)); INSERT INTO organization_info (name, focus, location) VALUES (\u0027Green Earth\u0027, \u0027Environment\u0027, \u0027California\u0027);", + "sql": "SELECT SUM(donation_amount) FROM donations JOIN organization_info ON donations.org_id \u003d organization_info.id WHERE organization_info.focus \u003d \u0027Environment\u0027 AND organization_info.location \u003d \u0027California\u0027;", + "sql_explanation": "This query retrieves the sum of donation_amount from donations table where the associated organization has a focus on \u0027Environment\u0027 and is located in \u0027California\u0027." 
+}, { + "id": "789", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have the highest total donation amounts in the \u0027donors\u0027 table, joined with their corresponding city information from the \u0027cities\u0027 table?", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount DECIMAL, city_id INT); CREATE TABLE cities (city_id INT, city_name TEXT);", + "sql": "SELECT cities.city_name, SUM(donation_amount) as total_donations FROM donors INNER JOIN cities ON donors.city_id \u003d cities.city_id GROUP BY cities.city_name ORDER BY total_donations DESC LIMIT 1;", + "sql_explanation": "This query performs an inner join on the \u0027donors\u0027 and \u0027cities\u0027 tables, using the city_id as the common key. It then calculates the total donation amount for each city using the SUM function, groups the result by city_name, and orders the result in descending order by total donations. The query returns the city with the highest total donation amount." 
+}, { + "id": "868", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated to each organization and the number of unique donors who have donated to each organization?", + "sql_context": "CREATE TABLE Organizations (org_id INT, org_name TEXT); CREATE TABLE Donors (donor_id INT, donor_name TEXT, org_id INT, donation_amount DECIMAL(10,2));", + "sql": "SELECT O.org_name, SUM(D.donation_amount) as total_donations, COUNT(DISTINCT D.donor_id) as total_donors FROM Organizations O INNER JOIN Donors D ON O.org_id \u003d D.org_id GROUP BY O.org_name;", + "sql_explanation": "First, an inner join is performed between the Organizations table and the Donors table, on the org_id field. This creates a new table with all records from both tables where there is a match on org_id. Then, the data is grouped by org_name, and aggregate functions are used to sum the total donations (total_donations) and count the number of unique donors (total_donors) for each organization." 
+}, { + "id": "961", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all volunteers who have not participated in any capacity building activities in the last 6 months.", + "sql_context": "CREATE TABLE volunteers (id INT, name TEXT); INSERT INTO volunteers (id, name) VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027), (3, \u0027Alice Johnson\u0027), (4, \u0027Bob Williams\u0027); CREATE TABLE capacity_building (volunteer_id INT, activity_date DATE); INSERT INTO capacity_building (volunteer_id, activity_date) VALUES (1, \u00272021-05-12\u0027), (2, \u00272022-03-15\u0027), (3, \u00272021-12-28\u0027), (1, \u00272020-08-07\u0027), (4, \u00272021-01-02\u0027);", + "sql": "SELECT v.name FROM volunteers v LEFT JOIN capacity_building cb ON v.id \u003d cb.volunteer_id WHERE cb.activity_date IS NULL OR cb.activity_date \u003c DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", + "sql_explanation": "This query performs a left join between the volunteers table and the capacity_building table, joining on the volunteer_id. It then filters for records where there is no matching activity_date in the past 6 months using the WHERE clause and the DATE_SUB function." 
+}, { + "id": "1290", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers for each organization, excluding those with less than 50 volunteers?", + "sql_context": "CREATE TABLE organizations (org_id INT, org_name TEXT);CREATE TABLE volunteers (vol_id INT, org_id INT, vol_country TEXT);", + "sql": "SELECT o.org_name, COUNT(v.vol_id) AS total_volunteers FROM organizations o JOIN volunteers v ON o.org_id \u003d v.org_id GROUP BY o.org_name HAVING total_volunteers \u003e\u003d 50;", + "sql_explanation": "This query first joins the organizations and volunteers tables on the org_id column. Then, it groups the results by the org_name column and calculates the number of volunteers (vol_id) for each organization. Finally, it uses the HAVING clause to exclude organizations with less than 50 volunteers." 
+}, { + "id": "1624", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated to each organization in the Southeast region?", + "sql_context": "CREATE TABLE donations (id INT, org_id INT, donation DECIMAL(10,2)); CREATE TABLE organizations (id INT, name TEXT, region TEXT); INSERT INTO donations (id, org_id, donation) VALUES (1, 1, 50.00), (2, 1, 75.00), (3, 2, 100.00), (4, 2, 125.00), (5, 3, 25.00), (6, 3, 50.00); INSERT INTO organizations (id, name, region) VALUES (1, \u0027Habitat for Humanity\u0027, \u0027Southeast\u0027), (2, \u0027Red Cross\u0027, \u0027Southeast\u0027), (3, \u0027UNICEF\u0027, \u0027Northeast\u0027);", + "sql": "SELECT o.name, SUM(d.donation) AS total_donations FROM donations d JOIN organizations o ON d.org_id \u003d o.id WHERE o.region \u003d \u0027Southeast\u0027 GROUP BY o.name;", + "sql_explanation": "This SQL query performs a join between the donations and organizations tables on the org_id column, and then groups the results by the name column from the organizations table. For each group, it calculates the sum of the donation column, which represents the total amount donated to each organization in the Southeast region." 
+}, { + "id": "2284", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who has donated from \u0027IL\u0027?", + "sql_context": "CREATE TABLE donors_3 (id INT PRIMARY KEY, name VARCHAR(50), age INT, city VARCHAR(50), state VARCHAR(50)); INSERT INTO donors_3 (id, name, age, city, state) VALUES (1, \u0027John Doe\u0027, 35, \u0027New York\u0027, \u0027NY\u0027), (2, \u0027Jane Smith\u0027, 40, \u0027Buffalo\u0027, \u0027NY\u0027), (3, \u0027Mike Johnson\u0027, 50, \u0027Boston\u0027, \u0027MA\u0027), (4, \u0027Emily Davis\u0027, 30, \u0027Chicago\u0027, \u0027IL\u0027);", + "sql": "SELECT donors_3.name FROM donors_3 INNER JOIN donations_5 ON donors_3.id \u003d donations_5.donor_id WHERE donations_5.state \u003d \u0027IL\u0027;", + "sql_explanation": "Join the donors_3 and donations_5 tables on the donor_id. Filter the results to only show records where the state is IL." 
+}, { + "id": "3354", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have not received grants?", + "sql_context": "CREATE TABLE organization (id INT PRIMARY KEY, name VARCHAR(255));CREATE TABLE grant (id INT PRIMARY KEY, organization_id INT, foundation_name VARCHAR(255));", + "sql": "SELECT o.name FROM organization o LEFT JOIN grant g ON o.id \u003d g.organization_id WHERE g.id IS NULL;", + "sql_explanation": "The query performs a left join between the organization and grant tables based on the organization_id. It then filters the results where grant.id is NULL, which indicates no grants have been received." 
+}, { + "id": "503", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique artifact types found in excavation sites in Africa, grouped by material.", + "sql_context": "CREATE TABLE excavation_sites (site_name TEXT, location TEXT, start_date DATE, end_date DATE); INSERT INTO excavation_sites (site_name, location, start_date, end_date) VALUES (\u0027Site N\u0027, \u0027Egypt\u0027, \u00273000-01-01\u0027, \u00272000-12-31\u0027); INSERT INTO excavation_sites (site_name, location, start_date, end_date) VALUES (\u0027Site O\u0027, \u0027Morocco\u0027, \u0027200-01-01\u0027, \u00271800-12-31\u0027); CREATE TABLE artifacts (site_name TEXT, artifact_type TEXT, artifact_material TEXT); INSERT INTO artifacts (site_name, artifact_type, artifact_material) VALUES (\u0027Site N\u0027, \u0027Pottery\u0027, \u0027Ceramic\u0027); INSERT INTO artifacts (site_name, artifact_type, artifact_material) VALUES (\u0027Site N\u0027, \u0027Statue\u0027, \u0027Stone\u0027); INSERT INTO artifacts (site_name, artifact_type, artifact_material) VALUES (\u0027Site O\u0027, \u0027Figurine\u0027, \u0027Clay\u0027);", + "sql": "SELECT artifact_material, COUNT(DISTINCT artifact_type) AS unique_types FROM excavation_sites INNER JOIN artifacts ON excavation_sites.site_name \u003d artifacts.site_name WHERE location \u003d \u0027Africa\u0027 GROUP BY artifact_material;", + "sql_explanation": "The SQL query performs an INNER JOIN on the excavation_sites and artifacts tables, filtering for excavation sites in Africa. 
The query then groups the results by artifact material and calculates the number of unique artifact types for each group." +}, { + "id": "754", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique artifact types for each site in the \u0027Southern Region\u0027?", + "sql_context": "CREATE TABLE excavation_sites (site_id INT, site_name TEXT, region TEXT); CREATE TABLE artifacts (artifact_id INT, site_id INT, artifact_type TEXT); INSERT INTO excavation_sites (site_id, site_name, region) VALUES (1, \u0027Site A\u0027, \u0027Southern Region\u0027), (2, \u0027Site B\u0027, \u0027Northern Region\u0027), (3, \u0027Site C\u0027, \u0027Southern Region\u0027); INSERT INTO artifacts (artifact_id, site_id, artifact_type) VALUES (1, 1, \u0027pottery\u0027), (2, 1, \u0027stone\u0027), (3, 2, \u0027metal\u0027), (4, 3, \u0027pottery\u0027), (5, 3, \u0027wooden\u0027), (6, 4, \u0027stone\u0027), (7, 4, \u0027pottery\u0027);", + "sql": "SELECT e.site_name, COUNT(DISTINCT a.artifact_type) as unique_artifact_types FROM excavation_sites e JOIN artifacts a ON e.site_id \u003d a.site_id WHERE e.region \u003d \u0027Southern Region\u0027 GROUP BY e.site_id;", + "sql_explanation": "Join excavation_sites and artifacts tables, filter for Southern Region, group by site_name, count distinct artifact_type, and return unique artifact types for each site." 
+}, { + "id": "1060", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artifact categories were most frequently excavated in Egypt?", + "sql_context": "CREATE TABLE excavation_sites (site_id INT, site_name VARCHAR(50), country VARCHAR(50)); INSERT INTO excavation_sites (site_id, site_name, country) VALUES (1, \u0027Site A\u0027, \u0027Egypt\u0027); CREATE TABLE artifacts (artifact_id INT, site_id INT, category VARCHAR(50));", + "sql": "SELECT a.category, COUNT(*) as frequency FROM excavation_sites e JOIN artifacts a ON e.site_id \u003d a.site_id WHERE e.country \u003d \u0027Egypt\u0027 GROUP BY a.category ORDER BY frequency DESC;", + "sql_explanation": "We join excavation_sites and artifacts tables on site_id, then filter for excavation sites in Egypt. After that, we calculate the frequency of each artifact category using GROUP BY and COUNT functions. Finally, we order by frequency." 
+}, { + "id": "1630", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and dates of all excavation sites in France and Italy?", + "sql_context": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Country TEXT); INSERT INTO ExcavationSites (SiteID, SiteName, Country) VALUES (1, \u0027Pompeii\u0027, \u0027Italy\u0027), (2, \u0027Herculaneum\u0027, \u0027Italy\u0027), (3, \u0027Lutetia\u0027, \u0027France\u0027);", + "sql": "SELECT SiteName, DateFound FROM ExcavationSites INNER JOIN Artifacts ON ExcavationSites.SiteID \u003d Artifacts.SiteID WHERE Country IN (\u0027Italy\u0027, \u0027France\u0027);", + "sql_explanation": "This query performs an inner join on the ExcavationSites and Artifacts tables, filtering for sites located in Italy and France. It retrieves the SiteName from ExcavationSites and the DateFound from Artifacts." 
+}, { + "id": "1302", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of safety tests performed in the autonomous driving research program?", + "sql_context": "CREATE TABLE SafetyTests (Id INT, TestType VARCHAR(50), VehicleId INT, TestDate DATE); CREATE TABLE AutonomousVehicles (Id INT, Name VARCHAR(100), Program VARCHAR(50)); INSERT INTO SafetyTests (Id, TestType, VehicleId, TestDate) VALUES (1, \u0027Lane Keeping\u0027, 1, \u00272020-01-10\u0027); INSERT INTO SafetyTests (Id, TestType, VehicleId, TestDate) VALUES (2, \u0027Emergency Braking\u0027, 1, \u00272020-01-11\u0027); INSERT INTO AutonomousVehicles (Id, Name, Program) VALUES (1, \u0027AutonomousDriving\u0027, \u0027ResearchProgram\u0027);", + "sql": "SELECT COUNT(*) FROM SafetyTests INNER JOIN AutonomousVehicles ON SafetyTests.VehicleId \u003d AutonomousVehicles.Id WHERE AutonomousVehicles.Program \u003d \u0027ResearchProgram\u0027;", + "sql_explanation": "This query calculates the total number of safety tests performed in the autonomous driving research program by using the COUNT() function and the INNER JOIN clause to combine the data from two tables (SafetyTests and AutonomousVehicles). It filters the records based on the program name using the WHERE clause." 
+}, { + "id": "1642", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of auto shows attended by a specific manufacturer?", + "sql_context": "CREATE TABLE Auto_Shows (id INT, manufacturer VARCHAR(50), show_name VARCHAR(50), year INT); CREATE TABLE Manufacturers (id INT, name VARCHAR(50));", + "sql": "SELECT COUNT(DISTINCT show_name) FROM Auto_Shows JOIN Manufacturers ON Auto_Shows.manufacturer \u003d Manufacturers.name WHERE Manufacturers.name \u003d \u0027Tesla\u0027;", + "sql_explanation": "This query calculates the total number of auto shows attended by a specific manufacturer by joining the Auto_Shows and Manufacturers tables on the manufacturer column and filtering the results by the name column in the Manufacturers table." 
+}, { + "id": "1844", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of charging stations for electric vehicles in California and New York?", + "sql_context": "CREATE TABLE charging_stations (station_id INT, station_name VARCHAR(50), location VARCHAR(50), quantity INT); CREATE TABLE electric_vehicles (vehicle_id INT, model VARCHAR(20), manufacture VARCHAR(20), vehicle_type VARCHAR(20), state VARCHAR(20));", + "sql": "SELECT SUM(quantity) FROM charging_stations cs JOIN electric_vehicles ev ON cs.location \u003d ev.state WHERE ev.state IN (\u0027California\u0027, \u0027New York\u0027);", + "sql_explanation": "1. Selects the sum of the quantity of charging stations for electric vehicles in California and New York. 2. Joins the \"charging_stations\" table with the \"electric_vehicles\" table on the state. 3. Filters the results to only include charging stations in California and New York." 
+}, { + "id": "328", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and categories of the decentralized applications that have been deployed on the Binance Smart Chain and have had the most number of transactions?", + "sql_context": "CREATE TABLE IF NOT EXISTS decentralized_applications (dapp_id INT PRIMARY KEY, name VARCHAR(100), tx_id INT, category VARCHAR(50), blockchain VARCHAR(50), FOREIGN KEY (tx_id) REFERENCES blockchain_transactions(tx_id)); CREATE TABLE IF NOT EXISTS blockchain_transactions (tx_id INT PRIMARY KEY, blockchain VARCHAR(50)); INSERT INTO blockchain_transactions (tx_id, blockchain) VALUES (1, \u0027Binance Smart Chain\u0027);", + "sql": "SELECT dapp_name, category, COUNT(dapp_id) FROM decentralized_applications da JOIN blockchain_transactions bt ON da.tx_id \u003d bt.tx_id WHERE bt.blockchain \u003d \u0027Binance Smart Chain\u0027 GROUP BY dapp_name, category ORDER BY COUNT(dapp_id) DESC LIMIT 10;", + "sql_explanation": "This query retrieves the names and categories of the decentralized applications that have been deployed on the Binance Smart Chain and have had the most number of transactions. It uses an INNER JOIN to combine data from the decentralized_applications and blockchain_transactions tables based on the tx_id column. Then, it filters the results based on the blockchain column in the blockchain_transactions table. 
Finally, it groups the results by the name and category columns in the decentralized_applications table, counts the number of transactions for each decentralized application using the COUNT function, and orders the results in descending order. The query then limits the results to the top 10 decentralized applications." +}, { + "id": "867", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average value of digital assets with smart contracts having the word \u0027agreement\u0027 in their name?", + "sql_context": "CREATE TABLE digital_assets (asset_id INT, asset_name VARCHAR(50), value DECIMAL(10,2)); INSERT INTO digital_assets (asset_id, asset_name, value) VALUES (1, \u0027Asset1\u0027, 50.5), (2, \u0027Asset2\u0027, 100.2), (3, \u0027Asset3\u0027, 75.0); CREATE TABLE smart_contracts (contract_id INT, asset_id INT, contract_name VARCHAR(50)); INSERT INTO smart_contracts (contract_id, asset_id, contract_name) VALUES (101, 1, \u0027ContractAgreement1\u0027), (102, 2, \u0027Contract2\u0027), (103, 3, \u0027Contract3\u0027);", + "sql": "SELECT AVG(digital_assets.value) FROM digital_assets INNER JOIN smart_contracts ON digital_assets.asset_id \u003d smart_contracts.asset_id WHERE smart_contracts.contract_name LIKE \u0027%agreement%\u0027;", + "sql_explanation": "Calculate the average value of digital assets associated with smart contracts having \u0027agreement\u0027 in their name." 
+}, { + "id": "2365", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of smart contracts developed by developers from the US and China?", + "sql_context": "CREATE TABLE developers (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO developers (id, name, country) VALUES (1, \u0027Alice\u0027, \u0027USA\u0027), (2, \u0027Bob\u0027, \u0027China\u0027); CREATE TABLE smart_contracts (id INT, name VARCHAR(50), developer_id INT); INSERT INTO smart_contracts (id, name, developer_id) VALUES (1, \u0027SC1\u0027, 1), (2, \u0027SC2\u0027, 2);", + "sql": "SELECT COUNT(*) FROM smart_contracts sc INNER JOIN developers d ON sc.developer_id \u003d d.id WHERE d.country IN (\u0027USA\u0027, \u0027China\u0027);", + "sql_explanation": "Join the smart_contracts and developers tables based on developer_id. Filter the records to only include developers from the USA and China, then count the number of smart contracts." 
+}, { + "id": "134", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the federal agencies with the largest budget increases in the last 5 fiscal years.", + "sql_context": "CREATE TABLE agency_budgets (agency_id INT, fiscal_year INT, budget_amount INT);", + "sql": "SELECT agency_id, agency_name, ((MAX(budget_amount) - MIN(budget_amount)) * 100.0 / MIN(budget_amount)) AS budget_increase_percentage FROM agency_budgets JOIN agencies ON agency_budgets.agency_id \u003d agencies.agency_id GROUP BY agency_id, agency_name ORDER BY budget_increase_percentage DESC;", + "sql_explanation": "We first calculate the difference between the maximum and minimum budget amounts for each agency and divide by the minimum budget amount to get the budget increase percentage. Then, we list the federal agencies with the largest budget increases in the last 5 fiscal years." 
+}, { + "id": "498", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Present the number of public libraries and public schools in each district of Chicago", + "sql_context": "CREATE TABLE libraries (library_id INT, name VARCHAR(255), location VARCHAR(255), district VARCHAR(255)); CREATE TABLE schools (school_id INT, name VARCHAR(255), location VARCHAR(255), district VARCHAR(255)); INSERT INTO libraries (library_id, name, location, district) VALUES (1, \u0027Chicago Public Library\u0027, \u0027400 S State St\u0027, \u0027Downtown Chicago\u0027); INSERT INTO schools (school_id, name, location, district) VALUES (1, \u0027Chicago Public School\u0027, \u0027500 W Adams St\u0027, \u0027Downtown Chicago\u0027);", + "sql": "SELECT libraries.district, COUNT(libraries.library_id) AS libraries_count, COUNT(schools.school_id) AS schools_count FROM libraries INNER JOIN schools ON libraries.district \u003d schools.district GROUP BY libraries.district;", + "sql_explanation": "This query presents the number of public libraries and public schools in each district of Chicago by selecting the records from the libraries and schools table, joining the records on district and grouping the result set by district, returning the district, count of libraries and count of schools columns." 
+}, { + "id": "785", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average budget for initiatives related to \u0027environment\u0027 for each department?", + "sql_context": "CREATE TABLE department (id INT, name TEXT);CREATE TABLE initiatives (id INT, department_id INT, budget INT, category TEXT);", + "sql": "SELECT department.name, AVG(initiatives.budget) FROM department JOIN initiatives ON department.id \u003d initiatives.department_id WHERE initiatives.category \u003d \u0027environment\u0027 GROUP BY department.name;", + "sql_explanation": "Joins the \"department\" table with the \"initiatives\" table on department_id, then filters the results where the category is \u0027environment\u0027. Groups the results by department name and calculates the average budget for each group." 
+}, { + "id": "891", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of public transportation vehicles in each city in the state of California, including their type and capacity?", + "sql_context": "CREATE TABLE cities(id INT, name TEXT, state TEXT); INSERT INTO cities VALUES (1, \u0027City A\u0027, \u0027California\u0027); INSERT INTO cities VALUES (2, \u0027City B\u0027, \u0027California\u0027); INSERT INTO cities VALUES (3, \u0027City C\u0027, \u0027California\u0027); CREATE TABLE vehicles(id INT, city_id INT, type TEXT, capacity INT); INSERT INTO vehicles VALUES (1, 1, \u0027Bus\u0027, 50); INSERT INTO vehicles VALUES (2, 1, \u0027Train\u0027, 1000); INSERT INTO vehicles VALUES (3, 2, \u0027Bus\u0027, 40); INSERT INTO vehicles VALUES (4, 2, \u0027Tram\u0027, 300); INSERT INTO vehicles VALUES (5, 3, \u0027Bus\u0027, 60);", + "sql": "SELECT c.name, v.type, COUNT(*) as vehicle_count, SUM(v.capacity) as total_capacity FROM cities c JOIN vehicles v ON c.id \u003d v.city_id WHERE c.state \u003d \u0027California\u0027 GROUP BY c.name, v.type;", + "sql_explanation": "We create two tables: cities and vehicles, both containing the columns id and name. We also include the state column in the cities table and the city_id, type, and capacity columns in the vehicles table. We insert sample records for cities and vehicles in California. Next, we perform a JOIN on these tables using the id column. We group the results by city name and vehicle type, and calculate the vehicle count and total capacity." 
+}, { + "id": "1755", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 cities with the most public libraries?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(255)); CREATE TABLE libraries (id INT, city_id INT, name VARCHAR(255));", + "sql": "SELECT c.name, COUNT(l.id) AS library_count FROM cities c JOIN libraries l ON c.id \u003d l.city_id GROUP BY c.name ORDER BY library_count DESC LIMIT 5;", + "sql_explanation": "This query performs an inner join between the cities and libraries tables on the city_id column. It groups the results by city name and counts the number of libraries for each city. Finally, it orders the results by library count in descending order and returns the top 5 cities." 
+}, { + "id": "419", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of mental health parity violations and the total number of patients treated for mental health issues in each community?", + "sql_context": "CREATE TABLE ParityViolations (ViolationID int, CommunityID int, ViolationCount int);CREATE TABLE CommunityMentalHealth (CommunityID int, PatientID int);", + "sql": "SELECT CommunityID, SUM(ViolationCount) as TotalViolations, COUNT(PatientID) as PatientCount FROM ParityViolations JOIN CommunityMentalHealth ON ParityViolations.CommunityID \u003d CommunityMentalHealth.CommunityID GROUP BY CommunityID;", + "sql_explanation": "We calculate the total number of mental health parity violations and the total number of patients treated for mental health issues in each community. We use the SUM function to find the total number of violations, the COUNT function to count the number of patients, and the JOIN and GROUP BY clauses to combine and group the results by community." 
+}, { + "id": "540", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique mental health conditions treated by providers in the Midwest and West, along with their total patient count.", + "sql_context": "CREATE TABLE mental_health_conditions (condition_id INT, condition_name VARCHAR(50)); INSERT INTO mental_health_conditions (condition_id, condition_name) VALUES (1, \u0027Anxiety\u0027), (2, \u0027Depression\u0027), (3, \u0027Bipolar Disorder\u0027); CREATE TABLE providers (provider_id INT, provider_name VARCHAR(50), region_id INT); INSERT INTO providers (provider_id, provider_name, region_id) VALUES (1, \u0027Dr. Smith\u0027, 3), (2, \u0027Dr. Johnson\u0027, 5); CREATE TABLE provider_patients (provider_id INT, condition_id INT, patient_id INT);", + "sql": "SELECT mhc.condition_name, COUNT(pp.patient_id) as total_patients FROM mental_health_conditions mhc CROSS JOIN providers p ON mhc.condition_id \u003d pp.condition_id AND p.region_id IN (3, 5) GROUP BY mhc.condition_name;", + "sql_explanation": "This query performs a cross join between the mental_health_conditions and providers tables, filtering for regions 3 (Midwest) and 5 (West). It then groups the results by condition_name and calculates the total patient count for each condition." 
+}, { + "id": "1127", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify underrepresented communities with no healthcare providers in LA and MS.", + "sql_context": "CREATE TABLE healthcare_providers (provider_id INT, name TEXT, state TEXT); INSERT INTO healthcare_providers (provider_id, name, state) VALUES (1, \u0027Dr. Keisha Brown\u0027, \u0027LA\u0027); CREATE TABLE underrepresented_communities (community TEXT, state TEXT);", + "sql": "SELECT u.community, u.state FROM underrepresented_communities u LEFT JOIN healthcare_providers h ON u.state \u003d h.state WHERE h.provider_id IS NULL AND u.state IN (\u0027LA\u0027, \u0027MS\u0027);", + "sql_explanation": "Use a left join to identify underrepresented communities with no healthcare providers in Louisiana and Mississippi, returning their community names and states." 
+}, { + "id": "1410", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score by county for Community Health Workers?", + "sql_context": "CREATE TABLE Counties (CountyID INT, CountyName VARCHAR(50), State VARCHAR(50)); CREATE TABLE CommunityHealthWorkers (CHW_ID INT, CountyID INT, MentalHealthScore INT); INSERT INTO Counties (CountyID, CountyName, State) VALUES (1, \u0027Harris\u0027, \u0027Texas\u0027), (2, \u0027Los Angeles\u0027, \u0027California\u0027); INSERT INTO CommunityHealthWorkers (CHW_ID, CountyID, MentalHealthScore) VALUES (1, 1, 85), (2, 1, 90), (3, 2, 75), (4, 2, 70);", + "sql": "SELECT c.CountyName, AVG(chw.MentalHealthScore) as Avg_Score FROM CommunityHealthWorkers chw JOIN Counties c ON chw.CountyID \u003d c.CountyID GROUP BY c.CountyName;", + "sql_explanation": "This query joins the CommunityHealthWorkers table with the Counties table based on CountyID, then calculates the average MentalHealthScore for Community Health Workers in each county." 
+}, { + "id": "1423", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of cultural competency training sessions conducted per community health worker by region?", + "sql_context": "CREATE TABLE cultural_competency_training (region VARCHAR(255), sessions INT); CREATE TABLE community_health_workers (region VARCHAR(255), training_level VARCHAR(255), workers INT); INSERT INTO cultural_competency_training (region, sessions) VALUES (\u0027Northeast\u0027, 400), (\u0027Southeast\u0027, 500), (\u0027Midwest\u0027, 350), (\u0027West\u0027, 600); INSERT INTO community_health_workers (region, training_level, workers) VALUES (\u0027Northeast\u0027, \u0027Beginner\u0027, 200), (\u0027Northeast\u0027, \u0027Intermediate\u0027, 150), (\u0027Northeast\u0027, \u0027Advanced\u0027, 100), (\u0027Southeast\u0027, \u0027Beginner\u0027, 250), (\u0027Southeast\u0027, \u0027Intermediate\u0027, 200), (\u0027Southeast\u0027, \u0027Advanced\u0027, 100), (\u0027Midwest\u0027, \u0027Beginner\u0027, 180), (\u0027Midwest\u0027, \u0027Intermediate\u0027, 120), (\u0027Midwest\u0027, \u0027Advanced\u0027, 80), (\u0027West\u0027, \u0027Beginner\u0027, 300), (\u0027West\u0027, \u0027Intermediate\u0027, 250), (\u0027West\u0027, \u0027Advanced\u0027, 150);", + "sql": "SELECT c.region, AVG(c.sessions / c.workers) FROM cultural_competency_training c INNER JOIN community_health_workers h ON c.region \u003d h.region GROUP BY c.region;", + "sql_explanation": "This SQL query calculates the average number of cultural competency training sessions conducted per community health worker by region. 
It does this by joining the two tables on the region column and then grouping the data by region, averaging the ratio of sessions to workers for each group." +}, { + "id": "1603", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a cross join table for mental health parity and community health workers", + "sql_context": "CREATE TABLE mental_health_parity (id INT PRIMARY KEY, state VARCHAR(2), year INT, accessibility FLOAT, quality FLOAT); CREATE TABLE community_health_workers (id INT PRIMARY KEY, state VARCHAR(2), year INT, workers_per_capita FLOAT, supervision_hours INT);", + "sql": "CREATE TABLE if not exists mental_health_parity_community_workers AS SELECT * FROM mental_health_parity CROSS JOIN community_health_workers WHERE FALSE;", + "sql_explanation": "A new table \u0027mental_health_parity_community_workers\u0027 is created which is a cross join of \u0027mental_health_parity\u0027 and \u0027community_health_workers\u0027 tables using the \u0027CREATE TABLE AS SELECT * FROM table1 CROSS JOIN table2 WHERE FALSE\u0027 pattern." 
+}, { + "id": "1639", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of mental health parity violations for each cultural competency program?", + "sql_context": "CREATE TABLE cultural_competency_program (id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO cultural_competency_program (id, name, location) VALUES (1, \u0027Care for All\u0027, \u0027New York\u0027), (2, \u0027Cultura y Salud\u0027, \u0027California\u0027), (3, \u0027Together We Heal\u0027, \u0027Florida\u0027); CREATE TABLE mental_health_parity_violations (id INT, program_id INT, violation_count INT); INSERT INTO mental_health_parity_violations (id, program_id, violation_count) VALUES (1, 1, 3), (2, 2, 0), (3, 1, 2), (4, 3, 1);", + "sql": "SELECT ccp.name, mhpv.violation_count FROM cultural_competency_program ccp INNER JOIN mental_health_parity_violations mhpv ON ccp.id \u003d mhpv.program_id;", + "sql_explanation": "Join the cultural_competency_program and mental_health_parity_violations tables on their common id column. Select the name of the cultural competency program and the number of mental health parity violations." 
+}, { + "id": "1759", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community health workers who identify as LGBTQ+ and work in states that have implemented mental health parity laws?", + "sql_context": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), ethnicity VARCHAR(50), state VARCHAR(50), lgbtq_identification BOOLEAN); INSERT INTO community_health_workers (id, name, ethnicity, state, lgbtq_identification) VALUES (1, \u0027John Doe\u0027, \u0027Cisgender\u0027, \u0027California\u0027, TRUE), (2, \u0027Jane Smith\u0027, \u0027Transgender\u0027, \u0027New York\u0027, TRUE), (3, \u0027Maria Garcia\u0027, \u0027Cisgender\u0027, \u0027Florida\u0027, FALSE), (4, \u0027Pedro Rodriguez\u0027, \u0027Transgender\u0027, \u0027Texas\u0027, FALSE); CREATE TABLE mental_health_parity_laws (state VARCHAR(50)); INSERT INTO mental_health_parity_laws (state) VALUES (\u0027California\u0027), (\u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM community_health_workers cw JOIN mental_health_parity_laws mhpl ON cw.state \u003d mhpl.state WHERE cw.lgbtq_identification \u003d TRUE;", + "sql_explanation": "This query counts the total number of community health workers who identify as LGBTQ+ and work in states that have implemented mental health parity laws. It uses a JOIN clause to combine the community_health_workers and mental_health_parity_laws tables, and filters for workers who identify as LGBTQ+ and work in states that have implemented mental health parity laws. It then uses the COUNT function to count the number of workers that meet these criteria." 
+}, { + "id": "387", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the details of readers who have not read any article in the last month from \u0027monthlyupdate\u0027 database.", + "sql_context": "CREATE TABLE readers (reader_id INT, last_login DATE); CREATE TABLE article_readers (article_id INT, reader_id INT, read_date DATE); INSERT INTO readers VALUES (1, \u00272022-01-01\u0027); INSERT INTO article_readers VALUES (1, 1, \u00272022-01-02\u0027);", + "sql": "SELECT readers.reader_id, readers.last_login FROM readers LEFT JOIN article_readers ON readers.reader_id \u003d article_readers.reader_id WHERE article_readers.read_date IS NULL AND readers.last_login \u003c DATE_SUB(CURDATE(), INTERVAL 1 MONTH)", + "sql_explanation": "This SQL query gets the details of readers who have not read any article in the last month from \u0027monthlyupdate\u0027 database. It does so by using a LEFT JOIN to combine the \u0027readers\u0027 and \u0027article_readers\u0027 tables, while filtering the data for rows where the \u0027article_readers.read_date\u0027 column is NULL and the \u0027readers.last_login\u0027 column is less than the current date minus 1 month." 
+}, { + "id": "591", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 most read articles in \u0027articles\u0027 table, with an outer join on \u0027read_counts\u0027 table containing read counts for each article.", + "sql_context": "CREATE TABLE articles (article_id INT, author_id INT, title VARCHAR(100), pub_date DATE); CREATE TABLE read_counts (article_id INT, read_count INT);", + "sql": "SELECT articles.title, SUM(read_counts.read_count) FROM articles OUTER JOIN read_counts ON articles.article_id \u003d read_counts.article_id GROUP BY articles.title ORDER BY SUM(read_counts.read_count) DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 most read articles by performing an outer join on the \u0027read_counts\u0027 table. It first groups the records by the title column and then calculates the sum of read_count for each group. Finally, it orders the results in descending order and returns the top 3 records." 
+}, { + "id": "1160", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the ages of investigative journalists in \u0027New York Times\u0027 and \u0027Los Angeles Times\u0027?", + "sql_context": "CREATE TABLE NYT_Investigative(id INT, name VARCHAR(20), age INT, job VARCHAR(20));CREATE TABLE LAT_Investigative(id INT, name VARCHAR(20), age INT, job VARCHAR(20));", + "sql": "SELECT ny.age FROM NYT_Investigative ny JOIN LAT_Investigative lat ON ny.name \u003d lat.name WHERE ny.job \u003d \u0027investigative journalist\u0027 AND lat.job \u003d \u0027investigative journalist\u0027;", + "sql_explanation": "Find the ages of investigative journalists in \u0027New York Times\u0027 and \u0027Los Angeles Times\u0027 by joining the two tables on the \u0027name\u0027 column and filtering for \u0027investigative journalist\u0027 in the \u0027job\u0027 column." 
+}, { + "id": "2423", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which authors have published more than 5 articles, ordered by their names?", + "sql_context": "CREATE TABLE authors (id INT, name VARCHAR(50), gender VARCHAR(10)); INSERT INTO authors (id, name, gender) VALUES (1, \u0027Juana Doe\u0027, \u0027Female\u0027), (2, \u0027JosÊ Smith\u0027, \u0027Male\u0027); CREATE TABLE articles (id INT, author_id INT, title VARCHAR(100), content TEXT); INSERT INTO articles (id, author_id, title, content) VALUES (1, 1, \u0027Artículo 1\u0027, \u0027Contenido 1\u0027), (2, 1, \u0027Article 2\u0027, \u0027Content 2\u0027), (3, 1, \u0027Article 3\u0027, \u0027Content 3\u0027), (4, 1, \u0027Article 4\u0027, \u0027Content 4\u0027), (5, 1, \u0027Article 5\u0027, \u0027Content 5\u0027), (6, 2, \u0027Article 6\u0027, \u0027Content 6\u0027);", + "sql": "SELECT a.name FROM authors a JOIN articles ar ON a.id \u003d ar.author_id GROUP BY a.name HAVING COUNT(ar.id) \u003e 5 ORDER BY a.name;", + "sql_explanation": "This query joins the authors and articles tables on the author_id column, then groups the results by the name column from the authors table. The HAVING clause is used to filter the results to only include authors who have published more than 5 articles. The results are ordered by the name column from the authors table." 
+}, { + "id": "2451", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which articles were published in \u0027Philadelphia Inquirer\u0027 and \u0027Seattle Times\u0027 on the same day?", + "sql_context": "CREATE TABLE PI_Articles(id INT, title VARCHAR(50), publication DATE);CREATE TABLE ST_Articles(id INT, title VARCHAR(50), publication DATE);", + "sql": "SELECT pi.title FROM PI_Articles pi JOIN ST_Articles st ON pi.publication \u003d st.publication WHERE pi.publication IS NOT NULL;", + "sql_explanation": "Find articles published in \u0027Philadelphia Inquirer\u0027 and \u0027Seattle Times\u0027 on the same day by joining the two tables on the \u0027publication\u0027 column and filtering for non-NULL values." 
+}, { + "id": "2959", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which authors have published more than 5 articles?", + "sql_context": "CREATE TABLE authors (id INT, name VARCHAR(50)); INSERT INTO authors (id, name) VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027); CREATE TABLE articles (id INT, author_id INT, title VARCHAR(100), content TEXT); INSERT INTO articles (id, author_id, title, content) VALUES (1, 1, \u0027Article 1\u0027, \u0027Content 1\u0027), (2, 1, \u0027Article 2\u0027, \u0027Content 2\u0027), (3, 1, \u0027Article 3\u0027, \u0027Content 3\u0027), (4, 1, \u0027Article 4\u0027, \u0027Content 4\u0027), (5, 1, \u0027Article 5\u0027, \u0027Content 5\u0027), (6, 2, \u0027Article 6\u0027, \u0027Content 6\u0027);", + "sql": "SELECT a.name FROM authors a JOIN articles ar ON a.id \u003d ar.author_id GROUP BY a.name HAVING COUNT(ar.id) \u003e 5;", + "sql_explanation": "This query joins the authors and articles tables on the author_id column, then groups the results by the name column from the authors table. The HAVING clause is used to filter the results to only include authors who have published more than 5 articles." 
+}, { + "id": "3041", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of articles published by each author, with a cross join to the authors table?", + "sql_context": "CREATE TABLE news_articles (article_id INT PRIMARY KEY, title TEXT, topic TEXT, author_id INT, publication_date DATE); CREATE TABLE authors (author_id INT PRIMARY KEY, author_name TEXT);", + "sql": "SELECT a.author_name, COUNT(n.article_id) FROM authors a CROSS JOIN news_articles n GROUP BY a.author_name;", + "sql_explanation": "This query uses a cross join to combine every row in the news_articles table with every row in the authors table, grouping the table by the author_name column and counting the number of rows in each group to provide the number of articles published by each author. Note that this query will likely produce a cartesian product, so it may not be practical for large tables." 
+}, { + "id": "91", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 10 most active volunteers by total hours in Q2 2022, ordered by their location?", + "sql_context": "CREATE TABLE volunteer_hours (hour_id INT, volunteer_id INT, hours_spent FLOAT, hour_date DATE); INSERT INTO volunteer_hours (hour_id, volunteer_id, hours_spent, hour_date) VALUES (1, 1, 3, \u00272022-04-01\u0027); INSERT INTO volunteer_hours (hour_id, volunteer_id, hours_spent, hour_date) VALUES (2, 2, 5, \u00272022-05-03\u0027);", + "sql": "SELECT v.signup_location, vh.volunteer_id, SUM(vh.hours_spent) AS total_hours FROM volunteer_hours vh INNER JOIN volunteers v ON vh.volunteer_id \u003d v.volunteer_id WHERE EXTRACT(MONTH FROM vh.hour_date) BETWEEN 4 AND 6 GROUP BY v.signup_location, vh.volunteer_id ORDER BY total_hours DESC, v.signup_location;", + "sql_explanation": "This query lists the top 10 most active volunteers by total hours in Q2 2022, ordered by their location. It joins the volunteer_hours and volunteers tables, groups the data by location and volunteer_id, sums the hours_spent, and orders the result by total hours and location." 
+}, { + "id": "261", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of volunteers in each region and total hours volunteered, from \u0027volunteers\u0027 and \u0027volunteer_hours\u0027 tables", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, region TEXT); CREATE TABLE volunteer_hours (volunteer_id INT, hours_volunteered DECIMAL);", + "sql": "SELECT volunteers.region, COUNT(volunteers.volunteer_id) as num_volunteers, SUM(volunteer_hours.hours_volunteered) as total_hours FROM volunteers LEFT JOIN volunteer_hours ON volunteers.volunteer_id \u003d volunteer_hours.volunteer_id GROUP BY volunteers.region;", + "sql_explanation": "This query performs a LEFT JOIN on the \u0027volunteers\u0027 and \u0027volunteer_hours\u0027 tables, groups by region, and calculates the number of volunteers and total hours volunteered for each region." 
+}, { + "id": "382", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total financial contribution by each department for Q2 2022?", + "sql_context": "CREATE TABLE departments (department_id INT, department_name TEXT); CREATE TABLE financials (financial_id INT, department_id INT, financial_date DATE, financial_amount FLOAT); INSERT INTO financials (financial_id, department_id, financial_date, financial_amount) VALUES (1, 1, \u00272022-04-01\u0027, 5000.00), (2, 1, \u00272022-06-15\u0027, 3000.00), (3, 2, \u00272022-05-30\u0027, 7000.00);", + "sql": "SELECT department_name, SUM(financial_amount) as total_financial_contribution FROM financials f JOIN departments d ON f.department_id \u003d d.department_id WHERE financial_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 GROUP BY department_name;", + "sql_explanation": "This query joins the departments and financials table, filters the records for Q2 2022, and calculates the total financial contribution for each department." 
+}, { + "id": "385", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total financial contribution by each department for Q1 2023?", + "sql_context": "CREATE TABLE departments (department_id INT, department_name TEXT); CREATE TABLE financials (financial_id INT, department_id INT, financial_date DATE, financial_amount FLOAT); INSERT INTO financials (financial_id, department_id, financial_date, financial_amount) VALUES (1, 1, \u00272023-01-07\u0027, 5000.00), (2, 2, \u00272023-01-20\u0027, 8000.00), (3, 3, \u00272023-01-31\u0027, 3000.00);", + "sql": "SELECT department_name, SUM(financial_amount) as total_financial_contribution FROM financials f JOIN departments d ON f.department_id \u003d d.department_id WHERE financial_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-01-31\u0027 GROUP BY department_name;", + "sql_explanation": "This query joins the departments and financials table, filters the records for Q1 2023, and calculates the total financial contribution for each department." 
+}, { + "id": "418", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by repeat donors in the last 6 months?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, DonationAmount DECIMAL(10,2), DonationDate DATE); CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), FirstDonationDate DATE);", + "sql": "SELECT SUM(Donations.DonationAmount) FROM Donations INNER JOIN Donors ON Donations.DonorID \u003d Donors.DonorID WHERE Donations.DonationDate \u003e\u003d DATEADD(month, -6, GETDATE()) AND Donors.FirstDonationDate \u003c DATEADD(month, -6, GETDATE());", + "sql_explanation": "This query calculates the total amount donated by repeat donors in the last 6 months by using the SUM function on the DonationAmount column, filtering the Donations table to only include rows where the DonationDate is within the last 6 months, and joining with the Donors table to only include donors who made their first donation more than 6 months ago." 
+}, { + "id": "422", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers have participated in programs focused on environmental sustainability in the past 12 months?", + "sql_context": "CREATE TABLE volunteers (id INT, name VARCHAR(100), program_id INT, participation_date DATE); CREATE TABLE programs (id INT, name VARCHAR(100), focus_area VARCHAR(50)); INSERT INTO volunteers (id, name, program_id, participation_date) VALUES (1, \u0027Alice Johnson\u0027, 1, \u00272021-02-20\u0027); INSERT INTO volunteers (id, name, program_id, participation_date) VALUES (2, \u0027Bob Williams\u0027, 2, \u00272020-12-01\u0027); INSERT INTO programs (id, name, focus_area) VALUES (1, \u0027Plant a Tree\u0027, \u0027environmental sustainability\u0027); INSERT INTO programs (id, name, focus_area) VALUES (2, \u0027Clean the Beach\u0027, \u0027environmental sustainability\u0027);", + "sql": "SELECT COUNT(DISTINCT volunteers.id) FROM volunteers JOIN programs ON volunteers.program_id \u003d programs.id WHERE programs.focus_area \u003d \u0027environmental sustainability\u0027 AND participation_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 12 MONTH);", + "sql_explanation": "This query counts the number of unique volunteers who have participated in environmental sustainability programs in the past 12 months by counting the number of distinct id values in the volunteers table. It filters volunteers based on their participation date and focus area using a JOIN clause and date comparison, which is not shown here." 
+}, { + "id": "446", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 donors by total donation amount, along with their email and the date of their largest donation.", + "sql_context": "CREATE TABLE Donations (DonationID int, DonorID int, Amount decimal(10,2), DonationDate date); INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (1, 1, 500.00, \u00272022-01-01\u0027), (2, 2, 350.00, \u00272022-02-01\u0027), (3, 3, 200.00, \u00272022-03-01\u0027);", + "sql": "SELECT D.Name, D.Email, MAX(D.Amount) as LargestDonation, MAX(D.DonationDate) as LatestDonation FROM Donors D JOIN Donations DD ON D.DonorID \u003d DD.DonorID GROUP BY D.DonorID, D.Name, D.Email ORDER BY MAX(D.Amount) DESC LIMIT 5;", + "sql_explanation": "This query joins the Donors and Donations tables on the DonorID field. It then groups the results by donor and calculates the largest donation amount and date for each donor. Finally, it sorts the results by largest donation amount in descending order and returns the top 5 donors." 
+}, { + "id": "469", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all programs, their respective managers, and total budget from the \u0027programs\u0027 and \u0027managers\u0027 tables", + "sql_context": "CREATE TABLE programs (program_id INT, program_name TEXT, manager_id INT); CREATE TABLE managers (manager_id INT, manager_name TEXT, budget DECIMAL);", + "sql": "SELECT programs.program_name, managers.manager_name, SUM(managers.budget) as total_budget FROM programs INNER JOIN managers ON programs.manager_id \u003d managers.manager_id GROUP BY programs.program_name, managers.manager_name;", + "sql_explanation": "This query performs an INNER JOIN on the \u0027programs\u0027 and \u0027managers\u0027 tables using the manager_id, then groups by program_name and manager_name to calculate the total budget for each program." 
+}, { + "id": "806", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by first-time donors from Australia in 2019?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); CREATE TABLE donors (id INT, is_first_time_donor BOOLEAN, country VARCHAR(50)); INSERT INTO donations (id, donor_id, donation_amount, donation_date) VALUES (1, 1, 100.00, \u00272019-04-15\u0027); INSERT INTO donors (id, is_first_time_donor, country) VALUES (1, true, \u0027Australia\u0027);", + "sql": "SELECT SUM(donation_amount) FROM donations JOIN donors ON donations.donor_id \u003d donors.id WHERE donors.country \u003d \u0027Australia\u0027 AND donors.is_first_time_donor \u003d true AND YEAR(donation_date) \u003d 2019;", + "sql_explanation": "This query calculates the total donation amount from first-time donors in Australia for the year 2019 by summing the donation_amount field. It filters donors based on their first-time donor status and country using a JOIN clause and date comparison, which is not shown here." 
+}, { + "id": "1105", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which donors have donated to both the arts and sports categories?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonorID INT, ProgramID INT); INSERT INTO Donations (DonationID, DonorID, ProgramID) VALUES (1, 1, 1), (2, 1, 2), (3, 2, 3), (4, 2, 4), (5, 3, 5), (6, 3, 6); CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Category TEXT); INSERT INTO Programs (ProgramID, ProgramName, Category) VALUES (1, \u0027Theater Production\u0027, \u0027Arts\u0027), (2, \u0027Dance Performance\u0027, \u0027Arts\u0027), (3, \u0027Soccer Tournament\u0027, \u0027Sports\u0027), (4, \u0027Basketball Camp\u0027, \u0027Sports\u0027), (5, \u0027Painting Class\u0027, \u0027Arts\u0027), (6, \u0027Writing Workshop\u0027, \u0027Arts\u0027);", + "sql": "SELECT D.DonorID FROM Donations D JOIN Programs P ON D.ProgramID \u003d P.ProgramID WHERE P.Category IN (\u0027Arts\u0027, \u0027Sports\u0027) GROUP BY D.DonorID HAVING COUNT(DISTINCT P.Category) \u003d 2;", + "sql_explanation": "This SQL query identifies donors who have donated to both the arts and sports categories by joining the Donations and Programs tables on ProgramID. It then filters the results to only include programs from the arts and sports categories using the WHERE clause and the IN keyword. Finally, it groups the results by donor and filters the results to only include donors who have donated to programs in both categories using the HAVING clause and the COUNT function." 
+}, { + "id": "1619", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers for each program in the \u0027programs\u0027 and \u0027volunteers\u0027 tables?", + "sql_context": "CREATE TABLE programs (program_id INT, program_name TEXT); CREATE TABLE volunteers (volunteer_id INT, volunteer_name TEXT, program_id INT); INSERT INTO programs (program_id, program_name) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027); INSERT INTO volunteers (volunteer_id, volunteer_name, program_id) VALUES (1, \u0027John Doe\u0027, 1), (2, \u0027Jane Smith\u0027, 1), (3, \u0027Alice Johnson\u0027, 2);", + "sql": "SELECT p.program_name, COUNT(v.volunteer_id) as num_volunteers FROM programs p JOIN volunteers v ON p.program_id \u003d v.program_id GROUP BY p.program_name;", + "sql_explanation": "The SQL query joins the \u0027programs\u0027 and \u0027volunteers\u0027 tables on the program_id column using the JOIN clause. It then groups the results by program_name using the GROUP BY clause and calculates the total number of volunteers for each program using the COUNT() function." 
+}, { + "id": "1646", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all programs and their total expenses, ordered by expense", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR); CREATE TABLE expenses (program_id INT, amount INT)", + "sql": "SELECT p.name, SUM(e.amount) AS total_expense FROM programs p JOIN expenses e ON p.id \u003d e.program_id GROUP BY p.id, p.name ORDER BY total_expense DESC;", + "sql_explanation": "This query performs an inner join between programs (p) and expenses (e) tables on their respective id and program_id columns. It then groups the results by program name and sums the expenses. Finally, it orders the result by total expense in descending order." 
+}, { + "id": "1687", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers from underrepresented communities in Canada joined in 2020?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, joined_date DATE, underrepresented_community BOOLEAN); INSERT INTO volunteers (volunteer_id, joined_date, underrepresented_community) VALUES (1, \u00272020-05-12\u0027, true), (2, \u00272019-11-04\u0027, false);", + "sql": "SELECT COUNT(*) FROM volunteers WHERE joined_date BETWEEN \u00272020-01-01\u0027 AND \u00272020-12-31\u0027 AND underrepresented_community \u003d true AND country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query counts the number of volunteers who joined in 2020 from underrepresented communities in Canada by filtering the rows in the volunteers table. It checks the joined_date column between \u00272020-01-01\u0027 and \u00272020-12-31\u0027, the underrepresented_community column is true, and the country column is Canada." 
+}, { + "id": "2171", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount by donors from the Middle East?", + "sql_context": "CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL); INSERT INTO donations (id, donor_id, amount) VALUES (1, 1, 150.00), (2, 2, 75.00), (3, 1, 250.00), (4, 3, 120.00); CREATE TABLE donors (id INT, name TEXT, region TEXT); INSERT INTO donors (id, name, region) VALUES (1, \u0027Hanin\u0027, \u0027Middle East\u0027), (2, \u0027Oliver\u0027, \u0027Europe\u0027), (3, \u0027Xiaoli\u0027, \u0027Asia\u0027);", + "sql": "SELECT AVG(donations.amount) FROM donations INNER JOIN donors ON donations.donor_id \u003d donors.id WHERE donors.region \u003d \u0027Middle East\u0027;", + "sql_explanation": "The SQL query calculates the average donation amount by donors from the Middle East by joining the donations and donors tables on the donor_id column, and filtering for donors from the Middle East using the WHERE clause. It then calculates the average of the amount column using the AVG function." 
+}, { + "id": "2270", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers have registered for each region, and what is the minimum age of volunteers for each region?", + "sql_context": "CREATE TABLE regions (region_id INT, region_name VARCHAR(255)); CREATE TABLE volunteers (volunteer_id INT, volunteer_name VARCHAR(255), age INT, region_id INT); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027); INSERT INTO volunteers (volunteer_id, volunteer_name, age, region_id) VALUES (1, \u0027John Doe\u0027, 25, 1), (2, \u0027Jane Smith\u0027, 23, 2), (3, \u0027Bob Johnson\u0027, 30, 3), (4, \u0027Alice Davis\u0027, 28, 4);", + "sql": "SELECT region_id, COUNT(*), MIN(age) FROM volunteers JOIN regions ON volunteers.region_id \u003d regions.region_id GROUP BY region_id;", + "sql_explanation": "The query performs an INNER JOIN between the regions and volunteers table to combine the region_id with the volunteer data. After that, the query calculates the number of volunteers and the minimum age for each region." 
+}, { + "id": "2482", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount per donor type?", + "sql_context": "CREATE TABLE donor_type (id INT, type VARCHAR(20)); INSERT INTO donor_type (id, type) VALUES (1, \u0027Individual\u0027), (2, \u0027Corporate\u0027), (3, \u0027Foundation\u0027); CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2));", + "sql": "SELECT dt.type, SUM(d.amount) as total_donation FROM donations d JOIN donor_type dt ON d.donor_id \u003d dt.id GROUP BY dt.type;", + "sql_explanation": "The SQL query joins the donations table and the donor_type table on the donor_id and id columns respectively. It then calculates the total donation amount for each donor type by grouping the result by the type column and summing the amount column." 
+}, { + "id": "200", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average donation amount per program category?", + "sql_context": "CREATE TABLE ProgramCategories (CategoryID INT, CategoryName TEXT, Budget DECIMAL(10, 2)); INSERT INTO ProgramCategories (CategoryID, CategoryName, Budget) VALUES (1, \u0027Education\u0027, 75000.00), (2, \u0027Health\u0027, 100000.00); CREATE TABLE ProgramCategoryDonations (DonationID INT, CategoryID INT, Amount DECIMAL(10, 2)); INSERT INTO ProgramCategoryDonations (DonationID, CategoryID, Amount) VALUES (1, 1, 50.00), (2, 1, 75.00), (3, 2, 100.00);", + "sql": "SELECT ProgramCategories.CategoryName, AVG(ProgramCategoryDonations.Amount) AS AverageDonationAmount FROM ProgramCategories INNER JOIN ProgramCategoryDonations ON ProgramCategories.CategoryID \u003d ProgramCategoryDonations.CategoryID GROUP BY ProgramCategories.CategoryName;", + "sql_explanation": "The SQL query performs an INNER JOIN between the ProgramCategories and ProgramCategoryDonations tables based on CategoryID. Then, it calculates the average donation amount for each program category using the AVG function and GROUP BY clause on the CategoryName column." 
+}, { + "id": "219", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of the total budget spent for each program in Q2?", + "sql_context": "CREATE TABLE Programs (id INT, program VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO Programs (id, program, budget) VALUES (1, \u0027Feeding America\u0027, 10000.00); CREATE TABLE Donors (id INT, donor_name VARCHAR(50), donation_amount DECIMAL(10,2), donation_date DATE, program_id INT); INSERT INTO Donors (id, donor_name, donation_amount, donation_date, program_id) VALUES (1, \u0027Alex Brown\u0027, 200.00, \u00272021-01-01\u0027, 1);", + "sql": "SELECT program, SUM(budget) as total_budget, SUM(donation_amount) as total_spending, (SUM(donation_amount)/SUM(budget))*100 as percentage_spent FROM Programs p INNER JOIN Donors d ON p.id \u003d d.program_id WHERE EXTRACT(QUARTER FROM donation_date) \u003d 2 GROUP BY program;", + "sql_explanation": "This query joins the Programs and Donors tables on the program_id column, filters the data for donations made in Q2, groups the results by program, calculates the total budget and total spending for each program in Q2, and calculates the percentage of the total budget spent for each program in Q2." 
+}, { + "id": "364", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the volunteers who have not participated in any activities in the last month for the Health program.", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT); CREATE TABLE Activities (ActivityID INT, VolunteerID INT, ProgramID INT, ActivityDate DATE); INSERT INTO Volunteers (VolunteerID, Name) VALUES (1, \u0027Liam Chen\u0027), (2, \u0027Olivia Johnson\u0027), (3, \u0027Ava Martin\u0027), (4, \u0027Ethan Kim\u0027); INSERT INTO Activities (ActivityID, VolunteerID, ProgramID, ActivityDate) VALUES (1, 1, 1, \u00272022-01-01\u0027), (2, 2, 1, \u00272022-02-01\u0027), (3, 3, 2, \u00272022-03-01\u0027), (4, 1, 3, \u00272022-04-01\u0027), (5, 2, 1, \u00272022-05-01\u0027);", + "sql": "SELECT Volunteers.Name FROM Volunteers LEFT JOIN Activities ON Volunteers.VolunteerID \u003d Activities.VolunteerID WHERE Activities.ActivityDate \u003c DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND Activities.ActivityDate IS NULL AND ProgramID \u003d 2;", + "sql_explanation": "This query performs a left join between the Volunteers and Activities tables on the VolunteerID field. It then filters the results to only include volunteers who have not participated in any activities in the last month and are associated with the Health program (ProgramID 2)." 
+}, { + "id": "559", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total number of volunteer hours for each program in Q3 2020, grouped by city?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT, City TEXT, Hours DECIMAL(10,2)); CREATE TABLE Programs (ProgramID INT, Name TEXT, VolunteerID INT, StartDate DATE); INSERT INTO Volunteers (VolunteerID, Name, City, Hours) VALUES (1, \u0027James Johnson\u0027, \u0027New York\u0027, 20.00), (2, \u0027Natalie Brown\u0027, \u0027Los Angeles\u0027, 25.00), (3, \u0027Michael Davis\u0027, \u0027Chicago\u0027, 30.00); INSERT INTO Programs (ProgramID, Name, VolunteerID, StartDate) VALUES (1, \u0027Tutoring Kids\u0027, 1, \u00272020-07-15\u0027), (2, \u0027Cleaning Beaches\u0027, 2, \u00272020-08-01\u0027), (3, \u0027Planting Trees\u0027, 3, \u00272020-09-10\u0027);", + "sql": "SELECT City, SUM(Hours) as \u0027Total Volunteer Hours\u0027 FROM Volunteers INNER JOIN Programs ON Volunteers.VolunteerID \u003d Programs.VolunteerID WHERE Programs.StartDate BETWEEN \u00272020-07-01\u0027 AND \u00272020-09-30\u0027 GROUP BY City;", + "sql_explanation": "This query retrieves the total number of volunteer hours for each program in Q3 2020, grouped by city by summing the Hours field for each unique city in the Volunteers table where StartDate is in Q3 2020 and joining the Volunteers and Programs table based on VolunteerID." 
+}, { + "id": "573", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of volunteer hours contributed by a single volunteer in Q3 2025?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT);CREATE TABLE VolunteerHours (HourID INT, VolunteerID INT, Hours DECIMAL(10,2), HourDate DATE);", + "sql": "SELECT V.Name, MAX(VH.Hours) as MaxHours FROM VolunteerHours VH JOIN Volunteers V ON VH.VolunteerID \u003d Volunteers.VolunteerID WHERE VH.HourDate BETWEEN \u00272025-07-01\u0027 AND \u00272025-09-30\u0027 GROUP BY V.VolunteerID, V.Name;", + "sql_explanation": "This query joins the VolunteerHours and Volunteers tables on the VolunteerID column. It then filters the VolunteerHours table to only include records from Q3 2025. Finally, it groups the results by VolunteerID and Name, and calculates the maximum number of hours contributed by a single volunteer." 
+}, { + "id": "601", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers engaged with each program in Q4 2021?", + "sql_context": "CREATE TABLE Programs (program_id INT, program_name VARCHAR(50)); CREATE TABLE Volunteer_Hours (volunteer_id INT, program_id INT, hours DECIMAL(5,2), volunteer_date DATE);", + "sql": "SELECT p.program_name, COUNT(DISTINCT v.volunteer_id) FROM Programs p JOIN Volunteer_Hours v ON p.program_id \u003d v.program_id WHERE v.volunteer_date BETWEEN \u00272021-10-01\u0027 AND \u00272021-12-31\u0027 GROUP BY p.program_name;", + "sql_explanation": "This query calculates the number of unique volunteers who engaged with each program in Q4 2021. The query first joins the Programs table with the Volunteer_Hours table on the program_id, and then filters the records based on the volunteer_date. The query calculates the count of distinct volunteer_id for each program using the GROUP BY clause." 
+}, { + "id": "633", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers who have contributed more than 50 hours?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName TEXT, Program TEXT); INSERT INTO Volunteers (VolunteerID, VolunteerName, Program) VALUES (1, \u0027Alice\u0027, \u0027Education\u0027), (2, \u0027Bob\u0027, \u0027Health\u0027), (3, \u0027Charlie\u0027, \u0027Education\u0027); CREATE TABLE VolunteerHours (VolunteerID INT, Hours INT); INSERT INTO VolunteerHours (VolunteerID, Hours) VALUES (1, 120), (1, 130), (2, 80), (3, 110), (3, 120);", + "sql": "SELECT COUNT(DISTINCT Volunteers.VolunteerID) FROM Volunteers JOIN VolunteerHours ON Volunteers.VolunteerID \u003d VolunteerHours.VolunteerID GROUP BY Volunteers.VolunteerID HAVING SUM(VolunteerHours.Hours) \u003e 50;", + "sql_explanation": "The SQL query first joins the Volunteers and VolunteerHours tables on the VolunteerID. Next, it groups the results by volunteer using the GROUP BY clause. The HAVING clause is then used to filter for volunteers who have contributed more than 50 hours. Finally, the COUNT function is used to count the number of distinct volunteers." 
+}, { + "id": "634", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all programs with their respective total volunteer hours in 2020.", + "sql_context": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT); CREATE TABLE VolunteerHours (VolunteerID INT, ProgramID INT, Hours FLOAT, VolunteerDate DATE); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, \u0027Feeding Program\u0027), (2, \u0027Education Support\u0027); INSERT INTO VolunteerHours (VolunteerID, ProgramID, Hours, VolunteerDate) VALUES (1, 1, 5, \u00272020-01-01\u0027), (2, 1, 7, \u00272020-01-02\u0027);", + "sql": "SELECT Programs.ProgramName, SUM(VolunteerHours.Hours) FROM Programs INNER JOIN VolunteerHours ON Programs.ProgramID \u003d VolunteerHours.ProgramID WHERE YEAR(VolunteerDate) \u003d 2020 GROUP BY Programs.ProgramName;", + "sql_explanation": "This query calculates the total volunteer hours for each program in the year 2020. It joins the Programs and VolunteerHours tables based on the ProgramID field. Then, it filters the records based on the VolunteerDate field and groups them by the ProgramName. Finally, it calculates the sum of Hours for each group." 
+}, { + "id": "866", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the programs and the number of unique volunteers who participated in each program in Q1 2022.", + "sql_context": "CREATE TABLE Programs (ProgramID int, Name varchar(50), Budget money); CREATE TABLE Volunteers (VolunteerID int, Name varchar(50), Age int, ProgramID int); INSERT INTO Programs (ProgramID, Name, Budget) VALUES (1, \u0027Education\u0027, 10000), (2, \u0027Healthcare\u0027, 15000); INSERT INTO Volunteers (VolunteerID, Name, Age, ProgramID) VALUES (1, \u0027Alice\u0027, 25, 1), (2, \u0027Bob\u0027, 22, 1), (3, \u0027Charlie\u0027, 30, 2), (4, \u0027David\u0027, 28, 2);", + "sql": "SELECT P.Name, COUNT(DISTINCT V.VolunteerID) as UniqueVolunteers FROM Programs P JOIN Volunteers V ON P.ProgramID \u003d V.ProgramID WHERE MONTH(V.VolunteerDate) BETWEEN 1 AND 3 GROUP BY P.Name;", + "sql_explanation": "The SQL query lists the programs and the number of unique volunteers who participated in each program in Q1 2022. It does so by joining the Programs and Volunteers tables based on the ProgramID and then filtering the records based on the volunteer date." 
+}, { + "id": "896", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers have donated more than $50 in Australia?", + "sql_context": "CREATE TABLE Donations (id INT, donor_name VARCHAR(50), donation_amount DECIMAL(10,2), donation_date DATE, country VARCHAR(50)); CREATE TABLE Volunteers (id INT, volunteer_name VARCHAR(50), country VARCHAR(50)); INSERT INTO Donations (id, donor_name, donation_amount, donation_date, country) VALUES (1, \u0027John Doe\u0027, 50.00, \u00272021-01-01\u0027, \u0027Australia\u0027); INSERT INTO Donations (id, donor_name, donation_amount, donation_date, country) VALUES (2, \u0027Jane Smith\u0027, 25.00, \u00272021-01-02\u0027, \u0027Australia\u0027); INSERT INTO Volunteers (id, volunteer_name, country) VALUES (1, \u0027John Doe\u0027, \u0027Australia\u0027); INSERT INTO Volunteers (id, volunteer_name, country) VALUES (2, \u0027Jane Smith\u0027, \u0027Australia\u0027);", + "sql": "SELECT COUNT(DISTINCT d.donor_name) as num_volunteers FROM Donations d INNER JOIN Volunteers v ON d.donor_name \u003d v.volunteer_name WHERE d.country \u003d \u0027Australia\u0027 AND d.donation_amount \u003e 50;", + "sql_explanation": "This query calculates the number of volunteers who have donated more than $50 in Australia by joining the Donations and Volunteers tables on the donor_name and volunteer_name columns. It then filters the results to only include donations from Australia with an amount greater than $50 and counts the number of distinct donors." 
+}, { + "id": "1370", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor and their respective city?", + "sql_context": "CREATE TABLE Donors (id INT, donor_name VARCHAR(255), city VARCHAR(255), email VARCHAR(255), address VARCHAR(255)); CREATE TABLE Donations (id INT, donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE);", + "sql": "SELECT d.donor_name, d.city, SUM(don.donation_amount) as total_donated FROM Donors d INNER JOIN Donations don ON d.id \u003d don.donor_id GROUP BY d.donor_name, d.city;", + "sql_explanation": "We select the donor_name, city from the Donors table and sum the total donation_amount from the Donations table by using the INNER JOIN and aggregate function. We group the result by donor_name and city." 
+}, { + "id": "1599", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the volunteers who have not yet completed their required training?", + "sql_context": "CREATE TABLE volunteer_training (id INT, volunteer_id INT, training_completed BOOLEAN);", + "sql": "SELECT first_name, last_name FROM volunteers JOIN volunteer_training ON volunteers.id \u003d volunteer_training.volunteer_id WHERE training_completed \u003d FALSE;", + "sql_explanation": "This SQL query selects the first and last names of the volunteers who have not yet completed their required training by joining the volunteers and volunteer_training tables on the volunteer_id column, then filtering the results by training_completed \u003d FALSE." 
+}, { + "id": "1651", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for programs starting in the month of January?", + "sql_context": "CREATE TABLE ProgramBudget (ProgramID INT, Month INT, Budget DECIMAL(10,2)); CREATE TABLE Programs (ProgramID INT, ProgramName TEXT);", + "sql": "SELECT SUM(ProgramBudget.Budget) FROM ProgramBudget INNER JOIN Programs ON ProgramBudget.ProgramID \u003d Programs.ProgramID WHERE ProgramBudget.Month \u003d 1;", + "sql_explanation": "This query calculates the total budget allocated for programs starting in the month of January. It filters records for the month of January using the WHERE clause and the Month column. Finally, it calculates the sum of Budget for all records." 
+}, { + "id": "2426", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget for the \u0027Education\u0027 program?", + "sql_context": "CREATE TABLE budgets (id INT, program_id INT, budget_amount DECIMAL(10,2)); INSERT INTO budgets (id, program_id, budget_amount) VALUES (1, 1, 1000.00), (2, 2, 2000.00), (3, 3, 1500.00), (4, 4, 500.00); CREATE TABLE programs (id INT, name VARCHAR(255)); INSERT INTO programs (id, name) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027), (4, \u0027Arts\u0027);", + "sql": "SELECT SUM(b.budget_amount) as total_budget FROM budgets b JOIN programs p ON b.program_id \u003d p.id WHERE p.name \u003d \u0027Education\u0027;", + "sql_explanation": "This query joins the budgets and programs tables, filters the budgets by the \u0027Education\u0027 program, calculates the total budget for the program, and returns the result." 
+}, { + "id": "3301", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the volunteers who have not yet donated?", + "sql_context": "CREATE TABLE Donors (DonorID INT, Name TEXT, TotalDonation FLOAT); INSERT INTO Donors (DonorID, Name, TotalDonation) VALUES (1, \u0027John Doe\u0027, 500.00), (2, \u0027Jane Smith\u0027, 750.00); CREATE TABLE Volunteers (VolunteerID INT, Name TEXT, DonorID INT); INSERT INTO Volunteers (VolunteerID, Name, DonorID) VALUES (1, \u0027Alice\u0027, 1), (2, \u0027Bob\u0027, NULL);", + "sql": "SELECT v.Name FROM Volunteers v LEFT JOIN Donors d ON v.DonorID \u003d d.DonorID WHERE d.DonorID IS NULL;", + "sql_explanation": "This query performs a left join between the Volunteers and Donors tables on the DonorID column and filters for volunteers who do not have a matching donor record." 
+}, { + "id": "58", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 countries with the most ethical AI research initiatives, considering both quantity and diversity of projects?", + "sql_context": "CREATE TABLE countries (id INT, name VARCHAR(50)); CREATE TABLE ethical_ai_initiatives (id INT, country_id INT, initiatives INT, diversity_score INT); INSERT INTO countries (id, name) VALUES (1, \u0027USA\u0027), (2, \u0027China\u0027), (3, \u0027UK\u0027), (4, \u0027Germany\u0027), (5, \u0027Canada\u0027), (6, \u0027India\u0027); INSERT INTO ethical_ai_initiatives (id, country_id, initiatives, diversity_score) VALUES (1, 1, 200, 80), (2, 1, 300, 70), (3, 2, 400, 60), (4, 3, 100, 90), (5, 3, 200, 85), (6, 4, 150, 80), (7, 4, 250, 90), (8, 5, 100, 75), (9, 6, 300, 65);", + "sql": "SELECT countries.name, SUM(ethical_ai_initiatives.initiatives) as total_initiatives, AVG(ethical_ai_initiatives.diversity_score) as avg_diversity_score FROM countries INNER JOIN ethical_ai_initiatives ON countries.id \u003d ethical_ai_initiatives.country_id GROUP BY countries.name ORDER BY total_initiatives DESC, avg_diversity_score DESC LIMIT 5;", + "sql_explanation": "This query identifies the top 5 countries with the most ethical AI research initiatives, considering both quantity and diversity of projects, by performing an INNER JOIN on the countries and ethical_ai_initiatives tables, grouping by the country name, summing the number of initiatives in each country, calculating the average diversity score, and ordering the results in descending order based on total initiatives and diversity score, limiting 
the output to the top 5." +}, { + "id": "514", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and types of assistive technologies that have been distributed in underrepresented communities in the last 5 years?", + "sql_context": "CREATE TABLE assistive_tech (id INT, name VARCHAR(255), type VARCHAR(255), distribution_date DATE); CREATE TABLE communities (id INT, name VARCHAR(255), region VARCHAR(255));", + "sql": "SELECT assistive_tech.name, assistive_tech.type FROM assistive_tech INNER JOIN communities ON assistive_tech.distribution_date \u003e\u003d DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR) WHERE communities.region \u003d \u0027Underrepresented\u0027;", + "sql_explanation": "This query retrieves the names and types of assistive technologies that have been distributed in underrepresented communities in the last 5 years. It joins the assistive_tech and communities tables using the distribution date and filters the results by the region using a WHERE clause." 
+}, { + "id": "551", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by organizations that have implemented digital divide initiatives, broken down by the type of funding (government or private)?", + "sql_context": "CREATE TABLE funding (funding_id INT, org_id INT, amount INT, funding_type VARCHAR(50)); INSERT INTO funding (funding_id, org_id, amount, funding_type) VALUES (1, 1, 100000, \u0027government\u0027), (2, 1, 200000, \u0027private\u0027), (3, 2, 150000, \u0027private\u0027); CREATE TABLE organizations (org_id INT, name VARCHAR(50), implemented_digital_divide_initiatives BOOLEAN); INSERT INTO organizations (org_id, name, implemented_digital_divide_initiatives) VALUES (1, \u0027Tech for Good Inc.\u0027, TRUE), (2, \u0027Social Impact Corp.\u0027, FALSE);", + "sql": "SELECT implemented_digital_divide_initiatives, funding_type, SUM(amount) FROM funding INNER JOIN organizations ON funding.org_id \u003d organizations.org_id GROUP BY implemented_digital_divide_initiatives, funding_type;", + "sql_explanation": "This query calculates the total funding received by organizations that have implemented digital divide initiatives, broken down by the type of funding (government or private). It performs an inner join between the funding and organizations tables on the org_id column, and then groups the result by the implemented_digital_divide_initiatives and funding_type columns. For each group, it calculates the sum of the amount column to get the total funding received." 
+}, { + "id": "599", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage of digital divide funding received by organizations in the last year, partitioned by organization size.", + "sql_context": "CREATE TABLE org_size (org_name VARCHAR(50), size VARCHAR(50), funding INT); INSERT INTO org_size (org_name, size, funding) VALUES (\u0027Tech4Good\u0027, \u0027Small\u0027, 20000), (\u0027AI4People\u0027, \u0027Medium\u0027, 30000), (\u0027EthicsMatter\u0027, \u0027Large\u0027, 40000), (\u0027AI4Future\u0027, \u0027Small\u0027, 22000), (\u0027GoodAI\u0027, \u0027Medium\u0027, 35000); CREATE TABLE funding_data (org_name VARCHAR(50), year INT, funding INT); INSERT INTO funding_data (org_name, year, funding) VALUES (\u0027Tech4Good\u0027, 2021, 25000), (\u0027AI4People\u0027, 2021, 32000), (\u0027EthicsMatter\u0027, 2021, 45000), (\u0027AI4Future\u0027, 2021, 27000), (\u0027GoodAI\u0027, 2021, 38000);", + "sql": "SELECT o.org_name, o.size, (o.funding * 100.0 / f.funding) as percentage FROM org_size o JOIN funding_data f ON o.org_name \u003d f.org_name WHERE f.year \u003d 2021 GROUP BY o.org_name, o.size ORDER BY percentage DESC;", + "sql_explanation": "Calculate the percentage of digital divide funding received by organizations in the last year, partitioned by organization size by joining the two tables and dividing the funding for each organization by the total funding for each organization size. Order the results in descending order by the percentage." 
+}, { + "id": "1671", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by organizations that have implemented digital divide initiatives?", + "sql_context": "CREATE TABLE funding (funding_id INT, org_id INT, amount INT); INSERT INTO funding (funding_id, org_id, amount) VALUES (1, 1, 100000), (2, 1, 200000), (3, 2, 150000); CREATE TABLE organizations (org_id INT, name VARCHAR(50), implemented_digital_divide_initiatives BOOLEAN); INSERT INTO organizations (org_id, name, implemented_digital_divide_initiatives) VALUES (1, \u0027Tech for Good Inc.\u0027, TRUE), (2, \u0027Social Impact Corp.\u0027, FALSE);", + "sql": "SELECT SUM(amount) FROM funding INNER JOIN organizations ON funding.org_id \u003d organizations.org_id WHERE implemented_digital_divide_initiatives \u003d TRUE;", + "sql_explanation": "This query calculates the total funding received by organizations that have implemented digital divide initiatives. It performs an inner join between the funding and organizations tables on the org_id column, and then filters the result to only include rows where the implemented_digital_divide_initiatives column is TRUE. Finally, it calculates the sum of the amount column to get the total funding received by these organizations." 
+}, { + "id": "1673", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of accessible technology costs by gender?", + "sql_context": "CREATE TABLE people (id INT, gender VARCHAR(50), technology_id INT);", + "sql": "SELECT gender, AVG(cost) as avg_cost FROM people p INNER JOIN technology t ON p.technology_id \u003d t.id WHERE t.accessibility_rating \u003e 6 GROUP BY gender;", + "sql_explanation": "This query first joins the people and technology tables on the technology_id column. It then filters the results where the accessibility rating of the technology is greater than 6. Lastly, it calculates the average cost of the technology for each gender." 
+}, { + "id": "2811", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities have hosted conferences on ethical AI?", + "sql_context": "CREATE TABLE conferences (id INT PRIMARY KEY, name VARCHAR(255), city VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (1, \u0027Ethical AI Summit\u0027, \u0027San Francisco\u0027, \u00272022-06-01\u0027, \u00272022-06-03\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (2, \u0027Climate Change Tech Conference\u0027, \u0027Vancouver\u0027, \u00272022-07-01\u0027, \u00272022-07-02\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (3, \u0027Accessibility in Tech Conference\u0027, \u0027Toronto\u0027, \u00272022-08-01\u0027, \u00272022-08-03\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (4, \u0027Ethical AI Conference\u0027, \u0027New York\u0027, \u00272022-09-01\u0027, \u00272022-09-03\u0027); INSERT INTO conferences (id, name, city, start_date, end_date) VALUES (5, \u0027AI for Social Good Summit\u0027, \u0027London\u0027, \u00272022-10-01\u0027, \u00272022-10-03\u0027); CREATE TABLE ethical_ai_topics (id INT PRIMARY KEY, conference_id INT, title VARCHAR(255)); INSERT INTO ethical_ai_topics (id, conference_id, title) VALUES (1, 1, \u0027Ethical AI in Healthcare\u0027); INSERT INTO ethical_ai_topics (id, conference_id, title) VALUES (2, 4, \u0027Ethical AI for Climate Change\u0027); INSERT INTO ethical_ai_topics (id, conference_id, title) VALUES (3, 5, \u0027Ethical AI in Education\u0027);", + "sql": 
"SELECT DISTINCT city FROM conferences JOIN ethical_ai_topics ON conferences.id \u003d ethical_ai_topics.conference_id;", + "sql_explanation": "This query retrieves the cities that have hosted conferences on ethical AI by joining the conferences and ethical_ai_topics tables and using the DISTINCT keyword to filter the results based on the city column in the conferences table." +}, { + "id": "1074", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total crop yield and production cost for each crop variety in the \"crops\" and \"expenses\" tables?", + "sql_context": "CREATE TABLE crops (id INT, crop_name VARCHAR(50), yield INT); CREATE TABLE expenses (id INT, crop_id INT, cost INT);", + "sql": "SELECT crops.crop_name, SUM(crops.yield) AS total_yield, SUM(expenses.cost) AS total_cost FROM crops INNER JOIN expenses ON crops.id \u003d expenses.crop_id GROUP BY crops.crop_name;", + "sql_explanation": "This query retrieves the total crop yield and production cost for each crop variety in the \"crops\" and \"expenses\" tables by performing an inner join on the \"id\" column of the \"crops\" table and the \"crop_id\" column of the \"expenses\" table and grouping the results by crop name using the GROUP BY clause. The total crop yield is calculated using the SUM() function on the \"yield\" column of the \"crops\" table and the total production cost is calculated using the SUM() function on the \"cost\" column of the \"expenses\" table." 
+}, { + "id": "1142", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total area of organic farmland in each province of Canada in 2016.", + "sql_context": "CREATE TABLE canadian_provinces (province_name TEXT, province_abbr TEXT); INSERT INTO canadian_provinces (province_name, province_abbr) VALUES (\u0027Alberta\u0027, \u0027AB\u0027), (\u0027British Columbia\u0027, \u0027BC\u0027); CREATE TABLE organic_farmland (country TEXT, province TEXT, area INTEGER, year INTEGER); INSERT INTO organic_farmland (country, province, area, year) VALUES (\u0027CA\u0027, \u0027AB\u0027, 120000, 2016), (\u0027CA\u0027, \u0027BC\u0027, 150000, 2016);", + "sql": "SELECT province, SUM(area) FROM organic_farmland JOIN canadian_provinces ON organic_farmland.province \u003d canadian_provinces.province_abbr WHERE year \u003d 2016 GROUP BY province;", + "sql_explanation": "Joins the organic_farmland and canadian_provinces tables, filters for 2016, groups by province, and calculates the total organic farmland for each province." 
+}, { + "id": "2203", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which farmers in the Southwest have organic certifications and what are their names?", + "sql_context": "CREATE TABLE farmers (id INT PRIMARY KEY, name VARCHAR(50), age INT, ethnicity VARCHAR(20), region VARCHAR(25)); CREATE TABLE certifications (id INT PRIMARY KEY, farmer_id INT, name VARCHAR(50), authority VARCHAR(50)); INSERT INTO farmers (id, name, age, ethnicity, region) VALUES (1, \u0027Jose Hernandez\u0027, 50, \u0027Latinx\u0027, \u0027Southwest\u0027); INSERT INTO certifications (id, farmer_id, name, authority) VALUES (1, 1, \u0027Organic\u0027, \u0027USDA\u0027);", + "sql": "SELECT f.name FROM farmers f INNER JOIN certifications c ON f.id \u003d c.farmer_id WHERE f.region \u003d \u0027Southwest\u0027 AND c.name \u003d \u0027Organic\u0027;", + "sql_explanation": "This SQL query joins the farmers and certifications tables using an inner join on the farmer_id column. It then filters for farmers from the Southwest with organic certifications and retrieves their names." 
+}, { + "id": "2317", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which farmers in California have irrigation systems installed?", + "sql_context": "CREATE TABLE Farmers (id INT PRIMARY KEY, name VARCHAR(50), age INT, location VARCHAR(50)); CREATE TABLE Irrigation (id INT PRIMARY KEY, system VARCHAR(50), cost FLOAT, installation_date DATE, farm_id INT, FOREIGN KEY (farm_id) REFERENCES Farmers(id)); INSERT INTO Farmers (id, name, age, location) VALUES (1, \u0027Juan Garcia\u0027, 45, \u0027Texas\u0027), (2, \u0027Maria Rodriguez\u0027, 50, \u0027California\u0027), (3, \u0027Raul Sanchez\u0027, 35, \u0027California\u0027), (4, \u0027Emma White\u0027, 60, \u0027Oregon\u0027), (5, \u0027Liam Green\u0027, 28, \u0027Washington\u0027); INSERT INTO Irrigation (id, system, cost, installation_date, farm_id) VALUES (1, \u0027Drip\u0027, 5000.00, \u00272021-06-01\u0027, 1), (2, \u0027Sprinkler\u0027, 7000.00, \u00272022-02-15\u0027, 2);", + "sql": "SELECT Farmers.name FROM Farmers INNER JOIN Irrigation ON Farmers.id \u003d Irrigation.farm_id WHERE Farmers.location \u003d \u0027California\u0027;", + "sql_explanation": "This query selects the names of farmers from the Farmers table who have a corresponding record in the Irrigation table and are located in California, indicating that they have an irrigation system installed." 
+}, { + "id": "2428", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum yield for crops grown by farmers in India since 2020?", + "sql_context": "CREATE TABLE Farmers (id INT PRIMARY KEY, name VARCHAR(50), age INT, location VARCHAR(50)); INSERT INTO Farmers (id, name, age, location) VALUES (1, \u0027Ramesh Patel\u0027, 45, \u0027India\u0027); INSERT INTO Farmers (id, name, age, location) VALUES (2, \u0027Sita Devi\u0027, 50, \u0027Nepal\u0027); CREATE TABLE Crops (id INT PRIMARY KEY, farmer_id INT, crop_name VARCHAR(50), yield INT, year INT); INSERT INTO Crops (id, farmer_id, crop_name, yield, year) VALUES (1, 1, \u0027Rice\u0027, 800, 2021); INSERT INTO Crops (id, farmer_id, crop_name, yield, year) VALUES (2, 1, \u0027Wheat\u0027, 600, 2020);", + "sql": "SELECT MAX(yield) FROM Crops JOIN Farmers ON Crops.farmer_id \u003d Farmers.id WHERE Farmers.location \u003d \u0027India\u0027 AND year \u003e\u003d 2020;", + "sql_explanation": "Find the maximum yield from crops grown by Indian farmers since 2020." 
+}, { + "id": "2828", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum yield of soybean crops by country?", + "sql_context": "CREATE TABLE Country (id INT, name VARCHAR(255)); INSERT INTO Country (id, name) VALUES (1, \u0027Canada\u0027), (2, \u0027Mexico\u0027), (3, \u0027Brazil\u0027); CREATE TABLE Crop (id INT, name VARCHAR(255), country_id INT, yield INT); INSERT INTO Crop (id, name, country_id, yield) VALUES (1, \u0027Soybean\u0027, 1, 35), (2, \u0027Cotton\u0027, 2, 25), (3, \u0027Soybean\u0027, 3, 45);", + "sql": "SELECT MAX(Crop.yield) FROM Crop INNER JOIN Country ON Crop.country_id \u003d Country.id WHERE Crop.name \u003d \u0027Soybean\u0027;", + "sql_explanation": "This query retrieves the maximum yield of soybean crops by country by joining the Country and Crop tables on their country_id columns and filtering for soybean crops." 
+}, { + "id": "232", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of aquaculture farms in each country in the AquacultureFarms table that have a sustainable seafood certification in the SeafoodCertifications table?", + "sql_context": "CREATE TABLE AquacultureFarms (Country VARCHAR(50), FarmName VARCHAR(50)); INSERT INTO AquacultureFarms (Country, FarmName) VALUES (\u0027Norway\u0027, \u0027FjordFarms\u0027), (\u0027Mexico\u0027, \u0027CoastalCultures\u0027), (\u0027Chile\u0027, \u0027PatagoniaPonds\u0027); CREATE TABLE SeafoodCertifications (Country VARCHAR(50), Certification BOOLEAN); INSERT INTO SeafoodCertifications (Country, Certification) VALUES (\u0027Norway\u0027, TRUE), (\u0027Canada\u0027, FALSE), (\u0027Chile\u0027, TRUE);", + "sql": "SELECT AquacultureFarms.Country, COUNT(AquacultureFarms.Country) FROM AquacultureFarms INNER JOIN SeafoodCertifications ON AquacultureFarms.Country \u003d SeafoodCertifications.Country WHERE SeafoodCertifications.Certification \u003d TRUE GROUP BY AquacultureFarms.Country;", + "sql_explanation": "This query performs an INNER JOIN on the AquacultureFarms and SeafoodCertifications tables using the Country column. It then uses the WHERE clause to filter the results for rows where the Certification value is TRUE. Finally, it uses the COUNT() function and GROUP BY clause to count the number of aquaculture farms in each country that have a sustainable seafood certification." 
+}, { + "id": "426", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average dissolved oxygen level per week for each aquafarm in the Atlantic region?", + "sql_context": "CREATE TABLE aquafarms (id INT, name TEXT, location TEXT); INSERT INTO aquafarms (id, name, location) VALUES (1, \u0027Farm A\u0027, \u0027Atlantic\u0027), (2, \u0027Farm B\u0027, \u0027Pacific\u0027); CREATE TABLE oxygen_data (aquafarm_id INT, timestamp TIMESTAMP, oxygen_level FLOAT);", + "sql": "SELECT aquafarm_id, AVG(oxygen_level) AS avg_oxygen_level, EXTRACT(WEEK FROM timestamp) AS week FROM oxygen_data JOIN aquafarms ON oxygen_data.aquafarm_id \u003d aquafarms.id WHERE location LIKE \u0027Atlantic%\u0027 GROUP BY aquafarm_id, week;", + "sql_explanation": "This SQL query calculates the average dissolved oxygen level per week for each aquafarm located in the Atlantic region. It does so by joining the \u0027oxygen_data\u0027 table with the \u0027aquafarms\u0027 table on the \u0027aquafarm_id\u0027 column. It then filters for aquafarms located in the Atlantic region, extracts the week from the timestamp column, and calculates the average oxygen level for each aquafarm\u0027s unique combination of \u0027aquafarm_id\u0027 and week." 
+}, { + "id": "594", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of seafood harvested from sustainable aquatic farms in North America?", + "sql_context": "CREATE TABLE aquatic_farms (id INT, name TEXT, country TEXT, sustainable BOOLEAN); CREATE TABLE harvests (id INT, farm_id INT, quantity INT); INSERT INTO aquatic_farms (id, name, country, sustainable) VALUES (1, \u0027Farm A\u0027, \u0027Canada\u0027, TRUE), (2, \u0027Farm B\u0027, \u0027Canada\u0027, FALSE), (3, \u0027Farm C\u0027, \u0027US\u0027, TRUE), (4, \u0027Farm D\u0027, \u0027US\u0027, TRUE); INSERT INTO harvests (id, farm_id, quantity) VALUES (1, 1, 500), (2, 1, 700), (3, 3, 800), (4, 3, 900), (5, 4, 600);", + "sql": "SELECT SUM(harvests.quantity) FROM harvests JOIN aquatic_farms ON harvests.farm_id \u003d aquatic_farms.id WHERE aquatic_farms.sustainable \u003d TRUE AND aquatic_farms.country \u003d \u0027Canada\u0027 OR aquatic_farms.country \u003d \u0027US\u0027;", + "sql_explanation": "The SQL query calculates the total quantity (SUM(harvests.quantity)) of seafood harvested from sustainable aquatic farms (WHERE aquatic_farms.sustainable \u003d TRUE) in North America (AND aquatic_farms.country \u003d \u0027Canada\u0027 OR aquatic_farms.country \u003d \u0027US\u0027). It does so by joining the aquatic_farms and harvests tables based on their shared id and farm_id columns." 
+}, { + "id": "906", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total carbon footprint for each aquafarm in the Mediterranean region.", + "sql_context": "CREATE TABLE aquafarms (id INT, name VARCHAR(50), region VARCHAR(50)); CREATE TABLE carbon_emissions (aquafarm_id INT, year INT, carbon_footprint FLOAT); INSERT INTO aquafarms VALUES (1, \u0027Mediterranean Farm 1\u0027, \u0027Mediterranean\u0027), (2, \u0027Mediterranean Farm 2\u0027, \u0027Mediterranean\u0027), (3, \u0027North Sea Farm 1\u0027, \u0027North Sea\u0027); INSERT INTO carbon_emissions VALUES (1, 2021, 500), (1, 2022, 600), (2, 2021, 700), (2, 2022, 800);", + "sql": "SELECT a.name, SUM(ce.carbon_footprint) as total_carbon_footprint FROM aquafarms a INNER JOIN carbon_emissions ce ON a.id \u003d ce.aquafarm_id WHERE a.region \u003d \u0027Mediterranean\u0027 GROUP BY a.id;", + "sql_explanation": "We join the \u0027aquafarms\u0027 and \u0027carbon_emissions\u0027 tables on the \u0027aquafarm_id\u0027 column. Then, we filter the records for the Mediterranean region using the WHERE clause and the \u0027region\u0027 column. Afterward, we group the records by aquafarm and calculate the total carbon footprint." 
+}, { + "id": "909", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average survival rate of Tilapia in our aquatic farms, grouped by farm state?", + "sql_context": "CREATE TABLE FarmLocations (LocationID int, FarmName varchar(50), City varchar(50), State varchar(50), Country varchar(50)); INSERT INTO FarmLocations (LocationID, FarmName, City, State, Country) VALUES (1, \u0027Farm A\u0027, \u0027Los Angeles\u0027, \u0027California\u0027, \u0027USA\u0027), (2, \u0027Farm B\u0027, \u0027Toronto\u0027, \u0027Ontario\u0027, \u0027Canada\u0027); CREATE TABLE FishStock (StockID int, FarmLocationID int, FishSpecies varchar(50), Quantity int, SurvivalRate float); INSERT INTO FishStock (StockID, FarmLocationID, FishSpecies, Quantity, SurvivalRate) VALUES (1, 1, \u0027Salmon\u0027, 1000, 0.85), (2, 1, \u0027Tilapia\u0027, 2000, 0.92), (3, 2, \u0027Salmon\u0027, 1500, 0.88), (4, 2, \u0027Tilapia\u0027, 2500, 0.90);", + "sql": "SELECT fl.State, AVG(fs.SurvivalRate) as Avg_SurvivalRate FROM FishStock fs JOIN FarmLocations fl ON fs.FarmLocationID \u003d fl.LocationID WHERE fs.FishSpecies \u003d \u0027Tilapia\u0027 GROUP BY fl.State;", + "sql_explanation": "The SQL query calculates the average survival rate of Tilapia in our aquatic farms by joining the FishStock and FarmLocations tables on FarmLocationID. It then filters the records to only Tilapia, groups the results by the state, and calculates the average survival rate for each state using the AVG function." 
+}, { + "id": "969", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum water temperature for coldwater fish species in January?", + "sql_context": "CREATE TABLE fish_species (id INT, name VARCHAR(255), species_type VARCHAR(255)); INSERT INTO fish_species (id, name, species_type) VALUES (1, \u0027Salmon\u0027, \u0027Coldwater\u0027), (2, \u0027Tilapia\u0027, \u0027Tropical\u0027); CREATE TABLE temperature_data (id INT, fish_id INT, record_date DATE, water_temp DECIMAL(5,2)); INSERT INTO temperature_data (id, fish_id, record_date, water_temp) VALUES (1, 1, \u00272022-01-01\u0027, 5.2), (2, 1, \u00272022-01-15\u0027, 4.9), (3, 2, \u00272022-01-01\u0027, 25.1), (4, 2, \u00272022-01-15\u0027, 25.6);", + "sql": "SELECT MIN(water_temp) FROM temperature_data JOIN fish_species ON temperature_data.fish_id \u003d fish_species.id WHERE fish_species.species_type \u003d \u0027Coldwater\u0027 AND MONTH(record_date) \u003d 1;", + "sql_explanation": "Join the two tables and filter for coldwater species and records from January. Calculate the minimum water temperature." 
+}, { + "id": "990", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water temperature for tropical fish species in March?", + "sql_context": "CREATE TABLE fish_species (id INT, name VARCHAR(255), species_type VARCHAR(255)); INSERT INTO fish_species (id, name, species_type) VALUES (1, \u0027Clownfish\u0027, \u0027Tropical\u0027), (2, \u0027Salmon\u0027, \u0027Coldwater\u0027); CREATE TABLE temperature_data (id INT, fish_id INT, record_date DATE, water_temp DECIMAL(5,2)); INSERT INTO temperature_data (id, fish_id, record_date, water_temp) VALUES (1, 1, \u00272022-03-01\u0027, 27.5), (2, 1, \u00272022-03-15\u0027, 28.2), (3, 2, \u00272022-03-01\u0027, 8.3), (4, 2, \u00272022-03-15\u0027, 8.9);", + "sql": "SELECT AVG(water_temp) FROM temperature_data JOIN fish_species ON temperature_data.fish_id \u003d fish_species.id WHERE fish_species.species_type \u003d \u0027Tropical\u0027 AND MONTH(record_date) \u003d 3;", + "sql_explanation": "Join the two tables and filter for tropical species and records from March. Calculate the average water temperature." 
+}, { + "id": "1583", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of farms by country and water type?", + "sql_context": "CREATE TABLE Country ( id INT PRIMARY KEY, name VARCHAR(50) ); CREATE TABLE Farm ( id INT PRIMARY KEY, country_id INT, water_type VARCHAR(50), FOREIGN KEY (country_id) REFERENCES Country(id) );", + "sql": "SELECT Country.name, Farm.water_type, COUNT(Farm.id) FROM Country INNER JOIN Farm ON Country.id \u003d Farm.country_id GROUP BY Country.name, Farm.water_type;", + "sql_explanation": "The SQL query joins the Country and Farm tables using an inner join on the country_id column. It then calculates the number of farms for each country and water type by grouping the results by Country.name and Farm.water_type and counting the number of Farm.id for each group." 
+}, { + "id": "1925", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "For how many fish species has feed been provided in the Arctic region with a dissolved oxygen level greater than 8?", + "sql_context": "CREATE TABLE Feed (FeedID INT, StockID INT, FeedType VARCHAR(50), Quantity INT, FeedDate DATE, Location VARCHAR(50), DissolvedOxygen FLOAT); INSERT INTO Feed (FeedID, StockID, FeedType, Quantity, FeedDate, Location, DissolvedOxygen) VALUES (1, 1, \u0027Organic\u0027, 120, \u00272021-01-01\u0027, \u0027Arctic\u0027, 8.2); INSERT INTO Feed (FeedID, StockID, FeedType, Quantity, FeedDate, Location, DissolvedOxygen) VALUES (2, 2, \u0027Commercial\u0027, 90, \u00272021-02-15\u0027, \u0027Arctic\u0027, 7.8);", + "sql": "SELECT COUNT(DISTINCT Species) FROM FishStock fs JOIN Feed f ON fs.StockID \u003d f.StockID WHERE f.Location \u003d \u0027Arctic\u0027 AND f.DissolvedOxygen \u003e 8;", + "sql_explanation": "Count the number of distinct fish species that have been provided feed in the Arctic region with a dissolved oxygen level greater than 8." 
+}, { + "id": "2433", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many fish were harvested from marine fish farms in each country in the last month?", + "sql_context": "CREATE TABLE country_codes (country TEXT, code TEXT); INSERT INTO country_codes (country, code) VALUES (\u0027Norway\u0027, \u0027NO\u0027), (\u0027Canada\u0027, \u0027CA\u0027); CREATE TABLE fish_harvest (id INT, name TEXT, type TEXT, location TEXT, harvest_quantity INT, country TEXT); INSERT INTO fish_harvest (id, name, type, location, harvest_quantity, country) VALUES (1, \u0027Farm O\u0027, \u0027Fish\u0027, \u0027Norway\u0027, 1000, \u0027NO\u0027), (2, \u0027Farm P\u0027, \u0027Fish\u0027, \u0027Canada\u0027, 1500, \u0027CA\u0027);", + "sql": "SELECT country, COUNT(*) FROM fish_harvest JOIN country_codes ON fish_harvest.country \u003d country_codes.code GROUP BY country;", + "sql_explanation": "The query lists the number of fish harvested from marine fish farms in each country in the last month by joining the fish_harvest and country_codes tables on the country column, then grouping by country and counting the number of rows in each group." 
+}, { + "id": "2761", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average dissolved oxygen level in the Atlantic Ocean by region?", + "sql_context": "CREATE TABLE ocean_regions (region VARCHAR(255), id INTEGER); INSERT INTO ocean_regions (region, id) VALUES (\u0027North Atlantic\u0027, 1), (\u0027South Atlantic\u0027, 2); CREATE TABLE dissolved_oxygen (region_id INTEGER, value FLOAT);", + "sql": "SELECT o.region, AVG(d.value) FROM dissolved_oxygen d JOIN ocean_regions o ON d.region_id \u003d o.id GROUP BY o.region;", + "sql_explanation": "This query joins the ocean_regions and dissolved_oxygen tables using the region_id foreign key. Then, it calculates the average value for each region in the dissolved_oxygen table, grouped by the region in the ocean_regions table." 
+}, { + "id": "2873", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total biomass of fish in each aquaculture farm?", + "sql_context": "CREATE TABLE FarmStocks (FarmID INT, Species VARCHAR(20), Biomass FLOAT); CREATE TABLE Farms (FarmID INT, FarmName VARCHAR(50)); INSERT INTO Farms (FarmID, FarmName) VALUES (1, \u0027Farm A\u0027), (2, \u0027Farm B\u0027); INSERT INTO FarmStocks (FarmID, Species, Biomass) VALUES (1, \u0027Tilapia\u0027, 1500), (1, \u0027Salmon\u0027, 2000), (2, \u0027Trout\u0027, 2500), (2, \u0027Tilapia\u0027, 1200);", + "sql": "SELECT FarmName, SUM(Biomass) FROM FarmStocks JOIN Farms ON FarmStocks.FarmID \u003d Farms.FarmID GROUP BY FarmName;", + "sql_explanation": "Join \u0027FarmStocks\u0027 and \u0027Farms\u0027 tables based on FarmID, calculate the total biomass for each farm." 
+}, { + "id": "2913", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has each farm experienced a disease outbreak?", + "sql_context": "CREATE TABLE Farm (FarmID INT, FarmName VARCHAR(50), FishSpecies VARCHAR(50)); INSERT INTO Farm (FarmID, FarmName, FishSpecies) VALUES (1, \u0027Farm A\u0027, \u0027Salmon\u0027); INSERT INTO Farm (FarmID, FarmName, FishSpecies) VALUES (2, \u0027Farm B\u0027, \u0027Tilapia\u0027); CREATE TABLE Disease (DiseaseID INT, DiseaseName VARCHAR(50), DiseaseImpact FLOAT, FarmID INT); INSERT INTO Disease (DiseaseID, DiseaseName, DiseaseImpact, FarmID) VALUES (1, \u0027Bacterial Infection\u0027, 0.35, 1); INSERT INTO Disease (DiseaseID, DiseaseName, DiseaseImpact, FarmID) VALUES (2, \u0027Fungal Infection\u0027, 0.25, 2);", + "sql": "SELECT Farm.FarmID, COUNT(*) FROM Farm INNER JOIN Disease ON Farm.FarmID \u003d Disease.FarmID GROUP BY Farm.FarmID;", + "sql_explanation": "The query counts the number of records in the Disease table for each farm in the Farm table." 
+}, { + "id": "602", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of participants in each sustainable tourism initiative in Japan.", + "sql_context": "CREATE TABLE initiatives (initiative_id INT, name TEXT, country TEXT); CREATE TABLE participants (initiative_id INT, participant_id INT); INSERT INTO initiatives VALUES (1, \u0027Green Trails\u0027, \u0027Japan\u0027), (2, \u0027Eco-friendly Cruises\u0027, \u0027Japan\u0027), (3, \u0027Sustainable Tourism Festival\u0027, \u0027Japan\u0027); INSERT INTO participants VALUES (1, 1), (1, 2), (2, 3), (2, 4), (3, 5), (3, 6), (3, 7);", + "sql": "SELECT initiatives.name, COUNT(participants.participant_id) FROM initiatives INNER JOIN participants ON initiatives.initiative_id \u003d participants.initiative_id WHERE country \u003d \u0027Japan\u0027 GROUP BY initiatives.name;", + "sql_explanation": "This query identifies the number of participants in each sustainable tourism initiative in Japan by performing an inner join on the two tables (initiatives and participants) based on the initiative_id. It then filters the results to only show records with country \u0027Japan\u0027 and groups the results by the name of the initiatives. The COUNT function is used to count the number of participants for each initiative." 
+}, { + "id": "1011", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many local businesses in Spain have benefited from virtual tourism initiatives?", + "sql_context": "CREATE TABLE businesses (business_id INT, business_name TEXT, country TEXT); CREATE TABLE virtual_tourism (business_id INT, initiative_type TEXT); INSERT INTO businesses (business_id, business_name, country) VALUES (1, \u0027Tapas Bar\u0027, \u0027Spain\u0027), (2, \u0027Museum\u0027, \u0027Spain\u0027), (3, \u0027Winery\u0027, \u0027USA\u0027); INSERT INTO virtual_tourism (business_id, initiative_type) VALUES (1, \u0027Virtual Tour\u0027), (2, \u0027Online Exhibition\u0027), (3, \u0027Webinar\u0027);", + "sql": "SELECT COUNT(DISTINCT businesses.business_id) FROM businesses INNER JOIN virtual_tourism ON businesses.business_id \u003d virtual_tourism.business_id WHERE businesses.country \u003d \u0027Spain\u0027;", + "sql_explanation": "This SQL query calculates the number of local businesses in Spain that have benefited from virtual tourism initiatives. It first performs an inner join between the \u0027businesses\u0027 and \u0027virtual_tourism\u0027 tables, based on the \u0027business_id\u0027 column. Then, it filters the results to only include rows where the country is \u0027Spain\u0027. Finally, it calculates the count of distinct \u0027business_id\u0027 values." 
+}, { + "id": "1438", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of hotel energy consumption in New York by type?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, type TEXT); INSERT INTO hotels (hotel_id, hotel_name, city, type) VALUES (1, \u0027Hotel A\u0027, \u0027New York\u0027, \u0027Luxury\u0027), (2, \u0027Hotel B\u0027, \u0027New York\u0027, \u0027Budget\u0027), (3, \u0027Hotel C\u0027, \u0027New York\u0027, \u0027Boutique\u0027); CREATE TABLE energy_consumption (hotel_id INT, consumption FLOAT); INSERT INTO energy_consumption (hotel_id, consumption) VALUES (1, 1500), (2, 1200), (3, 1000);", + "sql": "SELECT type, AVG(consumption) FROM hotels INNER JOIN energy_consumption ON hotels.hotel_id \u003d energy_consumption.hotel_id WHERE city \u003d \u0027New York\u0027 GROUP BY type;", + "sql_explanation": "This query returns the average hotel energy consumption in New York by type. It does so by joining the hotels and energy_consumption tables on the hotel_id column and then filtering the results by the city column, limiting the results to only those with \u0027New York\u0027 as the value. The results are then grouped by the type column and the average of the consumption column is calculated." 
+}, { + "id": "1996", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cultural heritage sites are associated with each hotel?", + "sql_context": "CREATE TABLE hotel_city (hotel_id INT, city TEXT); INSERT INTO hotel_city (hotel_id, city) VALUES (1, \u0027Miami\u0027), (2, \u0027Costa Rica\u0027);", + "sql": "SELECT hi.hotel_id, COUNT(ch.site_id) FROM hotel_info hi LEFT JOIN cultural_heritage ch ON hi.hotel_id \u003d ch.hotel_id GROUP BY hi.hotel_id;", + "sql_explanation": "Count the number of cultural heritage sites associated with each hotel by joining hotel_info and cultural_heritage tables and grouping by hotel_id." 
+}, { + "id": "133", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average production figures for oil and gas in the Gulf of Mexico region for the year 2020?", + "sql_context": "CREATE TABLE gulf_oil_production (year INT, region VARCHAR(50), oil_production FLOAT);CREATE TABLE gulf_gas_production (year INT, region VARCHAR(50), gas_production FLOAT); INSERT INTO gulf_oil_production VALUES (2020, \u0027Gulf of Mexico\u0027, 1200000); INSERT INTO gulf_oil_production VALUES (2020, \u0027Gulf of Mexico\u0027, 1100000); INSERT INTO gulf_gas_production VALUES (2020, \u0027Gulf of Mexico\u0027, 2300000); INSERT INTO gulf_gas_production VALUES (2020, \u0027Gulf of Mexico\u0027, 2500000);", + "sql": "SELECT AVG(oil_production) AS avg_oil_production, AVG(gas_production) AS avg_gas_production FROM gulf_oil_production INNER JOIN gulf_gas_production ON gulf_oil_production.year \u003d gulf_gas_production.year WHERE gulf_oil_production.region \u003d \u0027Gulf of Mexico\u0027 AND gulf_oil_production.year \u003d 2020;", + "sql_explanation": "This query calculates the average oil and gas production in the Gulf of Mexico region for the year 2020. It does so by performing an inner join on the gulf_oil_production and gulf_gas_production tables based on the year. Then, it filters the records to only include the year 2020 and the Gulf of Mexico region, and finally calculates the average production values." 
+}, { + "id": "1049", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the production change for each field between Q1 and Q2 2019", + "sql_context": "CREATE TABLE quarterly_production (id INT, field_name VARCHAR(50), quarter INT, qty FLOAT); INSERT INTO quarterly_production (id, field_name, quarter, qty) VALUES (1, \u0027Galkynysh\u0027, 1, 50000); INSERT INTO quarterly_production (id, field_name, quarter, qty) VALUES (2, \u0027Galkynysh\u0027, 2, 55000); INSERT INTO quarterly_production (id, field_name, quarter, qty) VALUES (3, \u0027Samotlor\u0027, 1, 60000); INSERT INTO quarterly_production (id, field_name, quarter, qty) VALUES (4, \u0027Samotlor\u0027, 2, 58000);", + "sql": "SELECT a.field_name, (b.qty - a.qty) as q1_q2_change FROM quarterly_production a JOIN quarterly_production b ON a.field_name \u003d b.field_name WHERE a.quarter \u003d 1 AND b.quarter \u003d 2;", + "sql_explanation": "This query performs a self-join on the quarterly_production table, using the field_name as the common column. The query then calculates the production change between Q1 and Q2 for each field by subtracting the quantity of Q1 from the quantity of Q2." 
+}, { + "id": "1417", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List operators with wells in the Southern region and their total production", + "sql_context": "CREATE TABLE wells (id INT, operator VARCHAR(50), region VARCHAR(50)); INSERT INTO wells (id, operator, region) VALUES (1, \u0027ABC Oil\u0027, \u0027Northern\u0027), (2, \u0027XYZ Energy\u0027, \u0027Southern\u0027), (3, \u0027 DEF Petroleum\u0027, \u0027Western\u0027); CREATE TABLE production (well_id INT, type VARCHAR(10), region VARCHAR(50), quantity INT); INSERT INTO production (well_id, type, region, quantity) VALUES (1, \u0027Oil\u0027, \u0027Northern\u0027, 1000), (2, \u0027Gas\u0027, \u0027Southern\u0027, 2000), (3, \u0027Oil\u0027, \u0027Southern\u0027, 3000);", + "sql": "SELECT w.operator, SUM(p.quantity) as total_production FROM wells w INNER JOIN production p ON w.id \u003d p.well_id WHERE w.region \u003d \u0027Southern\u0027 GROUP BY w.operator;", + "sql_explanation": "This SQL query joins the wells and production tables, filters for the Southern region, and calculates the total production for each operator in that region." 
+}, { + "id": "3198", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average production quantity for wells in the North Sea", + "sql_context": "CREATE TABLE production (well_id INT, date DATE, quantity FLOAT); INSERT INTO production (well_id, date, quantity) VALUES (1, \u00272021-01-01\u0027, 100.0), (1, \u00272021-01-02\u0027, 120.0), (2, \u00272021-01-01\u0027, 150.0);", + "sql": "SELECT AVG(quantity) FROM production p JOIN wells w ON p.well_id \u003d w.id WHERE w.location \u003d \u0027North Sea\u0027;", + "sql_explanation": "This query calculates the average production quantity for wells in the North Sea by joining the production and wells tables." 
+}, { + "id": "298", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of properties with inclusive housing policies in each neighborhood, ordered from highest to lowest.", + "sql_context": "CREATE TABLE Neighborhoods (NeighborhoodID INT, NeighborhoodName VARCHAR(255)); CREATE TABLE Properties (PropertyID INT, NeighborhoodID INT, InclusiveHousing BOOLEAN);", + "sql": "SELECT NeighborhoodName, COUNT(*) AS InclusivePropertiesCount FROM Properties JOIN Neighborhoods ON Properties.NeighborhoodID \u003d Neighborhoods.NeighborhoodID WHERE InclusiveHousing \u003d 1 GROUP BY NeighborhoodName ORDER BY InclusivePropertiesCount DESC;", + "sql_explanation": "This query calculates the number of properties with inclusive housing policies in each neighborhood, ordered from highest to lowest. It joins the Properties table with the Neighborhoods table based on the NeighborhoodID, filters for properties with inclusive housing policies, and groups the results by neighborhood to calculate the number of inclusive properties. The results are ordered by the number of inclusive properties, from highest to lowest." 
+}, { + "id": "372", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average property price for each neighborhood in the last 6 months?", + "sql_context": "CREATE TABLE Neighborhoods (NeighborhoodID INT, NeighborhoodName VARCHAR(255)); CREATE TABLE Properties (PropertyID INT, NeighborhoodID INT, Sold DATE, PropertyPrice INT);", + "sql": "SELECT NeighborhoodName, AVG(PropertyPrice) AS AvgPropertyPrice FROM Properties JOIN Neighborhoods ON Properties.NeighborhoodID \u003d Neighborhoods.NeighborhoodID WHERE Sold \u003e\u003d DATEADD(month, -6, CURRENT_TIMESTAMP) GROUP BY NeighborhoodName;", + "sql_explanation": "This query calculates the average property price for each neighborhood in the last 6 months. It joins the Properties table with the Neighborhoods table based on the NeighborhoodID, filters for properties sold in the last 6 months, and groups the results by neighborhood to calculate the average property price. The results are grouped by neighborhood." 
+}, { + "id": "405", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the address of properties with a mortgage that starts in 2021 and ends in 2026?\u0027", + "sql_context": "CREATE TABLE Mortgage (Id INT PRIMARY KEY, MortgageStartDate DATE, MortgageEndDate DATE, PropertyId INT, FOREIGN KEY (PropertyId) REFERENCES Property(Id));", + "sql": "SELECT Property.Address FROM Property INNER JOIN Mortgage ON Property.Id \u003d Mortgage.PropertyId WHERE Mortgage.MortgageStartDate BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 AND Mortgage.MortgageEndDate BETWEEN \u00272026-01-01\u0027 AND \u00272026-12-31\u0027;", + "sql_explanation": "Joining the Property and Mortgage tables on the PropertyId, this query selects the Address from the Property table where the MortgageStartDate is between January 1, 2021 and December 31, 2021 and the MortgageEndDate is between January 1, 2026 and December 31, 2026. This would provide information about properties with a mortgage that starts in 2021 and ends in 2026." 
+}, { + "id": "505", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of property owners who co-own a property in Canada?", + "sql_context": "CREATE TABLE Property (id INT, address VARCHAR(255), city VARCHAR(255), state VARCHAR(255), country VARCHAR(255), price DECIMAL(10,2), size INT, sustainable_practices BOOLEAN, coop_owned BOOLEAN); CREATE TABLE PropertyOwner (id INT, property_id INT, owner_name VARCHAR(255), owner_email VARCHAR(255), owner_phone VARCHAR(20)); CREATE TABLE PropertyCoop (id INT, property_id INT, coop_name VARCHAR(255), coop_membership BOOLEAN);", + "sql": "SELECT DISTINCT PropertyOwner.owner_name FROM PropertyOwner FULL OUTER JOIN PropertyCoop ON PropertyOwner.property_id \u003d PropertyCoop.property_id WHERE PropertyCoop.coop_membership \u003d TRUE AND Property.country \u003d \u0027Canada\u0027;", + "sql_explanation": "This query performs a full outer join between the PropertyOwner and PropertyCoop tables on property_id, retrieving distinct owner_name values. It filters for properties with a coop_membership value of true and the country being Canada." 
+}, { + "id": "593", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average price of properties in each neighborhood", + "sql_context": "CREATE TABLE neighborhoods (neighborhood_id INT PRIMARY KEY, neighborhood_name VARCHAR(50)); INSERT INTO neighborhoods (neighborhood_id, neighborhood_name) VALUES (1, \u0027East River\u0027), (2, \u0027SoHo\u0027), (3, \u0027TriBeCa\u0027); CREATE TABLE properties (property_id INT PRIMARY KEY, property_price DECIMAL(10, 2), neighborhood_id INT, FOREIGN KEY (neighborhood_id) REFERENCES neighborhoods(neighborhood_id)); INSERT INTO properties (property_id, property_price, neighborhood_id) VALUES (1, 500000, 1), (2, 600000, 1), (3, 900000, 2), (4, 1000000, 3);", + "sql": "SELECT neighborhoods.neighborhood_name, AVG(properties.property_price) FROM properties JOIN neighborhoods ON properties.neighborhood_id \u003d neighborhoods.neighborhood_id GROUP BY neighborhoods.neighborhood_name;", + "sql_explanation": "This query first joins the properties and neighborhoods table based on neighborhood_id. Then it calculates the average property_price for each neighborhood_name using the AVG function and GROUP BY clause." 
+}, { + "id": "696", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List property co-owners in New York City who have not implemented accessibility features.", + "sql_context": "CREATE TABLE CoOwners (CoOwnerID int, PropertyID int, City varchar(20)); CREATE TABLE Properties (PropertyID int, Accessibility varchar(5)); INSERT INTO CoOwners (CoOwnerID, PropertyID, City) VALUES (1, 1, \u0027New York City\u0027); INSERT INTO Properties (PropertyID, Accessibility) VALUES (1, \u0027No\u0027); INSERT INTO CoOwners (CoOwnerID, PropertyID, City) VALUES (2, 2, \u0027New York City\u0027); INSERT INTO Properties (PropertyID, Accessibility) VALUES (2, \u0027Yes\u0027);", + "sql": "SELECT CoOwners.CoOwnerID, Properties.City FROM CoOwners INNER JOIN Properties ON CoOwners.PropertyID \u003d Properties.PropertyID WHERE Properties.City \u003d \u0027New York City\u0027 AND Properties.Accessibility \u003d \u0027No\u0027;", + "sql_explanation": "The SQL query lists property co-owners in New York City who have not implemented accessibility features. It uses the INNER JOIN clause to combine the records from CoOwners and Properties tables based on the PropertyID column. It then applies the WHERE clause to filter the records based on the City and Accessibility columns in the Properties table. Finally, it selects the CoOwnerID column from the CoOwners table and the City column from the Properties table." 
+}, { + "id": "769", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display co-owner names and their property addresses with sustainable features in City B.", + "sql_context": "CREATE TABLE co_owners (owner_id INT, name VARCHAR(255), property_id INT); CREATE TABLE properties (property_id INT, address VARCHAR(255), city VARCHAR(255), sustainable BOOLEAN);", + "sql": "SELECT co_owners.name, properties.address FROM co_owners INNER JOIN properties ON co_owners.property_id \u003d properties.property_id WHERE properties.city \u003d \u0027City B\u0027 AND properties.sustainable \u003d TRUE;", + "sql_explanation": "The SQL query displays co-owner names (co_owners.name) and their property addresses (properties.address) with sustainable features (properties.sustainable \u003d TRUE) in City B (properties.city \u003d \u0027City B\u0027) by performing an inner join (INNER JOIN) between co_owners and properties tables based on the property_id." 
+}, { + "id": "910", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of sustainable properties in each state?", + "sql_context": "CREATE TABLE States (State varchar(20)); CREATE TABLE Properties (PropertyID int, State varchar(20), Sustainable varchar(5)); INSERT INTO States (State) VALUES (\u0027Washington\u0027); INSERT INTO Properties (PropertyID, State, Sustainable) VALUES (1, \u0027Washington\u0027, \u0027Yes\u0027); INSERT INTO Properties (PropertyID, State, Sustainable) VALUES (2, \u0027Oregon\u0027, \u0027No\u0027); INSERT INTO States (State) VALUES (\u0027Oregon\u0027);", + "sql": "SELECT Properties.State, COUNT(Properties.PropertyID) FROM Properties INNER JOIN States ON Properties.State \u003d States.State WHERE Properties.Sustainable \u003d \u0027Yes\u0027 GROUP BY Properties.State;", + "sql_explanation": "The SQL query calculates the total number of sustainable properties in each state. It uses the INNER JOIN clause to combine the records from Properties and States tables based on the State column. It then applies the WHERE clause to filter the records based on the Sustainable column in the Properties table. Finally, it uses the GROUP BY clause to group the records by the State column and the COUNT function to calculate the number of sustainable properties in each state." 
+}, { + "id": "1107", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum property price for buildings in each city with a sustainable urbanism certification?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(30)); CREATE TABLE properties (id INT, city VARCHAR(20), price INT, sustainable_urbanism BOOLEAN); INSERT INTO cities (id, name) VALUES (1, \u0027Vancouver\u0027), (2, \u0027Seattle\u0027), (3, \u0027Portland\u0027); INSERT INTO properties (id, city, price, sustainable_urbanism) VALUES (101, \u0027Vancouver\u0027, 600000, true), (102, \u0027Vancouver\u0027, 700000, false), (103, \u0027Seattle\u0027, 800000, true), (104, \u0027Seattle\u0027, 900000, false), (105, \u0027Portland\u0027, 500000, true), (106, \u0027Portland\u0027, 400000, false);", + "sql": "SELECT cities.name, MAX(properties.price) FROM cities INNER JOIN properties ON cities.name \u003d properties.city WHERE properties.sustainable_urbanism \u003d true GROUP BY cities.name;", + "sql_explanation": "This SQL query calculates the maximum property price for buildings in each city with a sustainable urbanism certification by joining the cities and properties tables on the name column, filtering the results based on the sustainable_urbanism column, and grouping the results by the name column." 
+}, { + "id": "1819", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum property price for inclusive housing projects in the city of \"Boston\"?", + "sql_context": "CREATE TABLE inclusive_housing (project_id INT, property_id INT, price FLOAT, city_id INT, PRIMARY KEY (project_id)); INSERT INTO inclusive_housing (project_id, property_id, price, city_id) VALUES (1, 1, 500000.0, 1), (2, 2, 600000.0, 1), (3, 3, 400000.0, 1); CREATE TABLE cities (city_id INT, city_name TEXT, PRIMARY KEY (city_id)); INSERT INTO cities (city_id, city_name) VALUES (1, \u0027Boston\u0027), (2, \u0027Chicago\u0027), (3, \u0027Oakland\u0027);", + "sql": "SELECT MIN(price), MAX(price) FROM inclusive_housing JOIN cities ON inclusive_housing.city_id \u003d cities.city_id WHERE cities.city_name \u003d \u0027Boston\u0027;", + "sql_explanation": "The SQL query joins the \"inclusive_housing\" table with the \"cities\" table based on the city_id. It then filters the records for the city_name equal to \u0027Boston\u0027 and calculates the minimum and maximum property price for inclusive housing projects." 
+}, { + "id": "1842", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum property price for properties in the RealEstateCoOwnership schema that have co-owners?", + "sql_context": "CREATE TABLE RealEstateCoOwnership.Properties (id INT, price FLOAT); INSERT INTO RealEstateCoOwnership.Properties (id, price) VALUES (1, 400000.0), (2, 600000.0); CREATE TABLE RealEstateCoOwnership.CoOwnership (property_id INT, coowner VARCHAR(50)); INSERT INTO RealEstateCoOwnership.CoOwnership (property_id, coowner) VALUES (1, \u0027John\u0027), (1, \u0027Jane\u0027), (2, \u0027Bob\u0027);", + "sql": "SELECT MIN(price) FROM RealEstateCoOwnership.Properties INNER JOIN RealEstateCoOwnership.CoOwnership ON Properties.id \u003d CoOwnership.property_id;", + "sql_explanation": "This query joins the Properties and CoOwnership tables in the RealEstateCoOwnership schema on the property_id column. It returns the minimum property price for properties that have co-owners." 
+}, { + "id": "2654", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of properties in inclusive housing areas?", + "sql_context": "CREATE TABLE Inclusive_Housing (Property_ID INT, Inclusive VARCHAR(10)); INSERT INTO Inclusive_Housing (Property_ID, Inclusive) VALUES (1, \u0027Yes\u0027), (2, \u0027No\u0027), (3, \u0027Yes\u0027), (4, \u0027No\u0027); CREATE TABLE Properties (Property_ID INT, Property_Details VARCHAR(20)); INSERT INTO Properties (Property_ID, Property_Details) VALUES (1, \u0027Inclusive Housing\u0027), (2, \u0027Regular Housing\u0027), (3, \u0027Inclusive Housing\u0027), (4, \u0027Regular Housing\u0027);", + "sql": "SELECT COUNT(*) FROM Properties p JOIN Inclusive_Housing ih ON p.Property_ID \u003d ih.Property_ID WHERE Inclusive \u003d \u0027Yes\u0027;", + "sql_explanation": "The SQL query counts the total number of properties in inclusive housing areas by using the COUNT function on all rows in the Properties table where the Inclusive column in the Inclusive_Housing table is equal to \u0027Yes\u0027. This is done using a JOIN statement to combine the two tables based on the Property_ID column." 
+}, { + "id": "658", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all heritage sites in Asia and their corresponding preservation budget for the current year.", + "sql_context": "CREATE TABLE HeritageSites (SiteID INT, SiteName VARCHAR(50), Country VARCHAR(50)); CREATE TABLE Budget (BudgetID INT, SiteID INT, Year INT, Amount INT); INSERT INTO HeritageSites VALUES (1, \u0027Angkor Wat\u0027, \u0027Cambodia\u0027), (2, \u0027Forbidden City\u0027, \u0027China\u0027), (3, \u0027Taj Mahal\u0027, \u0027India\u0027); INSERT INTO Budget VALUES (1, 1, 2022, 500000), (2, 1, 2023, 550000), (3, 2, 2022, 700000), (4, 2, 2023, 750000), (5, 3, 2022, 800000), (6, 3, 2023, 850000);", + "sql": "SELECT HeritageSites.SiteName, HeritageSites.Country, Budget.Amount FROM HeritageSites INNER JOIN Budget ON HeritageSites.SiteID \u003d Budget.SiteID WHERE HeritageSites.Country \u003d \u0027Asia\u0027 AND Budget.Year \u003d 2022;", + "sql_explanation": "First, we join the HeritageSites and Budget tables on SiteID. Then, we filter the results to include only the heritage sites located in Asia and the budget for the current year. After that, we list all heritage sites in Asia and their corresponding preservation budget for the current year." 
+}, { + "id": "695", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of artists who have created more than 10 artworks in each European country.", + "sql_context": "CREATE TABLE Artists (ArtistID INT, Name TEXT, Country TEXT);CREATE TABLE Artworks (ArtworkID INT, Title TEXT, ArtistID INT);", + "sql": "SELECT Artists.Country, COUNT(DISTINCT Artists.ArtistID) FROM Artists INNER JOIN Artworks ON Artists.ArtistID \u003d Artworks.ArtistID GROUP BY Artists.Country HAVING COUNT(DISTINCT Artworks.ArtworkID) \u003e 10;", + "sql_explanation": "This query starts by joining the \u0027Artists\u0027 table with the \u0027Artworks\u0027 table using ArtistID. It then groups the results by Country. It uses the HAVING clause to only include rows where the number of distinct Artworks.ArtworkID is greater than 10, and returns the Country column and the count of distinct ArtistID." 
+}, { + "id": "974", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artists contributed to the most collaborations in the music industry?", + "sql_context": "CREATE TABLE Artists (id INT, name VARCHAR(50), collaborations INT); CREATE TABLE Collaborations (id INT, artist1 INT, artist2 INT); INSERT INTO Artists VALUES (1, \u0027Artist A\u0027, 5), (2, \u0027Artist B\u0027, 3), (3, \u0027Artist C\u0027, 2); INSERT INTO Collaborations VALUES (1, 1, 2), (2, 1, 3), (3, 2, 3);", + "sql": "SELECT A.name, COUNT(*) as collaborations_count FROM Artists A INNER JOIN Collaborations C ON A.id \u003d C.artist1 OR A.id \u003d C.artist2 GROUP BY A.name ORDER BY collaborations_count DESC;", + "sql_explanation": "The SQL query joins the Artists table and the Collaborations table on the artist1 and artist2 fields, groups the results by artist name, and counts the number of collaborations for each artist. The results are sorted in descending order of the number of collaborations." 
+}, { + "id": "1195", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the artworks created by artists from Oceania", + "sql_context": "CREATE TABLE ArtPieces (ArtPieceID INT PRIMARY KEY, ArtPieceName VARCHAR(100), CreationDate DATE, ArtistID INT); INSERT INTO ArtPieces (ArtPieceID, ArtPieceName, CreationDate, ArtistID) VALUES (1, \u0027Aboriginal Artwork\u0027, \u00271900-01-01\u0027, 1); INSERT INTO ArtPieces (ArtPieceID, ArtPieceName, CreationDate, ArtistID) VALUES (2, \u0027Maori Artwork\u0027, \u00271800-01-01\u0027, 2); CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100), Age INT, Nationality VARCHAR(50)); INSERT INTO Artists (ArtistID, ArtistName, Age, Nationality) VALUES (1, \u0027Amy Johnson\u0027, 26, \u0027Australian\u0027); INSERT INTO Artists (ArtistID, ArtistName, Age, Nationality) VALUES (2, \u0027Te Rauparaha\u0027, 56, \u0027New Zealander\u0027);", + "sql": "SELECT ArtPieceName FROM ArtPieces JOIN Artists ON ArtPieces.ArtistID \u003d Artists.ArtistID WHERE Artists.Nationality \u003d \u0027Australian\u0027 OR Artists.Nationality \u003d \u0027New Zealander\u0027;", + "sql_explanation": "This SQL query joins the ArtPieces and Artists tables on the ArtistID column, then filters the results to only include rows where the Artist\u0027s nationality is either \u0027Australian\u0027 or \u0027New Zealander\u0027. Finally, it selects the ArtPieceName column from the resulting table." 
+}, { + "id": "1653", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of art pieces by type in a given museum?", + "sql_context": "CREATE TABLE MuseumArtPieces (id INT, museumId INT, artType VARCHAR(50), quantity INT); INSERT INTO MuseumArtPieces (id, museumId, artType, quantity) VALUES (1, 1, \u0027Modern\u0027, 20), (2, 1, \u0027Ancient\u0027, 10), (3, 2, \u0027Modern\u0027, 15), (4, 2, \u0027Ancient\u0027, 25);", + "sql": "SELECT Museums.name, artType, SUM(quantity) FROM Museums JOIN MuseumArtPieces ON Museums.id \u003d MuseumArtPieces.museumId GROUP BY Museums.name, artType;", + "sql_explanation": "The SQL query calculates the total number of art pieces by type in each museum by using the SUM function on the quantity column, joining the Museums table and the MuseumArtPieces table on the museumId column, and grouping the results by museum name and art type." 
+}, { + "id": "114", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 construction materials used by project in Utah?", + "sql_context": "CREATE TABLE projects (id INT, name TEXT, state TEXT); CREATE TABLE project_materials (id INT, project_id INT, material TEXT); INSERT INTO projects (id, name, state) VALUES (1, \u0027Green Project 1\u0027, \u0027Utah\u0027); INSERT INTO projects (id, name, state) VALUES (2, \u0027Eco Project 2\u0027, \u0027Utah\u0027); INSERT INTO project_materials (id, project_id, material) VALUES (1, 1, \u0027Wood\u0027); INSERT INTO project_materials (id, project_id, material) VALUES (2, 1, \u0027Glass\u0027); INSERT INTO project_materials (id, project_id, material) VALUES (3, 2, \u0027Steel\u0027); INSERT INTO project_materials (id, project_id, material) VALUES (4, 2, \u0027Concrete\u0027);", + "sql": "SELECT projects.name, project_materials.material, COUNT(project_materials.id) as material_count FROM projects JOIN project_materials ON projects.id \u003d project_materials.project_id WHERE projects.state \u003d \u0027Utah\u0027 GROUP BY projects.name, project_materials.material ORDER BY material_count DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 construction materials used by project in Utah by joining the projects and project_materials tables on project_id, filtering for rows where the state is \u0027Utah\u0027, grouping by project name and material, counting the number of rows in each group using the COUNT() function, and ordering by the material count in descending order. 
The query returns only the top 3 rows using the LIMIT 3 clause." +}, { + "id": "466", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of materials for the project with id 3?", + "sql_context": "CREATE TABLE construction_material (id INT, material_name VARCHAR(255), quantity INT, unit_price DECIMAL(5,2)); CREATE TABLE project_material (project_id INT, material_id INT); INSERT INTO construction_material (id, material_name, quantity, unit_price) VALUES (2, \u0027Reclaimed Wood\u0027, 100, 500.00), (3, \u0027Green Concrete\u0027, 200, 700.00); INSERT INTO project_material (project_id, material_id) VALUES (2, 2), (3, 3);", + "sql": "SELECT SUM(construction_material.quantity * construction_material.unit_price) FROM construction_material JOIN project_material ON construction_material.id \u003d project_material.material_id WHERE project_material.project_id \u003d 3;", + "sql_explanation": "This query calculates the total cost of materials for the project with id 3 by joining \u0027construction_material\u0027 and \u0027project_material\u0027 tables and summing the product of quantity and unit_price." 
+}, { + "id": "1270", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all contractors who have completed projects in both \u0027California\u0027 and \u0027Texas\u0027", + "sql_context": "CREATE TABLE contractors (contractor_id INT, name VARCHAR(50), state VARCHAR(2)); INSERT INTO contractors (contractor_id, name, state) VALUES (1, \u0027ABC Construction\u0027, \u0027CA\u0027), (2, \u0027DEF Construction\u0027, \u0027TX\u0027), (3, \u0027GHI Construction\u0027, \u0027CA\u0027), (4, \u0027JKL Construction\u0027, \u0027TX\u0027); CREATE TABLE projects (project_id INT, contractor_id INT, state VARCHAR(2)); INSERT INTO projects (project_id, contractor_id, state) VALUES (101, 1, \u0027CA\u0027), (102, 2, \u0027TX\u0027), (103, 3, \u0027CA\u0027), (104, 4, \u0027TX\u0027);", + "sql": "SELECT c.name FROM contractors c JOIN projects p ON c.contractor_id \u003d p.contractor_id WHERE c.state IN (\u0027CA\u0027, \u0027TX\u0027) GROUP BY c.name HAVING COUNT(DISTINCT c.state) \u003d 2;", + "sql_explanation": "This SQL query returns a list of contractors who have completed projects in both \u0027California\u0027 and \u0027Texas\u0027 by joining the contractors and projects table, filtering for contractors who have worked in either \u0027California\u0027 or \u0027Texas\u0027, grouping by the contractor name, and using the HAVING clause to ensure that the contractor has completed projects in both states." 
+}, { + "id": "1380", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all permits and the number of labor violations for each permit", + "sql_context": "CREATE TABLE building_permits (permit_id INT); CREATE TABLE labor_stats (permit_id INT, violation VARCHAR(100));", + "sql": "SELECT bp.permit_id, COUNT(ls.permit_id) AS num_violations FROM building_permits bp LEFT JOIN labor_stats ls ON bp.permit_id \u003d ls.permit_id GROUP BY bp.permit_id;", + "sql_explanation": "* This SQL query performs a left join on the \u0027building_permits\u0027 and \u0027labor_stats\u0027 tables to get all permits and their corresponding number of labor violations." 
+}, { + "id": "199", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of vegetarian and non-vegetarian dishes per restaurant in \u0027City A\u0027.", + "sql_context": "CREATE TABLE restaurants (id INT, name VARCHAR(255), city VARCHAR(255)); INSERT INTO restaurants (id, name, city) VALUES (1, \u0027Restaurant A\u0027, \u0027City A\u0027), (2, \u0027Restaurant B\u0027, \u0027City B\u0027); CREATE TABLE dishes (id INT, name VARCHAR(255), type VARCHAR(255), restaurant_id INT); INSERT INTO dishes (id, name, type, restaurant_id) VALUES (1, \u0027Quinoa Salad\u0027, \u0027vegetarian\u0027, 1), (2, \u0027Chickpea Curry\u0027, \u0027vegetarian\u0027, 1), (3, \u0027Cheeseburger\u0027, \u0027non-vegetarian\u0027, 1), (4, \u0027Pizza Margherita\u0027, \u0027vegetarian\u0027, 2), (5, \u0027Fish and Chips\u0027, \u0027non-vegetarian\u0027, 2);", + "sql": "SELECT r.name, SUM(CASE WHEN d.type \u003d \u0027vegetarian\u0027 THEN 1 ELSE 0 END) AS vegetarian_count, SUM(CASE WHEN d.type \u003d \u0027non-vegetarian\u0027 THEN 1 ELSE 0 END) AS non_vegetarian_count FROM dishes d JOIN restaurants r ON d.restaurant_id \u003d r.id WHERE r.city \u003d \u0027City A\u0027 GROUP BY r.id;", + "sql_explanation": "The SQL query joins the dishes and restaurants tables, calculates the number of vegetarian and non-vegetarian dishes per restaurant in \u0027City A\u0027 using conditional aggregation, and groups the results by restaurant ID." 
+}, { + "id": "231", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 restaurants with the highest total revenue for the year 2020, along with the total number of menus they offer.", + "sql_context": "CREATE TABLE restaurants (restaurant_id INT, total_revenue DECIMAL(10,2)); CREATE TABLE menus (menu_id INT, restaurant_id INT, total_revenue DECIMAL(10,2)); INSERT INTO restaurants VALUES (1, 50000); INSERT INTO restaurants VALUES (2, 60000); INSERT INTO restaurants VALUES (3, 40000); INSERT INTO menus VALUES (1, 1, 10000); INSERT INTO menus VALUES (2, 1, 20000); INSERT INTO menus VALUES (3, 2, 30000); INSERT INTO menus VALUES (4, 2, 20000); INSERT INTO menus VALUES (5, 3, 10000);", + "sql": "SELECT r.restaurant_id, SUM(m.total_revenue) as total_revenue, COUNT(m.menu_id) as total_menus FROM restaurants r INNER JOIN menus m ON r.restaurant_id \u003d m.restaurant_id WHERE YEAR(m.order_date) \u003d 2020 GROUP BY r.restaurant_id ORDER BY total_revenue DESC LIMIT 5;", + "sql_explanation": "This query first joins the restaurants and menus tables on the restaurant_id column. It then filters the data to only include orders from the year 2020 by using the WHERE clause and the YEAR() function. Finally, it groups the data by restaurant, calculates the total revenue and total number of menus, and orders the results in descending order of total revenue. It then returns the top 5 results." 
+}, { + "id": "334", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 ingredients by sales across all dishes", + "sql_context": "CREATE TABLE dish_ingredients (id INT, name VARCHAR(50), dish_id INT); CREATE TABLE ingredient_sales (ingredient_id INT, revenue INT);", + "sql": "SELECT dish_ingredients.name, SUM(ingredient_sales.revenue) as total_sales FROM ingredient_sales JOIN dish_ingredredients ON ingredient_sales.ingredient_id \u003d dish_ingredients.id GROUP BY dish_ingredients.name ORDER BY total_sales DESC LIMIT 3;", + "sql_explanation": "This SQL query identifies the top 3 ingredients by sales across all dishes. It joins the ingredient_sales and dish_ingredients tables on the ingredient_id column and then groups the data by the ingredient name. It then calculates the total sales for each ingredient and orders the results in descending order by total sales, limiting the results to the top 3." 
+}, { + "id": "567", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all restaurants offering vegan options in \u0027Eastside\u0027 and their respective total revenue.", + "sql_context": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(255), Region varchar(255), VeganOptions varchar(10)); CREATE TABLE MenuItems (MenuID int, MenuName varchar(255), RestaurantID int, Sales int);", + "sql": "SELECT R.RestaurantName, SUM(M.Sales) as TotalRevenue FROM Restaurants R INNER JOIN MenuItems M ON R.RestaurantID \u003d M.RestaurantID WHERE R.Region \u003d \u0027Eastside\u0027 AND R.VeganOptions \u003d \u0027Yes\u0027 GROUP BY R.RestaurantName;", + "sql_explanation": "The SQL query joins the Restaurants and MenuItems tables based on the RestaurantID. It then filters the results to show only restaurants located in \u0027Eastside\u0027 that offer vegan options and calculates their respective total revenue by summing the sales of all menu items." 
+}, { + "id": "645", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 cities with the highest average food safety inspection score, along with the total number of inspections conducted in each city.", + "sql_context": "CREATE TABLE cities (city_id INT, city VARCHAR(255)); CREATE TABLE inspections (inspection_id INT, city_id INT, score INT); INSERT INTO cities VALUES (1, \u0027New York\u0027); INSERT INTO cities VALUES (2, \u0027Los Angeles\u0027); INSERT INTO inspections VALUES (1, 1, 85); INSERT INTO inspections VALUES (2, 1, 90); INSERT INTO inspections VALUES (3, 2, 70); INSERT INTO inspections VALUES (4, 2, 75);", + "sql": "SELECT c.city, AVG(i.score) as average_score, COUNT(i.inspection_id) as total_inspections FROM cities c INNER JOIN inspections i ON c.city_id \u003d i.city_id GROUP BY c.city ORDER BY average_score DESC LIMIT 3;", + "sql_explanation": "This query first joins the cities and inspections tables on the city_id column. It then groups the data by city, calculates the average inspection score, and orders the results in descending order of average score. It then returns the top 3 results." 
+}, { + "id": "680", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each restaurant, including their sustainable sourcing efforts?", + "sql_context": "CREATE TABLE Restaurants (RestaurantID INT, Name VARCHAR(50), TotalRevenue DECIMAL(10,2)); CREATE TABLE SustainableSourcing (SourcingID INT, RestaurantID INT, SustainabilityScore INT);", + "sql": "SELECT R.Name, R.TotalRevenue + SUM(SS.SustainabilityScore) as TotalRevenueAndSustainabilityScore FROM Restaurants R INNER JOIN SustainableSourcing SS ON R.RestaurantID \u003d SS.RestaurantID GROUP BY R.Name;", + "sql_explanation": "This query first joins the Restaurants table with the SustainableSourcing table on RestaurantID. Then, it calculates the total revenue and the sum of the sustainability scores for each restaurant. The result is grouped by restaurant name." 
+}, { + "id": "801", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each cuisine type in the last quarter?", + "sql_context": "CREATE TABLE Restaurants (RestaurantID int, Name varchar(50), Cuisine varchar(50)); CREATE TABLE Revenues (RevenueID int, RestaurantID int, Revenue decimal(5,2), Date date); INSERT INTO Restaurants (RestaurantID, Name, Cuisine) VALUES (1, \u0027Big Burger\u0027, \u0027American\u0027); INSERT INTO Revenues (RevenueID, RestaurantID, Revenue, Date) VALUES (1, 1, 15000.00, \u00272022-01-01\u0027);", + "sql": "SELECT r.Cuisine, SUM(rev.Revenue) as TotalRevenue FROM Restaurants r JOIN Revenues rev ON r.RestaurantID \u003d rev.RestaurantID WHERE rev.Date \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY r.Cuisine;", + "sql_explanation": "Join Restaurants and Revenues tables, filter data for the last quarter, group by cuisine, and calculate the sum of revenue for each cuisine type." 
+}, { + "id": "804", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the revenue breakdown by cuisine type and the number of restaurants offering each cuisine type.", + "sql_context": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(255), Cuisine varchar(255)); CREATE TABLE MenuItems (MenuID int, MenuName varchar(255), RestaurantID int, Sales int);", + "sql": "SELECT R.Cuisine, SUM(M.Sales) as TotalRevenue, COUNT(DISTINCT R.RestaurantID) as RestaurantCount FROM Restaurants R INNER JOIN MenuItems M ON R.RestaurantID \u003d M.RestaurantID GROUP BY R.Cuisine;", + "sql_explanation": "The SQL query joins the Restaurants and MenuItems tables based on the RestaurantID. It then calculates the total revenue for each cuisine type and the number of restaurants offering each cuisine type by grouping by the cuisine type." 
+}, { + "id": "900", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average food safety inspection score by restaurant location for the last year.", + "sql_context": "CREATE TABLE inspections (inspection_id INT, restaurant_id INT, date DATE, score INT); INSERT INTO inspections (inspection_id, restaurant_id, date, score) VALUES (1, 1, \u00272022-02-01\u0027, 95), (2, 1, \u00272022-03-01\u0027, 90), (3, 2, \u00272022-02-15\u0027, 85), (4, 2, \u00272022-03-15\u0027, 92); CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO restaurants (restaurant_id, name, location) VALUES (1, \u0027Restaurant A\u0027, \u0027City A\u0027), (2, \u0027Restaurant B\u0027, \u0027City B\u0027);", + "sql": "SELECT r.location, AVG(i.score) as avg_score FROM inspections i JOIN restaurants r ON i.restaurant_id \u003d r.restaurant_id WHERE i.date \u003e\u003d DATE(NOW()) - INTERVAL 365 DAY GROUP BY r.location;", + "sql_explanation": "This query joins the \u0027inspections\u0027 and \u0027restaurants\u0027 tables on the \u0027restaurant_id\u0027 column, filters the records for the last 365 days, calculates the average food safety inspection score for each location, and returns the location and average score using the GROUP BY clause." 
+}, { + "id": "915", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 menu items contributing to daily revenue in \u0027fine_dining\u0027 restaurant", + "sql_context": "CREATE TABLE restaurant (id INT, name VARCHAR(255)); INSERT INTO restaurant (id, name) VALUES (1, \u0027fine_dining\u0027); CREATE TABLE menu (id INT, item VARCHAR(255), price DECIMAL(5,2), daily_sales INT, restaurant_id INT);", + "sql": "SELECT i.item, SUM(m.price * m.daily_sales) as revenue FROM menu m JOIN restaurant r ON m.restaurant_id \u003d r.id WHERE r.name \u003d \u0027fine_dining\u0027 GROUP BY i.item ORDER BY revenue DESC LIMIT 3;", + "sql_explanation": "This query first joins the \u0027menu\u0027 and \u0027restaurant\u0027 tables based on the restaurant_id. It then filters for the \u0027fine_dining\u0027 restaurant. After that, it groups the menu items by item and calculates the revenue for each item. Finally, it orders the items by revenue in descending order and limits the results to the top 3." 
+}, { + "id": "1076", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of unique menu items offered by each restaurant, sorted by the number of items in descending order.", + "sql_context": "CREATE TABLE menu (menu_id INT, restaurant_id INT, food_category TEXT, price DECIMAL(5,2)); CREATE TABLE restaurant (restaurant_id INT, name TEXT); INSERT INTO restaurant (restaurant_id, name) VALUES (1, \u0027Restaurant A\u0027), (2, \u0027Restaurant B\u0027), (3, \u0027Restaurant C\u0027); INSERT INTO menu (menu_id, restaurant_id, food_category) VALUES (1, 1, \u0027Appetizers\u0027), (2, 1, \u0027Entrees\u0027), (3, 1, \u0027Desserts\u0027), (4, 2, \u0027Appetizers\u0027), (5, 2, \u0027Entrees\u0027), (6, 3, \u0027Appetizers\u0027), (7, 3, \u0027Entrees\u0027), (8, 3, \u0027Desserts\u0027), (9, 3, \u0027Drinks\u0027);", + "sql": "SELECT r.name, COUNT(DISTINCT m.menu_id) AS unique_menu_items FROM menu m JOIN restaurant r ON m.restaurant_id \u003d r.restaurant_id GROUP BY r.name ORDER BY unique_menu_items DESC;", + "sql_explanation": "This query joins the restaurant and menu tables, groups by restaurant name, calculates the number of unique menu items for each restaurant, and returns the number of unique menu items sorted in descending order." 
+}, { + "id": "1084", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each restaurant location that serves vegetarian menu items?", + "sql_context": "CREATE TABLE restaurant_menu(location VARCHAR(255), menu_item VARCHAR(255)); CREATE TABLE menu_engineering(menu_item VARCHAR(255), category VARCHAR(255), revenue DECIMAL(10,2), sustainable_source BOOLEAN); INSERT INTO restaurant_menu VALUES (\u0027Location A\u0027, \u0027Veggie Burger\u0027); INSERT INTO menu_engineering VALUES (\u0027Veggie Burger\u0027, \u0027Vegetarian\u0027, 1000, TRUE);", + "sql": "SELECT rm.location, SUM(me.revenue) FROM restaurant_menu rm INNER JOIN menu_engineering me ON rm.menu_item \u003d me.menu_item WHERE me.category \u003d \u0027Vegetarian\u0027 GROUP BY rm.location;", + "sql_explanation": "This query calculates the total revenue for each restaurant location that serves vegetarian menu items. It performs an inner join between the restaurant_menu and menu_engineering tables, filters the data for the vegetarian category using the WHERE clause, and groups the data by location to calculate the sum of revenues for each group." 
+}, { + "id": "1207", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total revenue of restaurants located in New York with a food safety score above 80.", + "sql_context": "CREATE TABLE restaurants (id INT, name TEXT, state TEXT, safety_score INT); INSERT INTO restaurants (id, name, state, safety_score) VALUES (1, \u0027Restaurant A\u0027, \u0027New York\u0027, 85), (2, \u0027Restaurant B\u0027, \u0027California\u0027, 70), (3, \u0027Restaurant C\u0027, \u0027New York\u0027, 90); CREATE TABLE revenue (restaurant_id INT, amount INT); INSERT INTO revenue (restaurant_id, amount) VALUES (1, 10000), (1, 12000), (2, 8000), (3, 15000);", + "sql": "SELECT SUM(revenue.amount) FROM revenue JOIN restaurants ON revenue.restaurant_id \u003d restaurants.id WHERE restaurants.state \u003d \u0027New York\u0027 AND restaurants.safety_score \u003e 80;", + "sql_explanation": "This SQL query calculates the total revenue of restaurants located in New York with a food safety score above 80 by joining the restaurants and revenue tables on restaurant_id. It then filters the results to only include rows where restaurants.state is equal to \u0027New York\u0027 and restaurants.safety_score is greater than 80. Finally, it uses the SUM function to add up the amount values in the filtered results." 
+}, { + "id": "1442", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average food safety inspection score for restaurants located in \u0027Westwood\u0027?", + "sql_context": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(255), Region varchar(255)); CREATE TABLE Inspections (InspectionID int, RestaurantID int, InspectionScore int, InspectionDate date);", + "sql": "SELECT AVG(I.InspectionScore) as AvgInspectionScore FROM Restaurants R INNER JOIN Inspections I ON R.RestaurantID \u003d I.RestaurantID WHERE R.Region \u003d \u0027Westwood\u0027;", + "sql_explanation": "The SQL query joins the Restaurants and Inspections tables based on the RestaurantID. It then calculates the average food safety inspection score for restaurants located in \u0027Westwood\u0027." 
+}, { + "id": "1446", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for \u0027Impossible Burger\u0027 at \u0027Vegan Delight\u0027?", + "sql_context": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(255)); INSERT INTO restaurants (restaurant_id, name) VALUES (6, \u0027Vegan Delight\u0027); CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(255), price DECIMAL(5,2), restaurant_id INT); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (7, \u0027Impossible Burger\u0027, 10.99, 6); CREATE TABLE orders (order_id INT, menu_item_id INT, quantity INT, order_date DATE, restaurant_id INT); INSERT INTO orders (order_id, menu_item_id, quantity, order_date, restaurant_id) VALUES (5, 7, 3, \u00272022-01-02\u0027, 6);", + "sql": "SELECT SUM(price * quantity) FROM orders o JOIN menu_items mi ON o.menu_item_id \u003d mi.menu_item_id WHERE mi.name \u003d \u0027Impossible Burger\u0027 AND mi.restaurant_id \u003d 6;", + "sql_explanation": "This query calculates the total revenue for the \u0027Impossible Burger\u0027 at \u0027Vegan Delight\u0027 by summing the product of the price and quantity columns in the orders table where the name is \u0027Impossible Burger\u0027 and the restaurant_id is 6." 
+}, { + "id": "1582", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average food safety score for restaurants in New York City?", + "sql_context": "CREATE TABLE inspections (restaurant_id INT, inspection_date DATE, score INT); CREATE TABLE restaurant_locations (restaurant_id INT, location VARCHAR(255)); INSERT INTO restaurant_locations (restaurant_id, location) VALUES (1, \u0027New York City\u0027), (2, \u0027Los Angeles\u0027), (3, \u0027Chicago\u0027); INSERT INTO inspections (restaurant_id, inspection_date, score) VALUES (1, \u00272021-01-01\u0027, 90), (1, \u00272021-02-01\u0027, 85), (2, \u00272021-01-01\u0027, 95), (2, \u00272021-02-01\u0027, 92), (3, \u00272021-01-01\u0027, 88), (3, \u00272021-02-01\u0027, 89);", + "sql": "SELECT AVG(score) as avg_score FROM inspections i JOIN restaurant_locations rl ON i.restaurant_id \u003d rl.restaurant_id WHERE rl.location \u003d \u0027New York City\u0027;", + "sql_explanation": "The query calculates the average food safety score for restaurants in New York City by joining the inspections and restaurant_locations tables on restaurant_id, and filtering for location equal to \u0027New York City\u0027." 
+}, { + "id": "1679", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of menu items for each cuisine type?", + "sql_context": "CREATE TABLE MenuItems (MenuItemID int, RestaurantID int, CuisineType varchar(255), Price decimal(5,2)); INSERT INTO MenuItems (MenuItemID, RestaurantID, CuisineType, Price) VALUES (1, 1, \u0027Italian\u0027, 12.99), (2, 2, \u0027Mexican\u0027, 8.99), (3, 3, \u0027Chinese\u0027, 10.99);", + "sql": "SELECT R.CuisineType, AVG(MI.Price) as AvgPrice FROM Restaurants R INNER JOIN MenuItems MI ON R.RestaurantID \u003d MI.RestaurantID GROUP BY R.CuisineType;", + "sql_explanation": "The SQL query performs an inner join between the Restaurants and MenuItems tables, linking them by the RestaurantID. It then calculates the average price for each cuisine type using the AVG function and groups the results by the CuisineType." 
+}, { + "id": "1804", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the menu items that were not sold in the last week.", + "sql_context": "CREATE TABLE menu_items (item_id INT, name VARCHAR(255), category VARCHAR(255)); INSERT INTO menu_items (item_id, name, category) VALUES (1, \u0027Burger\u0027, \u0027Main Course\u0027), (2, \u0027Salad\u0027, \u0027Side Dish\u0027), (3, \u0027Pizza\u0027, \u0027Main Course\u0027); CREATE TABLE sales (sale_id INT, item_id INT, date DATE, revenue DECIMAL(10, 2)); INSERT INTO sales (sale_id, item_id, date, revenue) VALUES (1, 1, \u00272022-03-01\u0027, 500), (2, 2, \u00272022-03-02\u0027, 300), (3, 3, \u00272022-03-03\u0027, 700);", + "sql": "SELECT mi.name FROM menu_items mi LEFT JOIN sales s ON mi.item_id \u003d s.item_id AND s.date \u003e\u003d DATE(NOW()) - INTERVAL 7 DAY WHERE s.sale_id IS NULL;", + "sql_explanation": "This query performs a left join between \u0027menu_items\u0027 and \u0027sales\u0027 tables on the \u0027item_id\u0027 column and filters the records for the last 7 days. It returns the menu items that do not have a corresponding record in the sales table using the WHERE clause." 
+}, { + "id": "2081", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for \u0027Breakfast\u0027 menu items in the \u0027Downtown\u0027 location?", + "sql_context": "CREATE TABLE menus (menu_id INT, dish_name VARCHAR(50), dish_type VARCHAR(50), price DECIMAL(5,2), sales INT, location VARCHAR(50)); CREATE TABLE revenue (menu_id INT, date DATE, revenue INT);", + "sql": "SELECT SUM(r.revenue) FROM menus m JOIN revenue r ON m.menu_id \u003d r.menu_id WHERE m.dish_type \u003d \u0027Breakfast\u0027 AND m.location \u003d \u0027Downtown\u0027;", + "sql_explanation": "This query calculates the total revenue for \u0027Breakfast\u0027 menu items in the \u0027Downtown\u0027 location by summing the revenue from the revenue table where dish_type is \u0027Breakfast\u0027 and location is \u0027Downtown\u0027 in the menus table." 
+}, { + "id": "651", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total biomass of marine species in \u0027MarineResearchAreaC\u0027 and \u0027MarineResearchAreaD\u0027", + "sql_context": "CREATE TABLE SpeciesBiomass (species VARCHAR(255), biomass FLOAT); INSERT INTO SpeciesBiomass (species, biomass) VALUES (\u0027Seahorse\u0027, 120.5), (\u0027Whale\u0027, 600.0), (\u0027Plankton\u0027, 50.0); CREATE TABLE MarineResearchArea (species VARCHAR(255), area VARCHAR(255)); INSERT INTO MarineResearchArea (species, area) VALUES (\u0027Seahorse\u0027, \u0027MarineResearchAreaC\u0027), (\u0027Whale\u0027, \u0027MarineResearchAreaC\u0027), (\u0027Squid\u0027, \u0027MarineResearchAreaD\u0027);", + "sql": "SELECT SUM(biomass) FROM SpeciesBiomass INNER JOIN MarineResearchArea ON SpeciesBiomass.species \u003d MarineResearchArea.species WHERE MarineResearchArea.area IN (\u0027MarineResearchAreaC\u0027, \u0027MarineResearchAreaD\u0027);", + "sql_explanation": "The SQL query performs an INNER JOIN on \u0027SpeciesBiomass\u0027 and \u0027MarineResearchArea\u0027 tables using the \u0027species\u0027 column. The WHERE clause filters rows with \u0027MarineResearchAreaC\u0027 or \u0027MarineResearchAreaD\u0027 as the area, and the SUM function calculates the total biomass of marine species in the areas." 
+}, { + "id": "979", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of marine species recorded in protected areas in the Caribbean Sea.", + "sql_context": "CREATE TABLE Protected_Areas (area_name text, area_longitude numeric, area_latitude numeric, area_size numeric); CREATE TABLE Marine_Species (species_name text, species_longitude numeric, species_latitude numeric, species_abundance numeric);", + "sql": "SELECT COUNT(*) FROM Protected_Areas p JOIN Marine_Species s ON p.area_longitude \u003d s.species_longitude AND p.area_latitude \u003d s.species_latitude WHERE p.area_name LIKE \u0027%Caribbean%\u0027;", + "sql_explanation": "Count the total number of records in the Marine_Species table that have a matching longitude and latitude to any record in the Protected_Areas table where the area_name contains \u0027Caribbean\u0027." 
+}, { + "id": "989", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of all marine life research stations in the Atlantic region that are not compliant with maritime law?", + "sql_context": "CREATE TABLE marine_life_research_stations (station_id INT, station_name TEXT, region TEXT); CREATE TABLE maritime_law_compliance (station_id INT, is_compliant BOOLEAN); INSERT INTO marine_life_research_stations (station_id, station_name, region) VALUES (1, \u0027Station A\u0027, \u0027Pacific\u0027), (2, \u0027Station B\u0027, \u0027Atlantic\u0027); INSERT INTO maritime_law_compliance (station_id, is_compliant) VALUES (1, FALSE), (2, TRUE);", + "sql": "SELECT m.station_name FROM marine_life_research_stations m LEFT JOIN maritime_law_compliance c ON m.station_id \u003d c.station_id WHERE m.region \u003d \u0027Atlantic\u0027 AND c.is_compliant IS NULL;", + "sql_explanation": "First, we create a left join between the marine_life_research_stations table and the maritime_law_compliance table on the station_id. Then, we filter the results to only include stations in the Atlantic region that are not compliant with maritime law. Finally, we select the station_name column to get the names of the stations." 
+}, { + "id": "1347", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List maritime law violations and corresponding fines by country, with total fines.", + "sql_context": "CREATE TABLE violations (violation_id INT, country_id INT, amount FLOAT); CREATE TABLE countries (country_id INT, name VARCHAR(255));", + "sql": "SELECT c.name, SUM(v.amount) AS total_fines FROM violations v JOIN countries c ON v.country_id \u003d c.country_id GROUP BY c.name ORDER BY total_fines DESC;", + "sql_explanation": "The query performs an INNER JOIN between the \u0027violations\u0027 and \u0027countries\u0027 tables based on the \u0027country_id\u0027 column. It then groups the results by country, calculates the total fines for each country, and orders the results by total fines in descending order." 
+}, { + "id": "1849", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine species have the same number of individuals in the \u0027PollutionMonitoring\u0027 and \u0027ConservationEfforts\u0027 tables?", + "sql_context": "CREATE TABLE PollutionMonitoring (SpeciesID INT, Individuals INT); INSERT INTO PollutionMonitoring (SpeciesID, Individuals) VALUES (1, 100), (2, 200), (3, 300); CREATE TABLE ConservationEfforts (SpeciesID INT, Individuals INT); INSERT INTO ConservationEfforts (SpeciesID, Individuals) VALUES (2, 200), (3, 300), (4, 400);", + "sql": "SELECT P.SpeciesID FROM PollutionMonitoring P INNER JOIN ConservationEfforts C ON P.SpeciesID \u003d C.SpeciesID WHERE P.Individuals \u003d C.Individuals;", + "sql_explanation": "This query performs an inner join between the PollutionMonitoring and ConservationEfforts tables based on the SpeciesID column. The WHERE clause then filters the results to show only those species with the same number of individuals in both tables." 
+}, { + "id": "1978", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the names of all marine life research stations that are part of a maritime law compliance initiative", + "sql_context": "CREATE TABLE marine_life_research_stations (station_id INT, station_name TEXT); INSERT INTO marine_life_research_stations (station_id, station_name) VALUES (1, \u0027Station A\u0027), (2, \u0027Station B\u0027), (3, \u0027Station C\u0027); CREATE TABLE maritime_law_compliance_initiatives (initiative_id INT, initiative_name TEXT, station_id INT); INSERT INTO maritime_law_compliance_initiatives (initiative_id, initiative_name, station_id) VALUES (1, \u0027Initiative X\u0027, 2), (2, \u0027Initiative Y\u0027, 3);", + "sql": "SELECT m.station_name FROM marine_life_research_stations m JOIN maritime_law_compliance_initiatives mlci ON m.station_id \u003d mlci.station_id;", + "sql_explanation": "To retrieve the names of all marine life research stations that are part of a maritime law compliance initiative, we perform an inner join on the station_id." 
+}, { + "id": "2697", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names of the marine species found in the Arctic region?\u0027", + "sql_context": "CREATE TABLE marine_species (name VARCHAR(255), region VARCHAR(255)); CREATE TABLE arctic_region (name VARCHAR(255), region_type VARCHAR(255)); INSERT INTO marine_species (name, region) VALUES (\u0027SPECIES1\u0027, \u0027Arctic\u0027), (\u0027SPECIES2\u0027, \u0027Antarctic\u0027); INSERT INTO arctic_region (name, region_type) VALUES (\u0027SPECIES1\u0027, \u0027Marine Species\u0027);", + "sql": "SELECT marine_species.name FROM marine_species INNER JOIN arctic_region ON marine_species.name \u003d arctic_region.name;", + "sql_explanation": "The SQL query performs an inner join on the marine_species and arctic_region tables, using the name column as the common key. It then selects the name column for the resulting joined table." 
+}, { + "id": "3048", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the countries with their corresponding marine pollution index", + "sql_context": "CREATE TABLE country (id INT, name VARCHAR(255)); CREATE TABLE pollution (id INT, country VARCHAR(255), index INT); INSERT INTO country (id, name) VALUES (1, \u0027Canada\u0027); INSERT INTO country (id, name) VALUES (2, \u0027Mexico\u0027); INSERT INTO pollution (id, country, index) VALUES (1, \u0027Canada\u0027, 45); INSERT INTO pollution (id, country, index) VALUES (2, \u0027Mexico\u0027, 78);", + "sql": "SELECT country.name, pollution.index FROM country INNER JOIN pollution ON country.name \u003d pollution.country;", + "sql_explanation": "The SQL query joins the country and pollution tables on the country name. It then selects the country name and marine pollution index, displaying all the countries with their corresponding marine pollution index." 
+}, { + "id": "547", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which causes received the most funding in Q1 2022?", + "sql_context": "CREATE TABLE Donations (DonationID INT, DonationDate DATE, DonationAmount DECIMAL, CauseID INT);", + "sql": "SELECT C.CauseName, SUM(D.DonationAmount) as Q1Funding FROM Donations D JOIN Causes C ON D.CauseID \u003d C.CauseID WHERE D.DonationDate BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY C.CauseName ORDER BY Q1Funding DESC;", + "sql_explanation": "This query filters donations made in Q1 2022, joins the Donations and Causes tables on the CauseID foreign key, groups the results by the CauseName column, calculates the sum of DonationAmount for each group, and orders the results in descending order by Q1Funding, providing the causes that received the most funding in Q1 2022." 
+}, { + "id": "792", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 donors by summed donation amounts for each country?", + "sql_context": "CREATE TABLE Donors (DonorID INT, Country VARCHAR(50)); INSERT INTO Donors (DonorID, Country) VALUES (1, \u0027USA\u0027), (2, \u0027Canada\u0027), (3, \u0027Mexico\u0027), (4, \u0027Australia\u0027), (5, \u0027UK\u0027), (6, \u0027USA\u0027), (7, \u0027Canada\u0027), (8, \u0027Mexico\u0027), (9, \u0027Germany\u0027), (10, \u0027USA\u0027); CREATE TABLE Donations (DonationID INT, DonorID INT, Amount DECIMAL(10, 2)); INSERT INTO Donations (DonationID, DonorID, Amount) VALUES (1, 1, 500), (2, 1, 250), (3, 2, 300), (4, 3, 1000), (5, 3, 500), (6, 4, 200), (7, 5, 800), (8, 6, 300), (9, 6, 500), (10, 7, 250), (11, 8, 100), (12, 9, 1500), (13, 10, 400);", + "sql": "SELECT d.Country, d.DonorID, SUM(donations.Amount) AS TotalDonated FROM Donors d JOIN Donations ON d.DonorID \u003d Donations.DonorID GROUP BY d.Country, d.DonorID ORDER BY TotalDonated DESC LIMIT 5;", + "sql_explanation": "This query joins the Donors and Donations table, groups the results by Country and DonorID, calculates the total amount donated by each donor for each country, and returns the top 5 donors with the highest donation amounts for each country." 
+}, { + "id": "930", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount to organizations focused on climate change in 2020?", + "sql_context": "CREATE TABLE organizations (org_id INT, org_name TEXT, org_focus TEXT); INSERT INTO organizations (org_id, org_name, org_focus) VALUES (1, \u0027Clean Air Task Force\u0027, \u0027Climate Change\u0027), (2, \u0027Environmental Defense Fund\u0027, \u0027Climate Change\u0027), (3, \u0027Natural Resources Defense Council\u0027, \u0027Environment\u0027); CREATE TABLE donations (donation_id INT, donor_id INT, donation_amount FLOAT, donation_date DATE, org_id INT); INSERT INTO donations (donation_id, donor_id, donation_amount, donation_date, org_id) VALUES (1, 1, 500.00, \u00272020-01-01\u0027, 1), (2, 2, 300.00, \u00272020-02-15\u0027, 2), (3, 3, 800.00, \u00272020-03-10\u0027, 3);", + "sql": "SELECT AVG(donation_amount) FROM donations JOIN organizations ON donations.org_id \u003d organizations.org_id WHERE organizations.org_focus \u003d \u0027Climate Change\u0027 AND YEAR(donation_date) \u003d 2020;", + "sql_explanation": "This query calculates the average donation amount to organizations focused on climate change in 2020. It uses the AVG function to calculate the average donation_amount value where the org_focus is \u0027Climate Change\u0027 and the year of donation_date is 2020. The query uses the JOIN clause to combine data from the donations and organizations tables." 
+}, { + "id": "1091", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donations were made by organizations based in the UK with a donation amount greater than $1000?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonorType TEXT, Country TEXT); INSERT INTO Donors (DonorID, DonorName, DonorType, Country) VALUES (1, \u0027Bill \u0026 Melinda Gates Foundation\u0027, \u0027Organization\u0027, \u0027United States\u0027), (2, \u0027Oxfam\u0027, \u0027Organization\u0027, \u0027UK\u0027); CREATE TABLE Donations (DonationID INT, DonorID INT, DonationAmount INT); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (1, 2, 5000);", + "sql": "SELECT COUNT(*) FROM Donations JOIN Donors ON Donations.DonorID \u003d Donors.DonorID WHERE Donors.DonorType \u003d \u0027Organization\u0027 AND Donors.Country \u003d \u0027UK\u0027 HAVING DonationAmount \u003e 1000;", + "sql_explanation": "The SQL query counts the number of records in the Donations table, but only for records that have a matching DonorID in the Donors table, are organizations based in the UK, and have a donation amount greater than $1000." 
+}, { + "id": "1095", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donors are from the \u0027Global South\u0027 and have donated more than $500?", + "sql_context": "CREATE TABLE Donors (DonorID INT, Name VARCHAR(50), Age INT, Gender VARCHAR(20), DonationAmount DECIMAL(10,2), Location VARCHAR(50)); INSERT INTO Donors (DonorID, Name, Age, Gender, DonationAmount, Location) VALUES (2, \u0027Sofia Garcia\u0027, 45, \u0027Female\u0027, 700.00, \u0027Brazil\u0027); CREATE TABLE Countries (CountryID INT, CountryName VARCHAR(50), Continent VARCHAR(20)); INSERT INTO Countries (CountryID, CountryName, Continent) VALUES (1, \u0027Brazil\u0027, \u0027South America\u0027);", + "sql": "SELECT COUNT(DISTINCT Donors.DonorID) FROM Donors INNER JOIN Countries ON Donors.Location \u003d Countries.CountryName WHERE Continent \u003d \u0027South America\u0027 AND DonationAmount \u003e 500.00;", + "sql_explanation": "Count the number of donors from the Global South who have donated more than $500 by using the COUNT function on the DonorID column, filtering for donors from the Global South using the INNER JOIN and WHERE clauses on the Donors and Countries tables, and filtering for donations greater than $500 using the DonationAmount condition." 
+}, { + "id": "1199", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of donors by type and their respective total donation amounts, in the \u0027Donors\u0027 and \u0027Donations\u0027 tables?", + "sql_context": "CREATE TABLE Donors (id INT, name VARCHAR(50), city VARCHAR(50), state VARCHAR(50), country VARCHAR(50), type VARCHAR(10), donation_amount DECIMAL(10, 2));", + "sql": "SELECT d.type, COUNT(DISTINCT d.id) as num_donors, SUM(Donations.amount) as total_donations FROM Donors d JOIN Donations ON Donors.id \u003d Donations.donor_id GROUP BY d.type;", + "sql_explanation": "This query performs an inner join between the \u0027Donors\u0027 and \u0027Donations\u0027 tables, groups donors by type, and calculates the count of unique donors and total donation amounts for each type." 
+}, { + "id": "1578", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount by donors from India in the year 2020?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), Country varchar(50)); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (2, \u0027Jane Smith\u0027, \u0027Canada\u0027); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (4, \u0027Raj Patel\u0027, \u0027India\u0027); CREATE TABLE Donations (DonationID int, DonorID int, DonationAmount decimal(10,2), DonationDate date); INSERT INTO Donations (DonationID, DonorID, DonationAmount, DonationDate) VALUES (1, 1, 500, \u00272020-01-01\u0027); INSERT INTO Donations (DonationID, DonorID, DonationAmount, DonationDate) VALUES (2, 1, 750, \u00272020-05-15\u0027); INSERT INTO Donations (DonationID, DonorID, DonationAmount, DonationDate) VALUES (4, 4, 150, \u00272020-07-22\u0027);", + "sql": "SELECT AVG(DonationAmount) FROM Donations D INNER JOIN Donors DON ON D.DonorID \u003d DON.DonorID WHERE DON.Country \u003d \u0027India\u0027 AND YEAR(D.DonationDate) \u003d 2020;", + "sql_explanation": "This SQL query calculates the average donation amount by donors from India in the year 2020. It does this by summing the DonationAmount column from the Donations table, but only for records where the Donor\u0027s country is \u0027India\u0027 and the DonationDate is in the year 2020. The AVG() function is then used to calculate the average value of the DonationAmount column. 
The query filters on the DonationDate column using the YEAR() function to ensure only donations from the year 2020 are included." +}, { + "id": "1618", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 cities with the highest total donation amounts in the \u0027Donations\u0027 table, along with their respective sums.", + "sql_context": "CREATE TABLE Donations (id INT, donor_id INT, organization_id INT, amount DECIMAL(10, 2), date DATE);", + "sql": "SELECT city, SUM(amount) as total_donations FROM Donors d JOIN Donations don ON d.id \u003d don.donor_id GROUP BY city ORDER BY total_donations DESC LIMIT 3;", + "sql_explanation": "This query performs an inner join between the \u0027Donors\u0027 and \u0027Donations\u0027 tables, groups donations by city, and calculates the total donation amounts for each city. It then orders the results by total donations in descending order and returns the top 3 cities." 
+}, { + "id": "1693", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which donors have made donations only in the education sector?", + "sql_context": "CREATE TABLE education (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO education VALUES (1, 15000, \u00272020-01-01\u0027), (2, 12000, \u00272020-02-01\u0027), (3, 9000, \u00272020-03-01\u0027), (4, 6000, \u00272020-04-01\u0027), (5, 3000, \u00272020-05-01\u0027); CREATE TABLE global_health (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO global_health VALUES (1, 20000, \u00272020-01-01\u0027), (2, 18000, \u00272020-02-01\u0027), (3, 16000, \u00272020-03-01\u0027), (6, 14000, \u00272020-03-01\u0027);", + "sql": "SELECT education.donor_id FROM education LEFT JOIN global_health ON education.donor_id \u003d global_health.donor_id WHERE global_health.donor_id IS NULL;", + "sql_explanation": "This SQL query first creates two tables, education and global_health, each containing donor information. It then performs a LEFT JOIN on these tables based on the donor_id. By checking for NULL values in the global_health table, the query identifies donors who have made donations only in the education sector and returns their donor_id." 
+}, { + "id": "2420", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum donation amount made by donors from Africa?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonorType TEXT, Country TEXT); INSERT INTO Donors (DonorID, DonorName, DonorType, Country) VALUES (1, \u0027Aisha Mohammed\u0027, \u0027Individual\u0027, \u0027Nigeria\u0027), (2, \u0027Anthony Nwosu\u0027, \u0027Individual\u0027, \u0027Nigeria\u0027); CREATE TABLE Donations (DonationID INT, DonorID INT, DonationAmount INT); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (1, 1, 250), (2, 1, 500), (3, 2, 100);", + "sql": "SELECT MIN(DonationAmount) FROM Donations JOIN Donors ON Donations.DonorID \u003d Donors.DonorID WHERE Donors.Country \u003d \u0027Nigeria\u0027;", + "sql_explanation": "The SQL query retrieves the minimum donation amount from the DonationAmount column in the Donations table, but only for records that have a matching DonorID in the Donors table and are from Nigeria." 
+}, { + "id": "203", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many telemedicine consultations were conducted in 2022 for patients with diabetes in rural clinic Y?", + "sql_context": "CREATE TABLE clinics (clinic_id INT, clinic_name VARCHAR(50)); CREATE TABLE telemedicine (telemed_id INT, patient_id INT, consultation_date DATE, diagnosis VARCHAR(50)); CREATE TABLE patients (patient_id INT, patient_name VARCHAR(50), residence_area VARCHAR(50), medical_condition VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM telemedicine INNER JOIN patients ON telemedicine.patient_id \u003d patients.patient_id WHERE telemedicine.consultation_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-12-31\u0027 AND patients.residence_area \u003d \u0027rural clinic Y\u0027 AND patients.medical_condition \u003d \u0027diabetes\u0027;", + "sql_explanation": "This query joins telemedicine and patients tables, filters 2022 consultations, residence area, and medical condition, and calculates the number of telemedicine consultations." 
+}, { + "id": "233", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 states with the highest rural hospital readmission rates for patients with diabetes in the last year.", + "sql_context": "CREATE TABLE hospitals (id INT, state TEXT); CREATE TABLE readmissions (id INT, hospital_id INT, readmission_date DATE, diagnosis TEXT);", + "sql": "SELECT h.state, COUNT(*) AS readmissions FROM hospitals h JOIN readmissions r ON h.id \u003d r.hospital_id WHERE diagnosis \u003d \u0027Diabetes\u0027 AND readmission_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 YEAR) AND CURDATE() GROUP BY h.state ORDER BY readmissions DESC LIMIT 3;", + "sql_explanation": "We join the hospitals and readmissions tables based on the hospital_id. We then filter the rows with diabetes diagnosis and readmission date within the last year. Finally, we group the data by state and count the number of readmissions, then order the results by readmissions and limit the output to the top 3 states." 
+}, { + "id": "259", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many healthcare professionals are there in rural counties by state?", + "sql_context": "CREATE TABLE states (state_abbr CHAR(2), state_name VARCHAR(50)); INSERT INTO states (state_abbr, state_name) VALUES (\u0027AK\u0027, \u0027Alaska\u0027), (\u0027AL\u0027, \u0027Alabama\u0027), (\u0027AR\u0027, \u0027Arkansas\u0027); CREATE TABLE counties (county_id INT, county_name VARCHAR(50), state_abbr CHAR(2), rural BOOLEAN); INSERT INTO counties (county_id, county_name, state_abbr, rural) VALUES (1, \u0027Rural County A\u0027, \u0027AK\u0027, true), (2, \u0027Urban County B\u0027, \u0027AL\u0027, false); CREATE TABLE healthcare_professionals (professional_id INT, professional_name VARCHAR(100), county_id INT); INSERT INTO healthcare_professionals (professional_id, professional_name, county_id) VALUES (1, \u0027Professional A\u0027, 1), (2, \u0027Professional B\u0027, 2);", + "sql": "SELECT hp.county_id, hp.professional_name, hp.state_abbr, COUNT(*) as num_healthcare_professionals FROM healthcare_professionals hp JOIN counties c ON hp.county_id \u003d c.county_id WHERE c.rural \u003d true GROUP BY hp.county_id, hp.state_abbr, hp.professional_name;", + "sql_explanation": "This query joins the healthcare_professionals table with the counties table using the county_id and calculates the count of healthcare professionals per rural county by grouping on the county_id, state_abbr, and professional_name columns." 
+}, { + "id": "550", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view for displaying hospital bed capacity and associated healthcare workers by hospital type", + "sql_context": "CREATE TABLE hospital_bed_capacity (id INT PRIMARY KEY, hospital_name VARCHAR(255), hospital_type VARCHAR(255), total_beds INT); CREATE TABLE healthcare_workers (id INT PRIMARY KEY, worker_name VARCHAR(255), hospital_id INT, position VARCHAR(255), years_of_experience INT);", + "sql": "CREATE VIEW hospital_worker_capacity_by_type AS SELECT h.hospital_type, h.total_beds, w.worker_name, w.position, w.years_of_experience FROM hospital_bed_capacity h JOIN healthcare_workers w ON h.id \u003d w.hospital_id;", + "sql_explanation": "Create a view called hospital_worker_capacity_by_type that displays hospital bed capacity along with associated healthcare worker information, grouped by hospital type. The view uses a JOIN to combine the hospital_bed_capacity and healthcare_workers tables based on the id and hospital_id columns." 
+}, { + "id": "833", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the name, location, and number of physicians for each hospital in rural areas of Texas.", + "sql_context": "CREATE TABLE hospital (hospital_id INT, name TEXT, location TEXT, beds INT); CREATE TABLE physician (physician_id INT, name TEXT, specialty TEXT, location TEXT, hospital_id INT);", + "sql": "SELECT a.name, a.location, COUNT(b.physician_id) FROM hospital a INNER JOIN physician b ON a.hospital_id \u003d b.hospital_id WHERE a.location LIKE \u0027%rural%\u0027 AND a.state \u003d \u0027Texas\u0027 GROUP BY a.name;", + "sql_explanation": "This query joins the hospital and physician tables on the hospital_id column, filters the resulting rows based on the hospital location and state columns to include only hospitals in rural areas of Texas, then returns the name, location, and number of physicians for each hospital." 
+}, { + "id": "975", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of patients in each hospital in rural Oregon?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT); CREATE TABLE patients (id INT, name TEXT, hospital_id INT); INSERT INTO hospitals (id, name, location) VALUES (1, \u0027Hospital A\u0027, \u0027Rural Oregon\u0027); INSERT INTO hospitals (id, name, location) VALUES (7, \u0027Hospital G\u0027, \u0027Rural Oregon\u0027); INSERT INTO patients (id, name, hospital_id) VALUES (1, \u0027Patient A\u0027, 1); INSERT INTO patients (id, name, hospital_id) VALUES (2, \u0027Patient B\u0027, 1); INSERT INTO patients (id, name, hospital_id) VALUES (3, \u0027Patient C\u0027, 7);", + "sql": "SELECT hospitals.name, COUNT(patients.id) FROM hospitals INNER JOIN patients ON hospitals.id \u003d patients.hospital_id WHERE hospitals.location \u003d \u0027Rural Oregon\u0027 GROUP BY hospitals.name;", + "sql_explanation": "This query performs an inner join between the hospitals and patients tables on the hospital_id column. It then selects the name column from the hospitals table and the count of the id column from the patients table. The WHERE clause filters the results to only hospitals in \u0027Rural Oregon\u0027. Finally, the GROUP BY clause groups the results by the hospitals.name column, resulting in the number of patients in each hospital in rural Oregon." 
+}, { + "id": "1400", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of healthcare workers in each rural area?", + "sql_context": "CREATE TABLE rural_areas (id INT, name TEXT, num_workers INT, avg_age FLOAT); INSERT INTO rural_areas (id, name, num_workers, avg_age) VALUES (1, \u0027Rural Area 1\u0027, 100, 49.5), (2, \u0027Rural Area 2\u0027, 125, 47.8); CREATE TABLE rural_clinics (id INT, name TEXT, location TEXT, num_workers INT, avg_age FLOAT); INSERT INTO rural_clinics (id, name, location, num_workers, avg_age) VALUES (1, \u0027Rural Clinic A\u0027, \u0027Rural Area 1\u0027, 10, 45.3), (2, \u0027Rural Clinic B\u0027, \u0027Rural Area 2\u0027, 15, 42.8);", + "sql": "SELECT r.name, SUM(rural_clinics.num_workers + rural_areas.num_workers) FROM rural_areas r JOIN rural_clinics ON r.name \u003d rural_clinics.location GROUP BY r.name;", + "sql_explanation": "This query joins the \u0027rural_areas\u0027 and \u0027rural_clinics\u0027 tables on the \u0027location\u0027 column and then calculates the sum of \u0027num_workers\u0027 for each rural area using the SUM() function, providing the total number of healthcare workers in each rural area." 
+}, { + "id": "1449", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients are there in each hospital, and what is the average age of those patients?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, state TEXT); INSERT INTO hospitals (id, name, state) VALUES (1, \u0027Hospital A\u0027, \u0027California\u0027), (2, \u0027Hospital B\u0027, \u0027Texas\u0027); CREATE TABLE patients (id INT, name TEXT, age INT, hospital_id INT); INSERT INTO patients (id, name, age, hospital_id) VALUES (1, \u0027John Doe\u0027, 65, 1), (2, \u0027Jane Smith\u0027, 45, 1), (3, \u0027Bob Johnson\u0027, 35, 2);", + "sql": "SELECT hospitals.name, COUNT(patients.id), AVG(patients.age) FROM patients INNER JOIN hospitals ON patients.hospital_id \u003d hospitals.id GROUP BY hospitals.name;", + "sql_explanation": "This query joins the patients and hospitals tables on the hospital_id column. It then groups the results by hospital and calculates the number of patients and average age of those patients for each hospital." 
+}, { + "id": "1533", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients with diabetes in rural areas?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, rural BOOLEAN); INSERT INTO patients (id, name, age, rural) VALUES (1, \u0027John Doe\u0027, 65, true), (2, \u0027Jane Smith\u0027, 45, false); CREATE TABLE diseases (id INT, patient_id INT, name TEXT); INSERT INTO diseases (id, patient_id, name) VALUES (1, 1, \u0027Diabetes\u0027), (2, 2, \u0027Asthma\u0027);", + "sql": "SELECT AVG(patients.age) FROM patients INNER JOIN diseases ON patients.id \u003d diseases.patient_id WHERE patients.rural \u003d true AND diseases.name \u003d \u0027Diabetes\u0027;", + "sql_explanation": "This query joins the patients and diseases tables on the patient_id column. It then filters for patients in rural areas who have diabetes, and calculates the average age of those patients." 
+}, { + "id": "1730", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of medical professionals working in rural hospitals in the state of New York?", + "sql_context": "CREATE TABLE medical_professionals (id INT, name VARCHAR(50), hospital_id INT); CREATE TABLE hospitals (id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO medical_professionals (id, name, hospital_id) VALUES (1, \u0027Dr. Smith\u0027, 1), (2, \u0027Dr. Johnson\u0027, 1), (3, \u0027Dr. Lee\u0027, 2); INSERT INTO hospitals (id, name, location) VALUES (1, \u0027Hospital A\u0027, \u0027New York\u0027), (2, \u0027Hospital B\u0027, \u0027New York\u0027);", + "sql": "SELECT COUNT(*) FROM medical_professionals JOIN hospitals ON medical_professionals.hospital_id \u003d hospitals.id WHERE hospitals.location \u003d \u0027New York\u0027;", + "sql_explanation": "This query counts the total number of medical professionals working in rural hospitals in the state of New York. It does this by joining the medical_professionals table and the hospitals table on the hospital_id column. It then filters the results to only include rows where the location is \u0027New York\u0027. It then uses the COUNT function to count the number of rows that meet this criteria." 
+}, { + "id": "1742", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of hospital beds in rural hospitals per state?", + "sql_context": "CREATE TABLE states (state_abbr CHAR(2), state_name VARCHAR(50)); INSERT INTO states (state_abbr, state_name) VALUES (\u0027AK\u0027, \u0027Alaska\u0027), (\u0027AL\u0027, \u0027Alabama\u0027), (\u0027AR\u0027, \u0027Arkansas\u0027); CREATE TABLE hospitals (hospital_id INT, hospital_name VARCHAR(100), rural BOOLEAN, num_beds INT); INSERT INTO hospitals (hospital_id, hospital_name, rural, num_beds) VALUES (1, \u0027Rural Hospital A\u0027, true, 50), (2, \u0027Urban Hospital B\u0027, false, 100); CREATE TABLE hospital_location (hospital_id INT, state_abbr CHAR(2)); INSERT INTO hospital_location (hospital_id, state_abbr) VALUES (1, \u0027AK\u0027), (2, \u0027AL\u0027);", + "sql": "SELECT hl.state_abbr, h.hospital_name, h.num_beds FROM hospitals h JOIN hospital_location hl ON h.hospital_id \u003d hl.hospital_id WHERE h.rural \u003d true;", + "sql_explanation": "This query joins the hospitals table with the hospital_location table using the hospital_id and filters on the rural column with a value of true to retrieve the number of hospital beds in rural hospitals per state." 
+}, { + "id": "74", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 cities with the most user engagement on our platform, in terms of likes, shares, and comments, for the year 2022?", + "sql_context": "CREATE TABLE cities (city_id INT, city_name VARCHAR(255));CREATE TABLE user_activity (activity_id INT, user_id INT, city_id INT, activity_type VARCHAR(50), activity_date DATE);", + "sql": "SELECT c.city_name, SUM(CASE WHEN activity_type IN (\u0027like\u0027, \u0027share\u0027, \u0027comment\u0027) THEN 1 ELSE 0 END) as total_engagement FROM cities c JOIN user_activity ua ON c.city_id \u003d ua.city_id WHERE ua.activity_date \u003e\u003d \u00272022-01-01\u0027 AND ua.activity_date \u003c \u00272023-01-01\u0027 GROUP BY c.city_name ORDER BY total_engagement DESC LIMIT 5;", + "sql_explanation": "This query joins the cities table with the user_activity table on the city_id column. It then filters the activity records for the year 2022, and groups the results by city name. The query calculates the total engagement by summing the records where the activity_type is either \u0027like\u0027, \u0027share\u0027, or \u0027comment\u0027. It finally orders the cities by total engagement and returns the top 5." 
+}, { + "id": "197", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of unique users who engaged with posts with the hashtag \u0027#music\u0027 or \u0027#dance\u0027 in the \u0027Europe\u0027 region in the last 6 months?", + "sql_context": "CREATE TABLE engagements (engagement_id INT, post_id INT, user_id INT, engagement_date DATE); CREATE TABLE posts (post_id INT, user_id INT, hashtags VARCHAR(255), region VARCHAR(255)); INSERT INTO engagements (engagement_id, post_id, user_id, engagement_date) VALUES (1, 1, 1, \u00272021-08-01\u0027); INSERT INTO posts (post_id, user_id, hashtags, region) VALUES (1, 1, \u0027#music #europe\u0027, \u0027Europe\u0027);", + "sql": "SELECT COUNT(DISTINCT engagements.user_id) FROM engagements JOIN posts ON engagements.post_id \u003d posts.post_id WHERE (posts.hashtags LIKE \u0027%#music%\u0027 OR posts.hashtags LIKE \u0027%#dance%\u0027) AND posts.region \u003d \u0027Europe\u0027 AND engagements.engagement_date \u003e\u003d NOW() - INTERVAL 6 MONTH;", + "sql_explanation": "This query calculates the total number of unique users who engaged with posts with the hashtag \u0027#music\u0027 or \u0027#dance\u0027 in the \u0027Europe\u0027 region in the last 6 months. It uses an inner join to combine the engagements and posts tables, then filters for engagements with a matching post hashtag and region, and within the specified date range. The query selects the distinct number of user IDs to find the total." 
+}, { + "id": "473", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of likes for posts containing the hashtag \"#veganfood\" per month for the last two years?", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, content TEXT, likes INT, timestamp DATETIME); INSERT INTO posts (id, user_id, content, likes, timestamp) VALUES (1, 1, \u0027Vegan food recipe\u0027, 350, \u00272020-01-01 10:00:00\u0027), (2, 2, \u0027Delicious vegan meal\u0027, 180, \u00272020-01-05 15:30:00\u0027);", + "sql": "SELECT EXTRACT(MONTH FROM timestamp) AS month, MAX(likes) FROM posts JOIN hashtags ON posts.id \u003d hashtags.post_id WHERE hashtag \u003d \u0027#veganfood\u0027 AND timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 2 YEAR) AND NOW() GROUP BY month;", + "sql_explanation": "Determining the maximum number of likes for posts with the hashtag \"#veganfood\" per month for the last two years. Joins the posts and hashtags tables, filters for the specified hashtag and date range, extracts the month from the timestamp, and calculates the maximum likes for each month using the MAX function and GROUP BY clause." 
+}, { + "id": "636", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of all advertising spend by companies from the United Kingdom, in February 2022?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, country TEXT); INSERT INTO companies (id, name, country) VALUES (1, \u0027Alpha\u0027, \u0027United Kingdom\u0027), (2, \u0027Beta\u0027, \u0027United Kingdom\u0027), (3, \u0027Gamma\u0027, \u0027Canada\u0027), (4, \u0027Delta\u0027, \u0027Australia\u0027); CREATE TABLE ad_spend (company_id INT, amount DECIMAL, date DATE); INSERT INTO ad_spend (company_id, amount, date) VALUES (1, 1500, \u00272022-02-01\u0027), (1, 1200, \u00272022-02-05\u0027), (2, 1800, \u00272022-02-03\u0027), (3, 800, \u00272022-02-04\u0027), (4, 1000, \u00272022-03-04\u0027);", + "sql": "SELECT SUM(ad_spend.amount) FROM ad_spend JOIN companies ON ad_spend.company_id \u003d companies.id WHERE companies.country \u003d \u0027United Kingdom\u0027 AND ad_spend.date \u003e\u003d \u00272022-02-01\u0027 AND ad_spend.date \u003c\u003d \u00272022-02-28\u0027;", + "sql_explanation": "1. Joins the ad_spend and companies tables on the company_id and id columns respectively. 2. Filters the data for companies from the United Kingdom. 3. Filters the data for ad spend in February 2022. 4. Calculates the sum of ad spend for the matching rows." 
+}, { + "id": "646", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users have posted content related to \"climate change\" in the past week, and what is their average age?", + "sql_context": "CREATE TABLE users (user_id INT, age INT, gender VARCHAR(50));CREATE TABLE posts (post_id INT, user_id INT, content TEXT, post_date DATE); INSERT INTO users (user_id, age, gender) VALUES (1, 25, \u0027female\u0027), (2, 35, \u0027male\u0027); INSERT INTO posts (post_id, user_id, content, post_date) VALUES (1, 1, \u0027climate change is real\u0027, \u00272023-02-25\u0027), (2, 1, \u0027save the planet\u0027, \u00272023-02-23\u0027);", + "sql": "SELECT AVG(age) as avg_age, COUNT(DISTINCT user_id) as num_users FROM users JOIN posts ON users.user_id \u003d posts.user_id WHERE content LIKE \u0027%climate change%\u0027 AND post_date \u003e\u003d DATEADD(day, -7, CURRENT_DATE);", + "sql_explanation": "This query calculates the number of unique users who have posted about climate change in the past week, as well as their average age, by joining the posts and users tables on the user_id. It then filters the data by content and post_date, and finally calculates the AVG of age and COUNT of user_id." 
+}, { + "id": "669", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of posts made by users from the Middle East, grouped by day of the week.", + "sql_context": "CREATE TABLE posts (id INT, user_id INT, post_date DATE); INSERT INTO posts (id, user_id, post_date) VALUES (1, 1, \u00272021-01-01\u0027), (2, 2, \u00272021-01-02\u0027), (3, 3, \u00272021-01-03\u0027); CREATE TABLE users (id INT, country VARCHAR(50)); INSERT INTO users (id, country) VALUES (1, \u0027Iran\u0027), (2, \u0027Saudi Arabia\u0027), (3, \u0027Turkey\u0027);", + "sql": "SELECT DATE_FORMAT(post_date, \u0027%W\u0027) as day_of_week, COUNT(*) as post_count FROM posts JOIN users ON posts.user_id \u003d users.id WHERE users.country IN (\u0027Iran\u0027, \u0027Saudi Arabia\u0027, \u0027Turkey\u0027) GROUP BY day_of_week;", + "sql_explanation": "This query first joins the posts table with the users table to get the country of each user. It then extracts the day of the week from the post date and groups the data by the day of the week to calculate the number of posts made on each day of the week." 
+}, { + "id": "708", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many posts were made by each user in the last month?", + "sql_context": "CREATE TABLE users (user_id INT, user_name TEXT);CREATE TABLE posts (post_id INT, post_text TEXT, user_id INT, post_date DATE);", + "sql": "SELECT u.user_id, u.user_name, COUNT(p.post_id) as posts_last_month FROM users u JOIN posts p ON u.user_id \u003d p.user_id WHERE p.post_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY u.user_id;", + "sql_explanation": "1. Joins the users and posts tables on their respective IDs. 2. Filters the data for posts made in the last month. 3. Groups the results by user ID. 4. Counts the number of posts per user. 5. Returns the number of posts made by each user in the last month." 
+}, { + "id": "744", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of distinct users who have interacted with content related to the hashtag \"#technology\" in the last month?", + "sql_context": "CREATE TABLE users (id INT); CREATE TABLE posts (id INT, user_id INT, hashtags TEXT);", + "sql": "SELECT COUNT(DISTINCT users.id) FROM users INNER JOIN posts ON users.id \u003d posts.user_id WHERE FIND_IN_SET(\u0027technology\u0027, posts.hashtags) \u003e 0 AND posts.created_at \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH);", + "sql_explanation": "The SQL query joins the users and posts tables on the user_id and id columns, respectively. It then filters the results to only include posts created in the last month and containing the hashtag \"#technology\". Finally, it counts the number of unique users who interacted with this content." 
+}, { + "id": "755", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many distinct users liked posts containing the hashtag #climatechange in the past month?", + "sql_context": "CREATE TABLE likes (id INT, user_id INT, post_id INT, timestamp DATETIME);", + "sql": "SELECT COUNT(DISTINCT user_id) FROM likes JOIN posts ON likes.post_id \u003d posts.id WHERE posts.content LIKE \u0027%#climatechange%\u0027 AND posts.timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 MONTH) AND NOW();", + "sql_explanation": "Count the distinct \u0027user_id\u0027 values that liked posts containing \u0027#climatechange\u0027 in the past month." 
+}, { + "id": "786", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the top 2 languages with the most posts related to AI.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(255), country VARCHAR(255), language VARCHAR(10)); CREATE TABLE posts (id INT, user_id INT, content TEXT, created_at TIMESTAMP);", + "sql": "SELECT posts.language, COUNT(posts.id) AS posts_count FROM posts JOIN users ON users.id \u003d posts.user_id WHERE posts.content LIKE \u0027%AI%\u0027 GROUP BY posts.language ORDER BY posts_count DESC LIMIT 2;", + "sql_explanation": "This query retrieves the top 2 languages with the most posts related to AI. It joins the users and posts tables, filters the posts by the specified keyword, groups the results by language, calculates the count of posts for each language, and orders the results in descending order by the count of posts. The query then limits the results to the top 2 languages." 
+}, { + "id": "890", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of users and posts for each location in the social_media database.", + "sql_context": "CREATE TABLE users (user_id INT PRIMARY KEY, username VARCHAR(255), location VARCHAR(255)); INSERT INTO users (user_id, username, location) VALUES (1, \u0027user1\u0027, \u0027NYC\u0027), (2, \u0027user2\u0027, \u0027LA\u0027), (3, \u0027user3\u0027, \u0027NYC\u0027), (4, \u0027user4\u0027, \u0027SF\u0027);CREATE TABLE posts (post_id INT PRIMARY KEY, user_id INT, content TEXT); INSERT INTO posts (post_id, user_id, content) VALUES (1, 1, \u0027Hello World\u0027), (2, 1, \u0027Post 2\u0027), (3, 2, \u0027LA post\u0027), (4, 2, \u0027Post 4\u0027), (5, 2, \u0027LA again\u0027), (6, 3, \u0027NYC post\u0027), (7, 4, \u0027SF post\u0027);", + "sql": "SELECT users.location, COUNT(DISTINCT users.user_id) AS user_count, COUNT(posts.post_id) AS post_count FROM users LEFT JOIN posts ON users.user_id \u003d posts.user_id GROUP BY users.location;", + "sql_explanation": "First, we perform a LEFT JOIN between the users and posts table on the user_id column. Then, we apply a COUNT function to the DISTINCT user_id column and the post_id column, grouping the results by location. This returns the total number of users and posts for each unique location in the social_media database." 
+}, { + "id": "1066", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of likes on posts by users located in the United States, for posts created in the last month?", + "sql_context": "CREATE TABLE users (id INT, location VARCHAR(50)); CREATE TABLE posts (id INT, user_id INT, likes INT, created_at DATETIME);", + "sql": "SELECT AVG(posts.likes) FROM posts INNER JOIN users ON posts.user_id \u003d users.id WHERE users.location \u003d \u0027United States\u0027 AND posts.created_at \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH);", + "sql_explanation": "The SQL query joins the users and posts tables on the user_id and id columns, respectively. It then filters the results to only include posts created in the last month and users located in the United States. Finally, it calculates the average number of likes on these posts." 
+}, { + "id": "1241", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many followers do users have who posted content related to \"climate change\" in the last 30 days?", + "sql_context": "CREATE TABLE users (id INT, followers INT); CREATE TABLE posts (id INT, user_id INT, content VARCHAR(255), post_date DATE); INSERT INTO users (id, followers) VALUES (1, 5000); INSERT INTO posts (id, user_id, content, post_date) VALUES (1, 1, \u0027Climate change is real\u0027, \u00272022-03-15\u0027);", + "sql": "SELECT users.followers FROM users JOIN posts ON users.id \u003d posts.user_id WHERE posts.content ILIKE \u0027%climate change%\u0027 AND posts.post_date \u003e\u003d NOW() - INTERVAL \u002730 days\u0027;", + "sql_explanation": "This query calculates the number of followers for users who posted content related to \"climate change\" in the last 30 days. It does this by joining the users and posts tables on the user_id column, filtering for posts made in the last 30 days and with content related to \"climate change\", and then returning the number of followers for those users." 
+}, { + "id": "1336", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 most popular content types in terms of ad impressions.", + "sql_context": "CREATE TABLE content_types (content_type VARCHAR(50), ad_id INT); INSERT INTO content_types (content_type, ad_id) VALUES (\u0027video\u0027, 1), (\u0027image\u0027, 2), (\u0027text\u0027, 3), (\u0027video\u0027, 4), (\u0027image\u0027, 5), (\u0027text\u0027, 6);", + "sql": "SELECT content_type, COUNT(*) as impressions FROM content_types JOIN ads ON content_types.ad_id \u003d ads.ad_id GROUP BY content_type ORDER BY impressions DESC LIMIT 3;", + "sql_explanation": "The SQL query joins the content_types table with the ads table to associate content types with ad_ids. Then, it groups the resulting table by content_type and counts the number of impressions for each content type. Finally, it orders the groups by impressions in descending order and returns the top 3 groups with the highest number of impressions." 
+}, { + "id": "2152", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users from country \u0027US\u0027 liked post ID 1001?", + "sql_context": "CREATE TABLE users (id INT, country VARCHAR(2)); INSERT INTO users (id, country) VALUES (1, \u0027US\u0027), (2, \u0027CA\u0027); CREATE TABLE post_likes (user_id INT, post_id INT); INSERT INTO post_likes (user_id, post_id) VALUES (1, 1001), (3, 1001), (4, 1002);", + "sql": "SELECT COUNT(*) FROM users JOIN post_likes ON users.id \u003d post_likes.user_id WHERE users.country \u003d \u0027US\u0027 AND post_likes.post_id \u003d 1001;", + "sql_explanation": "The SQL query joins the users and post_likes tables on the user_id column, filters for users from the US and post_id 1001, and then counts the number of matching records." 
+}, { + "id": "3084", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users joined from Africa in the last 6 months?", + "sql_context": "CREATE TABLE users (id INT, join_date DATE, region VARCHAR(10)); INSERT INTO users (id, join_date, region) VALUES (1, \u00272022-01-01\u0027, \u0027Africa\u0027), (2, \u00272022-02-01\u0027, \u0027Europe\u0027), (3, \u00272022-03-01\u0027, \u0027Asia\u0027), (4, \u00272022-04-01\u0027, \u0027Africa\u0027), (5, \u00272022-06-01\u0027, \u0027Africa\u0027);", + "sql": "SELECT COUNT(*) FROM users WHERE region \u003d \u0027Africa\u0027 AND join_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", + "sql_explanation": "The SQL query filters the users table for Africa and the last 6 months, and then counts the number of matching records." 
+}, { + "id": "3259", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many users joined Twitter from India in the last year?", + "sql_context": "CREATE TABLE user_data (user_id INT, join_date DATE, country VARCHAR(50)); INSERT INTO user_data (user_id, join_date, country) VALUES (1, \u00272021-01-01\u0027, \u0027India\u0027), (2, \u00272022-01-02\u0027, \u0027USA\u0027), (3, \u00272021-06-03\u0027, \u0027India\u0027);", + "sql": "SELECT COUNT(*) FROM user_data WHERE country \u003d \u0027India\u0027 AND join_date \u003e\u003d DATEADD(year, -1, GETDATE());", + "sql_explanation": "This query counts the number of users who joined Twitter from India in the last year. It does so by filtering the user_data table for country \u0027India\u0027 and join_dates within the last year, and then using the COUNT(*) function to count the number of rows returned." 
+}, { + "id": "436", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the collective bargaining agreements for the \u0027Construction Workers Union\u0027 and \u0027Teachers Union\u0027.", + "sql_context": "CREATE TABLE CollectiveBargaining (CBAID INT, UnionID INT, AgreementDate DATE); INSERT INTO CollectiveBargaining (CBAID, UnionID, AgreementDate) VALUES (1, 1, \u00272020-01-01\u0027), (2, 2, \u00272019-06-15\u0027), (3, 3, \u00272018-09-01\u0027);", + "sql": "SELECT Unions.UnionName, CollectiveBargaining.AgreementDate FROM Unions JOIN CollectiveBargaining ON Unions.UnionID \u003d CollectiveBargaining.UnionID WHERE Unions.UnionName IN (\u0027Construction Workers Union\u0027, \u0027Teachers Union\u0027);", + "sql_explanation": "The SQL query lists the collective bargaining agreements for the \u0027Construction Workers Union\u0027 and \u0027Teachers Union\u0027 by joining the Unions and CollectiveBargaining tables on the UnionID column. It then filters the records where the UnionName is either \u0027Construction Workers Union\u0027 or \u0027Teachers Union\u0027." 
+}, { + "id": "536", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique workplace names from the \u0027workplace_data\u0027 table that do not have any records in the \u0027collective_bargaining\u0027 table.", + "sql_context": "CREATE TABLE workplace_data (workplace_id INT, workplace_name TEXT); CREATE TABLE collective_bargaining (agreement_status TEXT, workplace_id INT);", + "sql": "SELECT DISTINCT workplace_data.workplace_name FROM workplace_data LEFT JOIN collective_bargaining ON workplace_data.workplace_id \u003d collective_bargaining.workplace_id WHERE collective_bargaining.workplace_id IS NULL;", + "sql_explanation": "This query performs a left join on the \u0027workplace_data\u0027 and \u0027collective_bargaining\u0027 tables, matching rows based on the \u0027workplace_id\u0027 column. It then filters the result set to include only rows where the \u0027workplace_id\u0027 column in the \u0027collective_bargaining\u0027 table is NULL, indicating that there are no matching rows in the \u0027collective_bargaining\u0027 table. Finally, the query selects the distinct values of the \u0027workplace_name\u0027 column from the \u0027workplace_data\u0027 table." 
+}, { + "id": "548", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage change in Union membership from 2020 to 2021 for each union?", + "sql_context": "CREATE TABLE union_membership (id INT, union_name VARCHAR(255), year INT, membership INT); INSERT INTO union_membership (id, union_name, year, membership) VALUES (1, \u0027Union A\u0027, 2020, 5000), (2, \u0027Union A\u0027, 2021, 5500), (3, \u0027Union B\u0027, 2020, 6000), (4, \u0027Union B\u0027, 2021, 6200), (5, \u0027Union C\u0027, 2020, 4000), (6, \u0027Union C\u0027, 2021, 4100);", + "sql": "SELECT u.union_name, ((m2.membership - m1.membership) * 100.0 / m1.membership) as pct_change FROM union_membership m1 JOIN union_membership m2 ON m1.union_name \u003d m2.union_name AND m1.year \u003d 2020 AND m2.year \u003d 2021;", + "sql_explanation": "This SQL query joins the union_membership table to itself, filtering the results to only include records from 2020 and 2021 for each union, and then calculates the percentage change in membership using the formula (membership_2021 - membership_2020) * 100.0 / membership_2020." 
+}, { + "id": "737", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average contract length for unions in the Southeast region?", + "sql_context": "CREATE TABLE collective_bargaining (bargaining_id INT, union_name VARCHAR(50), company_name VARCHAR(50), contract_start_date DATE, contract_end_date DATE);CREATE VIEW union_region AS SELECT union_name, \u0027Southeast\u0027 as region FROM collective_bargaining GROUP BY union_name;", + "sql": "SELECT AVG(DATEDIFF(contract_end_date, contract_start_date)) as avg_contract_length FROM collective_bargaining cb JOIN union_region ur ON cb.union_name \u003d ur.union_name WHERE ur.region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This SQL query calculates the average length of collective bargaining contracts for unions in the Southeast region by joining the \u0027collective_bargaining\u0027 table with the \u0027union_region\u0027 view on the union_name column. It then uses the AVG function on the difference between the contract end date and contract start date, filtering for the Southeast region using the WHERE clause." 
+}, { + "id": "897", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of employees in each department in the \u0027hr\u0027 schema, sorted by the number of employees in descending order?", + "sql_context": "CREATE SCHEMA hr; CREATE TABLE departments (id INT, name VARCHAR); INSERT INTO departments VALUES (1, \u0027Marketing\u0027); CREATE TABLE employees (id INT, name VARCHAR, department_id INT); INSERT INTO employees VALUES (1, \u0027John Doe\u0027, 1);", + "sql": "SELECT departments.name, COUNT(*) AS num_employees FROM hr.departments JOIN hr.employees ON departments.id \u003d employees.department_id GROUP BY departments.name ORDER BY num_employees DESC;", + "sql_explanation": "First, a join is performed between \u0027departments\u0027 and \u0027employees\u0027 tables on \u0027department_id\u0027 column. Then, COUNT(*) function is used to count the number of employees for each department. Finally, the departments are sorted by the number of employees in descending order." 
+}, { + "id": "920", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum contract length for \u0027Transportation\u0027 union collective bargaining agreements?", + "sql_context": "CREATE TABLE CollectiveBargaining (agreement_id INT, union_id INT, terms TEXT, contract_length INT); CREATE TABLE Unions (union_id INT, industry TEXT);", + "sql": "SELECT MAX(CollectiveBargaining.contract_length) FROM CollectiveBargaining INNER JOIN Unions ON CollectiveBargaining.union_id \u003d Unions.union_id WHERE Unions.industry \u003d \u0027Transportation\u0027;", + "sql_explanation": "The SQL query performs an inner join on the \u0027CollectiveBargaining\u0027 and \u0027Unions\u0027 tables, based on their \u0027union_id\u0027 column. It then filters for \u0027Transportation\u0027 unions and calculates the maximum contract length for their collective bargaining agreements." 
+}, { + "id": "1396", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workplace safety violations recorded for each union in Texas?", + "sql_context": "CREATE TABLE unions (id INT, name VARCHAR(255), state VARCHAR(255)); CREATE TABLE safety_violations (id INT, union_id INT, violation_count INT); INSERT INTO unions (id, name, state) VALUES (1, \u0027AFSCME\u0027, \u0027Texas\u0027); INSERT INTO safety_violations (id, union_id, violation_count) VALUES (1, 1, 75);", + "sql": "SELECT u.name, SUM(sv.violation_count) as total_violations FROM unions u JOIN safety_violations sv ON u.id \u003d sv.union_id WHERE u.state \u003d \u0027Texas\u0027 GROUP BY u.name;", + "sql_explanation": "This query joins the unions and safety_violations tables on the union_id column, then filters for rows where the state is Texas. It then groups the result by union name and sums the violation_count column for each group." 
+}, { + "id": "1453", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of union members in the \u0027Safety Advocates\u0027 union?", + "sql_context": "CREATE TABLE Unions (UnionID INT, UnionName VARCHAR(20)); INSERT INTO Unions (UnionID, UnionName) VALUES (1, \u0027Construction Workers Union\u0027), (2, \u0027Safety Advocates\u0027), (3, \u0027Teachers Union\u0027);", + "sql": "SELECT COUNT(*) as TotalMembers FROM UnionMembership JOIN Unions ON UnionMembership.UnionID \u003d Unions.UnionID WHERE Unions.UnionName \u003d \u0027Safety Advocates\u0027;", + "sql_explanation": "The SQL query counts the total number of union members in the \u0027Safety Advocates\u0027 union by joining the UnionMembership and Unions tables on the UnionID column. It then filters the records where the UnionName is \u0027Safety Advocates\u0027." 
+}, { + "id": "2078", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of workers in unions that have collective bargaining agreements and are in the \u0027Education\u0027 industry?", + "sql_context": "CREATE TABLE unions (id INT, industry VARCHAR(255), has_cba BOOLEAN); CREATE TABLE workers (id INT, union_id INT);", + "sql": "SELECT COUNT(*) FROM workers JOIN unions ON workers.union_id \u003d unions.id WHERE unions.industry \u003d \u0027Education\u0027 AND unions.has_cba \u003d TRUE;", + "sql_explanation": "1. Join the workers and unions tables on the union_id and id columns respectively. 2. Filter the records where industry is \u0027Education\u0027 and has_cba is TRUE. 3. Count the number of remaining records, which represents the total number of workers in unions that have collective bargaining agreements and are in the education industry." 
+}, { + "id": "2151", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum and maximum salary for workers in unions that have collective bargaining agreements?", + "sql_context": "CREATE TABLE unions (id INT, has_cba BOOLEAN); CREATE TABLE workers (id INT, union_id INT, salary DECIMAL(10,2));", + "sql": "SELECT MIN(workers.salary), MAX(workers.salary) FROM workers JOIN unions ON workers.union_id \u003d unions.id WHERE unions.has_cba \u003d TRUE;", + "sql_explanation": "1. Join the workers and unions tables on the union_id and id columns respectively. 2. Filter the records where has_cba is TRUE. 3. Calculate the minimum and maximum values of the salary column, which represents the minimum and maximum salary for workers in unions with collective bargaining agreements." 
+}, { + "id": "4187", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all workers and the industries they are part of.", + "sql_context": "CREATE TABLE if not exists worker (worker_id INT, worker_name TEXT); CREATE TABLE if not exists industry (industry_id INT, industry_name TEXT, total_workers INT); INSERT INTO worker (worker_id, worker_name) VALUES (1001, \u0027John Smith\u0027), (1002, \u0027Jane Doe\u0027), (1003, \u0027Bob Johnson\u0027); INSERT INTO industry (industry_id, industry_name, total_workers) VALUES (1, \u0027manufacturing\u0027, 5000), (2, \u0027technology\u0027, 7000), (3, \u0027healthcare\u0027, 6000);", + "sql": "SELECT worker.worker_name, industry.industry_name FROM worker CROSS JOIN industry;", + "sql_explanation": "This SQL query retrieves the names of all workers and the industries they are part of by using a cross join between the \u0027worker\u0027 and \u0027industry\u0027 tables, which results in all possible combinations of worker and industry pairs." 
+}, { + "id": "1981", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the genetic sequence of samples with startup funding over 500000?", + "sql_context": "CREATE TABLE genetic_data (id INT, sample_id VARCHAR(20), gene_sequence TEXT); CREATE TABLE startup_funding (id INT, sample_id VARCHAR(20), funding_amount FLOAT); INSERT INTO genetic_data (id, sample_id, gene_sequence) VALUES (1, \u0027GD001\u0027, \u0027ATGCGA...\u0027), (2, \u0027GD002\u0027, \u0027ATGCGC...\u0027); INSERT INTO startup_funding (id, sample_id, funding_amount) VALUES (1, \u0027GD001\u0027, 700000.0), (2, \u0027GD002\u0027, 300000.0);", + "sql": "SELECT gd.gene_sequence FROM genetic_data gd INNER JOIN startup_funding sf ON gd.sample_id \u003d sf.sample_id WHERE sf.funding_amount \u003e 500000;", + "sql_explanation": "This query joins the genetic_data and startup_funding tables on the sample_id and filters records with funding_amount greater than 500000 to get the corresponding gene_sequence." 
+}, { + "id": "2087", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for biotech startups in the Americas?", + "sql_context": "CREATE TABLE Startups (startup_id INT, startup_name TEXT, industry TEXT, total_funding FLOAT, region TEXT); CREATE VIEW BiotechStartups AS SELECT * FROM Startups WHERE industry \u003d \u0027Biotech\u0027; CREATE VIEW AmericasStartups AS SELECT * FROM Startups WHERE region \u003d \u0027Americas\u0027;", + "sql": "SELECT AVG(total_funding) FROM BiotechStartups INNER JOIN AmericasStartups ON BiotechStartups.startup_id \u003d AmericasStartups.startup_id;", + "sql_explanation": "This query retrieves the average funding amount for biotech startups in the Americas by joining the BiotechStartups and AmericasStartups views on the startup_id column and calculating the average total_funding." 
+}, { + "id": "350", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify systems with the highest number of high severity vulnerabilities.", + "sql_context": "CREATE TABLE vulnerabilities (id INT, date DATE, system TEXT, vulnerability TEXT, severity TEXT);INSERT INTO vulnerabilities (id, date, system, vulnerability, severity) VALUES (1, \u00272021-01-02\u0027, \u0027webserver\u0027, \u0027SQL injection\u0027, \u0027high\u0027); CREATE TABLE systems (id INT, system TEXT, location TEXT);INSERT INTO systems (id, system, location) VALUES (1, \u0027webserver\u0027, \u0027USA\u0027);", + "sql": "SELECT s.system, s.location, COUNT(v.id) as high_vulnerabilities FROM systems s JOIN vulnerabilities v ON s.system \u003d v.system WHERE v.severity \u003d \u0027high\u0027 GROUP BY s.system, s.location ORDER BY high_vulnerabilities DESC FETCH FIRST 5 ROWS ONLY;", + "sql_explanation": "This query identifies the top 5 systems with the highest number of high severity vulnerabilities. It uses a join to combine the vulnerabilities and systems table." 
+}, { + "id": "361", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the most open vulnerabilities in the last month?", + "sql_context": "CREATE TABLE vulnerabilities (id INT, title TEXT, description TEXT, country TEXT, severity TEXT, created_at DATETIME); INSERT INTO vulnerabilities (id, title, description, country, severity, created_at) VALUES (1, \u0027Vuln1\u0027, \u0027Desc1\u0027, \u0027USA\u0027, \u0027High\u0027, \u00272022-01-01 10:00:00\u0027), (2, \u0027Vuln2\u0027, \u0027Desc2\u0027, \u0027Canada\u0027, \u0027Medium\u0027, \u00272022-01-02 11:00:00\u0027); CREATE TABLE systems (id INT, name TEXT, vulnerability_id INT, country TEXT); INSERT INTO systems (id, name, vulnerability_id, country) VALUES (1, \u0027Sys1\u0027, 1, \u0027USA\u0027), (2, \u0027Sys2\u0027, 2, \u0027Canada\u0027);", + "sql": "SELECT v.country, COUNT(s.id) as open_vulnerabilities FROM vulnerabilities v JOIN systems s ON v.id \u003d s.vulnerability_id WHERE v.created_at \u003e\u003d DATE_SUB(NOW(), INTERVAL 1 MONTH) GROUP BY v.country ORDER BY open_vulnerabilities DESC LIMIT 3;", + "sql_explanation": "This query joins the \u0027vulnerabilities\u0027 and \u0027systems\u0027 tables to get the vulnerabilities and their associated countries. It then filters for vulnerabilities created in the last month and groups by country. Finally, it orders by the count of open vulnerabilities and limits the results to the top 3 countries." 
+}, { + "id": "478", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique IP addresses associated with malicious activities and the corresponding number of related incidents in the last 6 months.", + "sql_context": "CREATE TABLE malicious_ip (ip_address VARCHAR(15), incident_id INT); INSERT INTO malicious_ip (ip_address, incident_id) VALUES (\u0027192.168.0.10\u0027, 1), (\u0027192.168.0.11\u0027, 2), (\u0027192.168.0.12\u0027, 3), (\u0027192.168.0.13\u0027, 4), (\u0027192.168.0.14\u0027, 5);", + "sql": "SELECT ip_address, COUNT(*) as incident_count FROM malicious_ip JOIN incidents ON malicious_ip.incident_id \u003d incidents.incident_id WHERE incidents.incident_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH) GROUP BY ip_address;", + "sql_explanation": "The SQL query performs an equi-join between the malicious_ip and incidents tables based on their shared incident_id column. It then filters the results for records with incident_date values within the past 6 months, groups the output by ip_address, and calculates the count of incidents for each IP address." 
+}, { + "id": "291", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight of seafood deliveries from local suppliers in the Pacific Northwest region?", + "sql_context": "CREATE TABLE Suppliers (SupplierID INT, SupplierName VARCHAR(50), Region VARCHAR(50)); INSERT INTO Suppliers (SupplierID, SupplierName, Region) VALUES (1, \u0027Ocean Bounty\u0027, \u0027Pacific Northwest\u0027), (2, \u0027Green Gardens\u0027, \u0027California\u0027), (3, \u0027Sunrise Farms\u0027, \u0027Midwest\u0027); CREATE TABLE Deliveries (DeliveryID INT, SupplierID INT, DeliveryDate DATE, Weight FLOAT); INSERT INTO Deliveries (DeliveryID, SupplierID, DeliveryDate, Weight) VALUES (1, 1, \u00272022-01-01\u0027, 200), (2, 1, \u00272022-01-05\u0027, 250), (3, 2, \u00272022-01-03\u0027, 300), (4, 3, \u00272022-01-07\u0027, 150);", + "sql": "SELECT SUM(Weight) AS TotalWeight FROM Deliveries D INNER JOIN Suppliers S ON D.SupplierID \u003d S.SupplierID WHERE S.Region \u003d \u0027Pacific Northwest\u0027 AND D.DeliveryDate \u003e\u003d \u00272022-01-01\u0027 AND D.DeliveryDate \u003c \u00272022-02-01\u0027 AND S.SupplierName IN (\u0027Ocean Bounty\u0027);", + "sql_explanation": "This query joins the Deliveries and Suppliers tables on SupplierID and filters for the Pacific Northwest region and seafood deliveries in January 2022. It calculates the total weight of seafood deliveries from local suppliers in the Pacific Northwest region." 
+}, { + "id": "352", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the supplier with the most sustainable food products in South America.", + "sql_context": "CREATE TABLE SustainableFoodProducts (product_id INTEGER, supplier_id INTEGER, sustainability_score INTEGER); INSERT INTO SustainableFoodProducts (product_id, supplier_id, sustainability_score) VALUES (1, 1, 90); CREATE TABLE Suppliers (supplier_id INTEGER, supplier_name TEXT, country TEXT); INSERT INTO Suppliers (supplier_id, supplier_name, country) VALUES (1, \u0027Supplier A\u0027, \u0027South America\u0027);", + "sql": "SELECT supplier_name FROM SustainableFoodProducts JOIN Suppliers ON SustainableFoodProducts.supplier_id \u003d Suppliers.supplier_id WHERE Suppliers.country \u003d \u0027South America\u0027 GROUP BY supplier_name ORDER BY SUM(sustainability_score) DESC LIMIT 1;", + "sql_explanation": "This SQL query finds the supplier with the most sustainable food products in South America by selecting the supplier name from the SustainableFoodProducts table joined with the Suppliers table where the country is \u0027South America\u0027, grouping the results by supplier name, summing the sustainability scores for each group, and ordering the results in descending order by the sum of sustainability scores, then returning the top result with the LIMIT clause." 
+}, { + "id": "392", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of food safety violations for each restaurant in the \u0027safety_inspections\u0027 table, with an overall rating of \u0027excellent\u0027 or \u0027good\u0027 in the \u0027restaurant_ratings\u0027 table?", + "sql_context": "CREATE TABLE safety_inspections (restaurant_id INT, violation_count INT);CREATE TABLE restaurant_ratings (restaurant_id INT, overall_rating VARCHAR(20));", + "sql": "SELECT s.restaurant_id, SUM(s.violation_count) as total_violations FROM safety_inspections s INNER JOIN restaurant_ratings r ON s.restaurant_id \u003d r.restaurant_id WHERE r.overall_rating IN (\u0027excellent\u0027, \u0027good\u0027) GROUP BY s.restaurant_id;", + "sql_explanation": "The query joins the \u0027safety_inspections\u0027 table and \u0027restaurant_ratings\u0027 table (INNER JOIN restaurant_ratings r ON s.restaurant_id \u003d r.restaurant_id) and filters restaurants with an overall rating of \u0027excellent\u0027 or \u0027good\u0027 (WHERE r.overall_rating IN (\u0027excellent\u0027, \u0027good\u0027)). It then calculates the total number of food safety violations (SUM(s.violation_count) as total_violations) for each restaurant (GROUP BY s.restaurant_id)." 
+}, { + "id": "666", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which food safety records were updated in the last 7 days for products in the \u0027Seafood\u0027 category?", + "sql_context": "CREATE TABLE FoodSafetyRecords (record_id INT, product_id INT, updated_at TIMESTAMP); CREATE TABLE Products (product_id INT, product_name VARCHAR(100), category VARCHAR(50)); INSERT INTO FoodSafetyRecords (record_id, product_id, updated_at) VALUES (1, 1, \u00272022-01-01 12:00:00\u0027), (2, 2, \u00272022-01-15 14:00:00\u0027), (3, 3, \u00272022-02-01 09:00:00\u0027); INSERT INTO Products (product_id, product_name, category) VALUES (1, \u0027Salmon\u0027, \u0027Seafood\u0027), (2, \u0027Broccoli\u0027, \u0027Vegetables\u0027), (3, \u0027Bread\u0027, \u0027Bakery\u0027);", + "sql": "SELECT * FROM FoodSafetyRecords INNER JOIN Products ON FoodSafetyRecords.product_id \u003d Products.product_id WHERE Products.category \u003d \u0027Seafood\u0027 AND FoodSafetyRecords.updated_at \u003e\u003d NOW() - INTERVAL \u00277 days\u0027;", + "sql_explanation": "This query finds food safety records that have been updated in the last 7 days for products in the \u0027Seafood\u0027 category by joining the FoodSafetyRecords and Products tables on the product_id column, and then filtering the results by category and updated_at." 
+}, { + "id": "706", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 most nutritious meal categories in the \u0027Meals\u0027 and \u0027Nutrition\u0027 tables based on the average nutrient score.", + "sql_context": "CREATE TABLE Meals (meal_id INT, meal_category TEXT); CREATE TABLE Nutrition (nutrition_id INT, meal_id INT, nutrient_score FLOAT);", + "sql": "SELECT Meals.meal_category, AVG(Nutrition.nutrient_score) as avg_score FROM Meals INNER JOIN Nutrition ON Meals.meal_id \u003d Nutrition.meal_id GROUP BY Meals.meal_category ORDER BY avg_score DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 most nutritious meal categories in the \u0027Meals\u0027 and \u0027Nutrition\u0027 tables based on the average nutrient score by performing an inner join on the meal_id column present in both tables, grouping the records based on the meal_category column, calculating the average of the nutrient_score column, and then ordering the results in descending order and limiting the output to the top 3 records." 
+}, { + "id": "816", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average caloric content of dishes in each cuisine type, excluding dishes with no caloric information?", + "sql_context": "CREATE TABLE CuisineTypes (CuisineTypeID INT, CuisineType VARCHAR(50));CREATE TABLE Dishes (DishID INT, DishName VARCHAR(50), CuisineTypeID INT, CaloricContent INT, HasCaloricInfo BOOLEAN); INSERT INTO CuisineTypes VALUES (1, \u0027Italian\u0027), (2, \u0027Chinese\u0027), (3, \u0027Indian\u0027); INSERT INTO Dishes VALUES (1, \u0027Pizza Margherita\u0027, 1, 500, true), (2, \u0027Spaghetti Bolognese\u0027, 1, 700, true), (3, \u0027Kung Pao Chicken\u0027, 2, 600, true), (4, \u0027Spring Rolls\u0027, 2, NULL, false), (5, \u0027Butter Chicken\u0027, 3, 800, true), (6, \u0027Palak Paneer\u0027, 3, 600, true);", + "sql": "SELECT ct.CuisineType, AVG(d.CaloricContent) as AvgCaloricContent FROM CuisineTypes ct JOIN Dishes d ON ct.CuisineTypeID \u003d d.CuisineTypeID WHERE d.HasCaloricInfo \u003d true GROUP BY ct.CuisineType;", + "sql_explanation": "This query joins the CuisineTypes and Dishes tables, filters for dishes with caloric information, groups the data by cuisine type, and then calculates the average caloric content for each cuisine type." 
+}, { + "id": "1108", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of suppliers in each country in the \u0027Suppliers\u0027 and \u0027Countries\u0027 tables.", + "sql_context": "CREATE TABLE Suppliers (supplier_id INT, supplier_name TEXT, country_id INT); CREATE TABLE Countries (country_id INT, country_name TEXT);", + "sql": "SELECT Countries.country_name, COUNT(Suppliers.supplier_id) FROM Suppliers INNER JOIN Countries ON Suppliers.country_id \u003d Countries.country_id GROUP BY Countries.country_name;", + "sql_explanation": "This query finds the total number of suppliers in each country in the \u0027Suppliers\u0027 and \u0027Countries\u0027 tables by performing an inner join on the country_id column present in both tables and then grouping the records based on the country_name column to calculate the total number of suppliers for each country." 
+}, { + "id": "1226", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum calorie count for gluten-free meals at casual dining restaurants?", + "sql_context": "CREATE TABLE meals (id INT, name TEXT, gluten_free BOOLEAN, restaurant_type TEXT); INSERT INTO meals (id, name, gluten_free, restaurant_type) VALUES (1, \u0027Grilled Chicken\u0027, true, \u0027casual dining\u0027), (2, \u0027Pork Tacos\u0027, false, \u0027casual dining\u0027), (3, \u0027Vegetable Stir Fry\u0027, true, \u0027casual dining\u0027); CREATE TABLE nutrition (meal_id INT, calorie_count INT); INSERT INTO nutrition (meal_id, calorie_count) VALUES (1, 500), (2, 800), (3, 600);", + "sql": "SELECT MIN(nutrition.calorie_count) FROM nutrition JOIN meals ON nutrition.meal_id \u003d meals.id WHERE meals.gluten_free \u003d true AND meals.restaurant_type \u003d \u0027casual dining\u0027;", + "sql_explanation": "This query calculates the minimum calorie count for gluten-free meals at casual dining restaurants. It does this by joining the \u0027nutrition\u0027 table with the \u0027meals\u0027 table on the \u0027meal_id\u0027 field. Then, it filters the results to only include rows where the \u0027gluten_free\u0027 field in the \u0027meals\u0027 table is true and the \u0027restaurant_type\u0027 field is \u0027casual dining\u0027. Finally, it calculates the minimum calorie count by using the MIN() function." 
+}, { + "id": "1836", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average calorie count and total fat for each cuisine type", + "sql_context": "CREATE TABLE cuisine (id INT, type VARCHAR(255), avg_calories DECIMAL(5,2), total_fat DECIMAL(5,2)); CREATE TABLE dishes (id INT, cuisine_id INT, name VARCHAR(255), calories DECIMAL(5,2), total_fat DECIMAL(5,2)); INSERT INTO cuisine (id, type, avg_calories, total_fat) VALUES (1, \u0027Italian\u0027, NULL, NULL), (2, \u0027Mexican\u0027, NULL, NULL); INSERT INTO dishes (id, cuisine_id, name, calories, total_fat) VALUES (1, 1, \u0027Pasta\u0027, 500, 20), (2, 1, \u0027Pizza\u0027, 800, 35), (3, 2, \u0027Tacos\u0027, 400, 15), (4, 2, \u0027Burritos\u0027, 700, 30);", + "sql": "SELECT c.type, AVG(d.calories) AS avg_calories, SUM(d.total_fat) AS total_fat FROM cuisine c JOIN dishes d ON c.id \u003d d.cuisine_id GROUP BY c.id;", + "sql_explanation": "The query joins the \u0027cuisine\u0027 and \u0027dishes\u0027 tables on the cuisine_id/id field. It then calculates the average calories and total fat for each cuisine type using the AVG() and SUM() aggregate functions, grouped by the cuisine id." 
+}, { + "id": "1853", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total number of meals sold for each meal category in the \u0027Meals\u0027 and \u0027Sales\u0027 tables.", + "sql_context": "CREATE TABLE Meals (meal_category TEXT, meal_id INT); CREATE TABLE Sales (sale_id INT, meal_id INT, sale_quantity INT);", + "sql": "SELECT Meals.meal_category, SUM(Sales.sale_quantity) FROM Meals INNER JOIN Sales ON Meals.meal_id \u003d Sales.meal_id GROUP BY Meals.meal_category;", + "sql_explanation": "This query calculates the total number of meals sold for each meal category in the \u0027Meals\u0027 and \u0027Sales\u0027 tables by performing an inner join on the meal_id column present in both tables and then grouping the records based on the meal_category column to calculate the total number of meals sold for each category." 
+}, { + "id": "2175", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average calorie intake per meal for vegetarian customers?", + "sql_context": "CREATE TABLE meals (id INT, name VARCHAR(255), customer_id INT, calories INT); INSERT INTO meals (id, name, customer_id, calories) VALUES (1, \u0027Vegetarian Pizza\u0027, 1001, 350), (2, \u0027Quinoa Salad\u0027, 1002, 400), (3, \u0027Chickpea Curry\u0027, 1001, 500); CREATE TABLE customers (id INT, is_vegetarian BOOLEAN); INSERT INTO customers (id, is_vegetarian) VALUES (1001, true), (1002, false);", + "sql": "SELECT AVG(meals.calories) FROM meals INNER JOIN customers ON meals.customer_id \u003d customers.id WHERE customers.is_vegetarian \u003d true;", + "sql_explanation": "1. Inner join the meals and customers table on customer_id. 2. Filter for vegetarian customers (is_vegetarian \u003d true). 3. Calculate the average (AVG) of the calories column." 
+}, { + "id": "2733", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average protein content for vegan products?", + "sql_context": "CREATE TABLE nutritional_data (id INT, product_id INT, calories INT, protein INT, fat INT); INSERT INTO nutritional_data (id, product_id, calories, protein, fat) VALUES (1, 1, 100, 5, 0); INSERT INTO nutritional_data (id, product_id, calories, protein, fat) VALUES (2, 2, 150, 15, 8); INSERT INTO products (id, name, category) VALUES (1, \u0027Tofu\u0027, \u0027Vegan\u0027); INSERT INTO products (id, name, category) VALUES (2, \u0027Almonds\u0027, \u0027Vegetarian\u0027);", + "sql": "SELECT AVG(nd.protein) FROM nutritional_data nd JOIN products p ON nd.product_id \u003d p.id WHERE p.category \u003d \u0027Vegan\u0027;", + "sql_explanation": "This query joins the nutritional_data and products tables on the product_id column, then filters for rows where the category is Vegan. It calculates the average of the protein column for the filtered rows." 
+}, { + "id": "274", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find dishes that contain both tomatoes and onions as ingredients.", + "sql_context": "CREATE TABLE ingredients (id INT, name VARCHAR(255)); INSERT INTO ingredients (id, name) VALUES (1, \u0027Tomatoes\u0027), (2, \u0027Onions\u0027), (3, \u0027Garlic\u0027), (4, \u0027Cheese\u0027), (5, \u0027Tofu\u0027), (6, \u0027Chicken\u0027), (7, \u0027Beef\u0027), (8, \u0027Tomato Sauce\u0027), (9, \u0027Onion Rings\u0027); CREATE TABLE dish_ingredients (dish_id INT, ingredient_id INT); INSERT INTO dish_ingredients (dish_id, ingredient_id) VALUES (1, 1), (1, 2), (1, 3), (1, 8), (2, 2), (2, 3), (2, 4), (3, 1), (3, 2), (3, 6), (4, 2), (4, 9), (5, 3), (5, 4), (5, 7);", + "sql": "SELECT dish_ingredients.dish_id FROM dish_ingredients INNER JOIN ingredients ON dish_ingredients.ingredient_id \u003d ingredients.id WHERE ingredients.name IN (\u0027Tomatoes\u0027, \u0027Onions\u0027) GROUP BY dish_ingredients.dish_id HAVING COUNT(DISTINCT ingredients.name) \u003d 2;", + "sql_explanation": "The SQL query joins the dish_ingredients and ingredients tables based on their respective IDs, filters for dishes that contain either tomatoes or onions as ingredients, and then groups the results by dish ID. The COUNT function is used to count the total number of distinct ingredients used in each dish. The query then filters for dishes that contain both tomatoes and onions by using the HAVING clause and the condition COUNT(DISTINCT ingredients.name) \u003d 2." 
+}, { + "id": "294", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has each dish been ordered in the past week, broken down by customer demographics?", + "sql_context": "CREATE TABLE orders (dish VARCHAR(255), customer_id INT, order_date DATE); CREATE TABLE customer (customer_id INT, age INT, gender VARCHAR(50), location VARCHAR(50)); INSERT INTO orders (dish, customer_id, order_date) VALUES (\u0027Pizza\u0027, 1, \u00272021-09-01\u0027), (\u0027Pizza\u0027, 2, \u00272021-09-02\u0027), (\u0027Pasta\u0027, 3, \u00272021-09-01\u0027); INSERT INTO customer (customer_id, age, gender, location) VALUES (1, 25, \u0027Female\u0027, \u0027New York\u0027), (2, 35, \u0027Male\u0027, \u0027Los Angeles\u0027), (3, 45, \u0027Female\u0027, \u0027Chicago\u0027);", + "sql": "SELECT c.age, c.gender, c.location, o.dish, COUNT(o.customer_id) AS order_count FROM orders o JOIN customer c ON o.customer_id \u003d c.customer_id WHERE o.order_date \u003e\u003d DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) GROUP BY o.dish, c.age, c.gender, c.location;", + "sql_explanation": "This SQL query calculates how many times each dish has been ordered in the past week, broken down by customer demographics. It joins the orders and customer tables using the customer_id as the key, and then groups the results by dish and customer demographics. The query counts the number of orders for each dish and demographic combination." 
+}, { + "id": "735", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of expired ingredients for each supplier?", + "sql_context": "CREATE TABLE Ingredients (IngredientID int, IngredientName varchar(50), Quantity int, ExpirationDate date, SupplierID int); INSERT INTO Ingredients (IngredientID, IngredientName, Quantity, ExpirationDate, SupplierID) VALUES (1, \u0027Lettuce\u0027, 50, \u00272023-01-01\u0027, 1), (2, \u0027Tomatoes\u0027, 75, \u00272023-01-15\u0027, 1), (3, \u0027Chicken\u0027, 100, \u00272023-02-01\u0027, 2), (4, \u0027Beef\u0027, 120, \u00272023-02-15\u0027, 2); INSERT INTO Suppliers (SupplierID, SupplierName) VALUES (1, \u0027Supplier A\u0027), (2, \u0027Supplier B\u0027);", + "sql": "SELECT S.SupplierName, SUM(I.Quantity) as TotalExpiredIngredients FROM Ingredients I INNER JOIN Suppliers S ON I.SupplierID \u003d S.SupplierID WHERE I.ExpirationDate \u003c CURDATE() GROUP BY S.SupplierName;", + "sql_explanation": "This SQL query calculates the total quantity of expired ingredients for each supplier by joining the Ingredients table and Suppliers table on the SupplierID column. It then filters the Ingredients table to only include ingredients that have expired and groups the results by SupplierName." 
+}, { + "id": "869", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of each menu item sold?", + "sql_context": "CREATE TABLE SalesDetails (SaleID INT, MenuItemID INT, Quantity INT, SaleDate DATETIME); INSERT INTO SalesDetails (SaleID, MenuItemID, Quantity, SaleDate) VALUES (4, 5, 2, \u00272022-01-04 16:00:00\u0027); INSERT INTO SalesDetails (SaleID, MenuItemID, Quantity, SaleDate) VALUES (5, 6, 1, \u00272022-01-05 18:00:00\u0027); INSERT INTO SalesDetails (SaleID, MenuItemID, Quantity, SaleDate) VALUES (6, 5, 3, \u00272022-01-06 20:00:00\u0027);", + "sql": "SELECT MenuItems.MenuItemName, SUM(SalesDetails.Quantity) as TotalQuantity FROM MenuItems JOIN SalesDetails ON MenuItems.MenuItemID \u003d SalesDetails.MenuItemID GROUP BY MenuItems.MenuItemName", + "sql_explanation": "This query calculates the total quantity of each menu item sold in the SalesDetails table by joining with the MenuItems table, grouping by the MenuItemName column, and summing the Quantity column." 
+}, { + "id": "937", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many times has each dish been ordered, excluding dishes sold more than 60 days ago?", + "sql_context": "CREATE TABLE dishes (dish_id INT, dish_name VARCHAR(255), last_sold DATE); CREATE TABLE orders (order_id INT, dish_id INT, quantity INT, order_date DATE); INSERT INTO dishes VALUES (1, \u0027Pizza Margherita\u0027, \u00272022-04-15\u0027); INSERT INTO dishes VALUES (2, \u0027Chole Bhature\u0027, \u00272022-04-20\u0027); INSERT INTO orders VALUES (1, 1, 5, \u00272022-04-15\u0027); INSERT INTO orders VALUES (2, 2, 3, \u00272022-04-20\u0027);", + "sql": "SELECT d.dish_name, SUM(o.quantity) as total_orders FROM dishes d JOIN orders o ON d.dish_id \u003d o.dish_id WHERE last_sold \u003e DATE_SUB(CURRENT_DATE, INTERVAL 60 DAY) GROUP BY d.dish_name;", + "sql_explanation": "This query performs an inner join between the dishes and orders tables on the dish_id column. It then filters for dishes sold within the past 60 days using the WHERE clause and the DATE_SUB and CURRENT_DATE functions. It then groups the results by dish_name and calculates the total orders for each dish using the SUM function." 
+}, { + "id": "997", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique customers ordered dishes in the breakfast category in the last week?", + "sql_context": "CREATE TABLE customer (customer_id INT, name VARCHAR(50), zip VARCHAR(10));CREATE TABLE orders (order_id INT, customer_id INT, dish VARCHAR(50), price DECIMAL(5,2), order_date TIMESTAMP);", + "sql": "SELECT COUNT(DISTINCT o.customer_id) as unique_customers FROM orders o JOIN dishes d ON o.dish \u003d d.dish WHERE d.category \u003d \u0027breakfast\u0027 AND o.order_date \u003e\u003d NOW() - INTERVAL \u00271 week\u0027;", + "sql_explanation": "The SQL query joins the orders and dishes table on the dish, filters the data for breakfast dishes ordered in the last week, calculates the number of unique customers, and provides the number of unique customers who ordered dishes in the breakfast category in the last week." 
+}, { + "id": "1022", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average inventory quantity for each menu category, from the inventory_fact table, grouped by menu_category.", + "sql_context": "CREATE TABLE supplier_dim (supplier_id INT, supplier_name VARCHAR, supplier_country VARCHAR);", + "sql": "SELECT m.menu_category, AVG(i.inventory_quantity) as avg_inventory_quantity FROM inventory_fact i JOIN menu_item_dim m ON i.menu_item_id \u003d m.menu_item_id GROUP BY m.menu_category;", + "sql_explanation": "The SQL query joins the inventory_fact table with the menu_item_dim table using menu_item_id, then calculates the average inventory quantity for each menu category, grouped by menu_category." 
+}, { + "id": "1118", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which menu items have not been ordered in the last month?", + "sql_context": "CREATE TABLE menus (id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(10,2), PRIMARY KEY(id)); CREATE TABLE orders (id INT, menu_id INT, order_date DATE, PRIMARY KEY(id), FOREIGN KEY (menu_id) REFERENCES menus(id));", + "sql": "SELECT menus.name FROM menus LEFT JOIN orders ON menus.id \u003d orders.menu_id WHERE orders.id IS NULL AND order_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 MONTH) AND CURDATE();", + "sql_explanation": "Using a LEFT JOIN to include all menu items, this query returns those items with no matching order in the past month." 
+}, { + "id": "1237", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 most frequently ordered dishes across all locations.", + "sql_context": "CREATE TABLE orders (order_id INT, location_id INT, item_id INT, quantity INT, date DATE); CREATE VIEW item_summary AS SELECT item_id, SUM(quantity) as total_quantity FROM orders GROUP BY item_id;", + "sql": "SELECT item_id, total_quantity FROM item_summary JOIN menu ON menu.item_id \u003d item_summary.item_id WHERE menu.category IN (\u0027entree\u0027) ORDER BY total_quantity DESC LIMIT 5;", + "sql_explanation": "This query first creates a view called item_summary that calculates the total quantity of each item ordered. Then it joins this view with the menu table to get the item name, and filters for entree items only. Finally, it orders the results by the total quantity in descending order and limits it to the top 5 results." 
+}, { + "id": "1518", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total cost of inventory for non-dairy items.", + "sql_context": "CREATE TABLE inventory(inventory_id INT, item_id INT, quantity INT, cost_price DECIMAL); CREATE TABLE menu_items(menu_item_id INT, name TEXT, type TEXT, is_dairy BOOLEAN, price DECIMAL);", + "sql": "SELECT SUM(inventory.quantity * inventory.cost_price) FROM inventory JOIN menu_items ON inventory.item_id \u003d menu_items.menu_item_id WHERE is_dairy \u003d FALSE;", + "sql_explanation": "This query determines the total cost of inventory for non-dairy items by performing a join between the \u0027inventory\u0027 and \u0027menu_items\u0027 tables and filtering for non-dairy menu items. The \u0027SUM\u0027 function is used to calculate the total cost of inventory for non-dairy items." 
+}, { + "id": "1545", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the dishes that have not been sold in the last 60 days", + "sql_context": "CREATE TABLE sales_data (sale_id INT, dish_id INT, sale_date DATE); INSERT INTO sales_data (sale_id, dish_id, sale_date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 2, \u00272022-01-05\u0027), (3, 1, \u00272022-01-10\u0027), (4, 3, \u00272022-01-15\u0027); CREATE TABLE menu (dish_id INT, dish_name VARCHAR(255), dish_type VARCHAR(255)); INSERT INTO menu (dish_id, dish_name, dish_type) VALUES (1, \u0027Quinoa Salad\u0027, \u0027Vegetarian\u0027), (2, \u0027Chicken Sandwich\u0027, \u0027Non-Vegetarian\u0027), (3, \u0027Pumpkin Soup\u0027, \u0027Vegetarian\u0027), (4, \u0027Beef Stew\u0027, \u0027Non-Vegetarian\u0027);", + "sql": "SELECT m.dish_id, m.dish_name FROM menu m LEFT JOIN sales_data s ON m.dish_id \u003d s.dish_id WHERE s.sale_date \u003c DATE_SUB(CURDATE(), INTERVAL 60 DAY) IS NULL;", + "sql_explanation": "The query uses a LEFT JOIN to combine the sales_data and menu tables and checks for NULL values in the sales_date column to find dishes that have not been sold in the last 60 days." 
+}, { + "id": "1872", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of dishes sold by category in branch 7?", + "sql_context": "CREATE TABLE Branches (branch_id INT, branch_name VARCHAR(255));CREATE TABLE Menu (dish_name VARCHAR(255), branch_id INT, dish_type VARCHAR(255), price DECIMAL(5,2));CREATE TABLE Sales (sale_date DATE, dish_name VARCHAR(255), quantity INT);", + "sql": "SELECT dish_type, SUM(quantity) as total_sales FROM Sales JOIN Menu ON Sales.dish_name \u003d Menu.dish_name WHERE branch_id \u003d 7 GROUP BY dish_type;", + "sql_explanation": "The SQL query joins the Sales and Menu tables on dish_name and filters for sales in branch 7. It then groups the data by dish_type and calculates the total quantity of dishes sold by category." 
+}, { + "id": "1967", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all suppliers providing ingredients for a vegan-only menu.", + "sql_context": "CREATE TABLE Ingredients (IngredientID INT, Name TEXT, IsVegan BOOLEAN, SupplierID INT); CREATE TABLE Suppliers (SupplierID INT, Name TEXT);", + "sql": "SELECT DISTINCT Suppliers.Name FROM Suppliers INNER JOIN Ingredients ON Suppliers.SupplierID \u003d Ingredients.SupplierID WHERE IsVegan \u003d TRUE;", + "sql_explanation": "This query lists all suppliers providing ingredients for a vegan-only menu by joining the \u0027Suppliers\u0027 and \u0027Ingredients\u0027 tables and filtering for vegan ingredients. The \u0027DISTINCT\u0027 keyword ensures no duplicate supplier names are returned." 
+}, { + "id": "3272", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of dishes with the \u0027vegan\u0027 tag?", + "sql_context": "CREATE TABLE dishes (id INT, name TEXT, tags TEXT); CREATE TABLE orders (id INT, dish_id INT, quantity INT); INSERT INTO dishes (id, name, tags) VALUES (1, \u0027Quinoa Salad\u0027, \u0027vegan\u0027), (2, \u0027Chickpea Curry\u0027, \u0027vegan\u0027), (3, \u0027Beef Burger\u0027, \u0027none\u0027), (4, \u0027Spicy Chicken Sandwich\u0027, \u0027none\u0027); INSERT INTO orders (id, dish_id, quantity) VALUES (1, 1, 10), (2, 2, 8), (3, 3, 5), (4, 1, 7), (5, 2, 9), (6, 4, 12);", + "sql": "SELECT SUM(quantity) FROM orders JOIN dishes ON dishes.id \u003d orders.dish_id WHERE tags LIKE \u0027%vegan%\u0027;", + "sql_explanation": "This query calculates the total quantity of dishes with the \u0027vegan\u0027 tag by summing the quantity from the orders table where the dish is tagged as vegan in the dishes table." 
+}, { + "id": "217", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which employees have completed advanced training?", + "sql_context": "CREATE TABLE employees (id INT PRIMARY KEY, name TEXT, department TEXT, role TEXT, training_completion TEXT); CREATE TABLE workforce_development (id INT PRIMARY KEY, employee_id INT, training_completed TEXT, FOREIGN KEY (employee_id) REFERENCES employees(id));", + "sql": "SELECT employees.name, employees.department, employees.role, workforce_development.training_completed FROM employees INNER JOIN workforce_development ON employees.id \u003d workforce_development.employee_id WHERE workforce_development.training_completed LIKE \u0027%advanced%\u0027;", + "sql_explanation": "This query performs an inner join between the employees and workforce_development tables, joining on the id and employee_id columns, respectively. It returns employees who have completed advanced training." 
+}, { + "id": "533", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and locations of all factories that have implemented circular economy practices, along with the number of employees in each.", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(50), location VARCHAR(50), industry VARCHAR(50)); CREATE TABLE circular_economy_practices (factory_id INT, practice VARCHAR(50), employees INT); INSERT INTO factories (id, name, location, industry) VALUES (1, \u0027Factory One\u0027, \u0027New York\u0027, \u0027Textile\u0027); INSERT INTO circular_economy_practices (factory_id, practice, employees) VALUES (1, \u0027Recycling\u0027, 200); INSERT INTO circular_economy_practices (factory_id, practice, employees) VALUES (1, \u0027Upcycling\u0027, 150);", + "sql": "SELECT factories.name, factories.location, circular_economy_practices.employees FROM factories INNER JOIN circular_economy_practices ON factories.id \u003d circular_economy_practices.factory_id WHERE industry \u003d \u0027Textile\u0027;", + "sql_explanation": "Joins the factories table with the circular economy practices table to list the names and locations of all factories in the textile industry that have implemented circular economy practices, along with the number of employees in each." 
+}, { + "id": "688", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the names and locations of factories that have not implemented any circular economy practices?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(50), location VARCHAR(50), industry VARCHAR(50)); CREATE TABLE circular_economy_practices (factory_id INT, practice VARCHAR(50), employees INT); INSERT INTO factories (id, name, location, industry) VALUES (1, \u0027Factory One\u0027, \u0027New York\u0027, \u0027Textile\u0027); INSERT INTO factories (id, name, location, industry) VALUES (2, \u0027Factory Two\u0027, \u0027Los Angeles\u0027, \u0027Textile\u0027);", + "sql": "SELECT factories.name, factories.location FROM factories LEFT JOIN circular_economy_practices ON factories.id \u003d circular_economy_practices.factory_id WHERE circular_economy_practices.factory_id IS NULL;", + "sql_explanation": "Joins the factories table with the circular economy practices table to list the names and locations of factories that have not implemented any circular economy practices." 
+}, { + "id": "749", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and locations of all suppliers who have supplied to a factory with a diversity score above 90.", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, name TEXT, factories TEXT); CREATE TABLE factories (factory_id INT, name TEXT, location TEXT, diversity_score FLOAT);", + "sql": "SELECT DISTINCT suppliers.name, suppliers.location FROM suppliers INNER JOIN factories ON factories.factory_id \u003d ANY(string_to_array(suppliers.factories, \u0027,\u0027)) WHERE factories.diversity_score \u003e 90;", + "sql_explanation": "This SQL query lists the names and locations of all suppliers who have supplied to a factory with a diversity score above 90. It uses an inner join to combine the suppliers and factories tables, based on the factory_id in the factories table and the factories column in the suppliers table. The WHERE clause is used to filter the results to only include records where the diversity score is above 90. The DISTINCT keyword is used to ensure that each supplier is only listed once in the results, even if they have supplied to multiple factories with a diversity score above 90." 
+}, { + "id": "790", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time taken to resolve quality issues for each product category?", + "sql_context": "CREATE TABLE products(id INT, product_name TEXT, category TEXT, resolution_time TIMESTAMP);CREATE TABLE categories(id INT, category TEXT);INSERT INTO products(id, product_name, category, resolution_time) VALUES (1, \u0027Product 8\u0027, \u0027Category A\u0027, \u00272021-06-01 10:30:00\u0027), (2, \u0027Product 9\u0027, \u0027Category B\u0027, \u00272021-07-02 15:45:00\u0027), (3, \u0027Product 10\u0027, \u0027Category A\u0027, \u00272021-08-03 09:15:00\u0027), (4, \u0027Product 11\u0027, \u0027Category C\u0027, \u00272021-09-04 16:00:00\u0027); INSERT INTO categories(id, category) VALUES (1, \u0027Category A\u0027), (2, \u0027Category B\u0027), (3, \u0027Category C\u0027);", + "sql": "SELECT category, AVG(TIMESTAMPDIFF(MINUTE, resolution_time, CURRENT_TIMESTAMP)) as avg_resolution_time FROM products JOIN categories ON products.category \u003d categories.category GROUP BY category;", + "sql_explanation": "The query calculates the difference between resolution_time and the current time for each product, and then groups the result by category to calculate the average time taken to resolve quality issues for each product category." 
+}, { + "id": "917", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of employees working in factories that have a production output above 5000 units and are located in the United States?", + "sql_context": "CREATE TABLE factories (factory_id INT, name VARCHAR(100), location VARCHAR(100), production_output INT); CREATE TABLE employees (employee_id INT, factory_id INT, name VARCHAR(100), position VARCHAR(100)); INSERT INTO factories (factory_id, name, location, production_output) VALUES (1, \u0027ABC Factory\u0027, \u0027New York\u0027, 5500), (2, \u0027XYZ Factory\u0027, \u0027California\u0027, 4000), (3, \u0027LMN Factory\u0027, \u0027Texas\u0027, 6000); INSERT INTO employees (employee_id, factory_id, name, position) VALUES (1, 1, \u0027John Doe\u0027, \u0027Engineer\u0027), (2, 1, \u0027Jane Smith\u0027, \u0027Manager\u0027), (3, 2, \u0027Mike Johnson\u0027, \u0027Operator\u0027), (4, 3, \u0027Sara Brown\u0027, \u0027Engineer\u0027);", + "sql": "SELECT COUNT(*) FROM factories INNER JOIN employees ON factories.factory_id \u003d employees.factory_id WHERE factories.production_output \u003e 5000 AND factories.location LIKE \u0027%United States%\u0027;", + "sql_explanation": "This SQL query first performs an inner join between the factories and employees tables on the factory_id column. Then, it filters the rows where production_output is greater than 5000 and the location contains the string \u0027United States\u0027. Finally, it counts the number of rows in the result and returns the total." 
+}, { + "id": "924", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of machines in each factory, including factories with no machines.", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(255));CREATE TABLE factory_machines (factory_id INT, machine_id INT);", + "sql": "SELECT factories.name, COUNT(factory_machines.machine_id) AS machine_count FROM factories LEFT JOIN factory_machines ON factories.id \u003d factory_machines.factory_id GROUP BY factories.id;", + "sql_explanation": "The SQL query shows the number of machines in each factory, including factories with no machines by using a left join to join the \u0027factories\u0027 and \u0027factory_machines\u0027 tables on the factory_id column. It then groups the results by factory and calculates the number of machines for each factory, including factories with no machines." 
+}, { + "id": "2058", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the suppliers that have a sustainability rating of 5 for the company\u0027s manufacturing process.", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, supplier_name VARCHAR(255), sustainability_rating INT); INSERT INTO suppliers (supplier_id, supplier_name, sustainability_rating) VALUES (1, \u0027Supplier A\u0027, 5), (2, \u0027Supplier B\u0027, 3), (3, \u0027Supplier C\u0027, 5), (4, \u0027Supplier D\u0027, 2); CREATE TABLE manufacturing_process (process_id INT, process_name VARCHAR(255), supplier_id INT); INSERT INTO manufacturing_process (process_id, process_name, supplier_id) VALUES (1, \u0027Plastic Molding\u0027, 1), (2, \u0027Metal Stamping\u0027, 2), (3, \u0027Woodworking\u0027, 3), (4, \u0027Assembly\u0027, 4);", + "sql": "SELECT supplier_name FROM suppliers s JOIN manufacturing_process mp ON s.supplier_id \u003d mp.supplier_id WHERE s.sustainability_rating \u003d 5;", + "sql_explanation": "The SQL query joins the suppliers and manufacturing_process tables to get the necessary data. It filters for suppliers with a sustainability rating of 5 and lists their names." 
+}, { + "id": "2111", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which suppliers provide raw materials to factories in the electronics industry?", + "sql_context": "CREATE TABLE suppliers (id INT, supplier_name VARCHAR(100), industry VARCHAR(50), contact VARCHAR(50)); CREATE TABLE electronics_factories (id INT, factory_name VARCHAR(100), location VARCHAR(50), supplier_id INT); INSERT INTO suppliers (id, supplier_name, industry, contact) VALUES (1, \u0027Global Resources\u0027, \u0027Electronics\u0027, \u0027contact1@globalresources.com\u0027); INSERT INTO electronics_factories (id, factory_name, location, supplier_id) VALUES (1, \u0027ElectroTech\u0027, \u0027USA\u0027, 1);", + "sql": "SELECT s.supplier_name FROM suppliers s INNER JOIN electronics_factories ef ON s.id \u003d ef.supplier_id WHERE s.industry \u003d \u0027Electronics\u0027;", + "sql_explanation": "This query finds the suppliers that provide raw materials to factories in the electronics industry. It performs an inner join between the suppliers table and the electronics_factories table using the supplier_id, and filters the results to only include suppliers in the electronics industry." 
+}, { + "id": "3141", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many workers are employed in each factory?", + "sql_context": "CREATE TABLE factories (id INT, name VARCHAR(255)); CREATE TABLE workers (id INT, factory_id INT, department VARCHAR(255)); INSERT INTO factories (id, name) VALUES (1, \u0027Factory A\u0027), (2, \u0027Factory B\u0027), (3, \u0027Factory C\u0027); INSERT INTO workers (id, factory_id, department) VALUES (1, 1, \u0027Production\u0027), (2, 1, \u0027Engineering\u0027), (3, 2, \u0027Production\u0027), (4, 2, \u0027Management\u0027), (5, 3, \u0027Production\u0027);", + "sql": "SELECT f.name, COUNT(w.id) FROM factories f INNER JOIN workers w ON f.id \u003d w.factory_id GROUP BY f.name;", + "sql_explanation": "This query joins the factories and workers tables on the factory_id column, and then calculates the number of workers employed in each factory." 
+}, { + "id": "3189", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of machines in the \u0027machinery\u0027 table, including any machines that also appear in the \u0027maintenance\u0027 table?", + "sql_context": "CREATE TABLE machinery (machine_id INT, manufacturer VARCHAR(20)); CREATE TABLE maintenance (machine_id INT); INSERT INTO machinery (machine_id, manufacturer) VALUES (1, \u0027ABC Corp\u0027), (2, \u0027XYZ Inc\u0027), (3, \u0027ABC Corp\u0027); INSERT INTO maintenance (machine_id) VALUES (1), (3);", + "sql": "SELECT COUNT(*) FROM machinery INNER JOIN maintenance ON machinery.machine_id \u003d maintenance.machine_id;", + "sql_explanation": "The SQL query performs an inner join between the \u0027machinery\u0027 and \u0027maintenance\u0027 tables on the \u0027machine_id\u0027 column. It then counts the number of records that meet these criteria." 
+}, { + "id": "48", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the most common mental health conditions in a specific age group?", + "sql_context": "CREATE TABLE Patients (PatientID INT, Age INT, Gender VARCHAR(10)); CREATE TABLE MentalHealthConditions (ConditionID INT, PatientID INT, Condition VARCHAR(50));", + "sql": "SELECT Patients.Age, MentalHealthConditions.Condition, COUNT(MentalHealthConditions.ConditionID) FROM Patients INNER JOIN MentalHealthConditions ON Patients.PatientID \u003d MentalHealthConditions.PatientID WHERE Patients.Age BETWEEN 20 AND 30 GROUP BY Patients.Age, MentalHealthConditions.Condition ORDER BY Patients.Age, COUNT(MentalHealthConditions.ConditionID) DESC;", + "sql_explanation": "The SQL query joins the Patients table and the MentalHealthConditions table on the PatientID field, which is a common field in both tables. This allows us to count the number of mental health conditions for each age group between 20 and 30. The results are then ordered by age and the number of mental health conditions in descending order." 
+}, { + "id": "235", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of patients with depression or anxiety in Colombia?", + "sql_context": "CREATE TABLE patients (patient_id INT, condition VARCHAR(50)); INSERT INTO patients (patient_id, condition) VALUES (1, \u0027Depression\u0027), (2, \u0027Anxiety\u0027), (3, \u0027Bipolar Disorder\u0027), (4, \u0027Depression\u0027), (5, \u0027Colombia\u0027); CREATE TABLE conditions (condition_id INT, condition VARCHAR(50)); INSERT INTO conditions (condition_id, condition) VALUES (1, \u0027Depression\u0027), (2, \u0027Anxiety\u0027), (3, \u0027Bipolar Disorder\u0027);", + "sql": "SELECT COUNT(DISTINCT patients.patient_id) AS total_patients FROM patients INNER JOIN conditions ON patients.condition \u003d conditions.condition WHERE conditions.condition IN (\u0027Depression\u0027, \u0027Anxiety\u0027) AND patients.condition IS NOT NULL AND patients.condition \u003c\u003e \u0027\u0027;", + "sql_explanation": "The SQL query calculates the total number of patients with depression or anxiety in Colombia by joining the patients and conditions table, filtering based on the condition that the condition is either depression or anxiety, and then counting the distinct number of patients." 
+}, { + "id": "313", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many depression patients are there in each age group?", + "sql_context": "CREATE TABLE age_groups (age_group_id INT, age_group_name VARCHAR(50), lower_limit INT, upper_limit INT); INSERT INTO age_groups (age_group_id, age_group_name, lower_limit, upper_limit) VALUES (1, \u002718-30\u0027, 18, 30);", + "sql": "SELECT age_groups.age_group_name, COUNT(patients.patient_id) FROM patients INNER JOIN age_groups ON patients.age BETWEEN age_groups.lower_limit AND age_groups.upper_limit WHERE patients.diagnosis \u003d \u0027Depression\u0027 GROUP BY age_groups.age_group_name;", + "sql_explanation": "This SQL query generates a report on the number of depression patients in each age group by using the COUNT function on the patient_id column, joined by the age and age_group columns, filtered by the diagnosis column with the value \u0027Depression\u0027, and grouped by the age_group_name column." 
+}, { + "id": "510", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who have been treated with CBT for depression in New York?", + "sql_context": "CREATE TABLE patients (patient_id INT, patient_age INT, patient_name TEXT, condition TEXT, therapist_id INT, treatment TEXT); INSERT INTO patients (patient_id, patient_age, patient_name, condition, therapist_id, treatment) VALUES (1, 35, \u0027John Smith\u0027, \u0027Depression\u0027, 1, \u0027CBT\u0027); INSERT INTO patients (patient_id, patient_age, patient_name, condition, therapist_id, treatment) VALUES (2, 40, \u0027Jane Doe\u0027, \u0027Depression\u0027, 1, \u0027Medication\u0027); CREATE TABLE therapists (therapist_id INT, therapist_name TEXT, state TEXT); INSERT INTO therapists (therapist_id, therapist_name, state) VALUES (1, \u0027Dr. Maria Rodriguez\u0027, \u0027New York\u0027);", + "sql": "SELECT AVG(patients.patient_age) FROM patients JOIN therapists ON patients.therapist_id \u003d therapists.therapist_id WHERE patients.condition \u003d \u0027Depression\u0027 AND patients.treatment \u003d \u0027CBT\u0027 AND therapists.state \u003d \u0027New York\u0027;", + "sql_explanation": "The SQL query calculates the average age of patients who have been treated with CBT for depression in New York. It first joins the patients table with the therapists table on the therapist_id column. 
Then, it filters the records where the condition column in the patients table is \u0027Depression\u0027, the treatment column in the patients table is \u0027CBT\u0027, and the state column in the therapists table is \u0027New York\u0027. Finally, it selects the average of the patient_age column." +}, { + "id": "529", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank the mental health conditions by the number of patients treated.", + "sql_context": "CREATE TABLE conditions (condition_id INT, condition VARCHAR(50)); INSERT INTO conditions (condition_id, condition) VALUES (1, \u0027Depression\u0027), (2, \u0027Anxiety\u0027), (3, \u0027Bipolar Disorder\u0027); CREATE TABLE patients (patient_id INT, condition_id INT); INSERT INTO patients (patient_id, condition_id) VALUES (1, 1), (2, 2), (3, 2), (4, 3), (5, 1);", + "sql": "SELECT conditions.condition, ROW_NUMBER() OVER(ORDER BY COUNT(patients.condition_id) DESC) AS rank FROM conditions INNER JOIN patients ON conditions.condition_id \u003d patients.condition_id GROUP BY conditions.condition;", + "sql_explanation": "The SQL query ranks the mental health conditions by the number of patients treated by grouping the patients table based on the condition_id column and then calculating the row number for each condition based on the count of patients treated." 
+}, { + "id": "595", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients with a specific mental health condition?", + "sql_context": "CREATE TABLE Patients (PatientID INT, Age INT, Gender VARCHAR(10), ConditionID INT); CREATE TABLE MentalHealthConditions (ConditionID INT, Condition VARCHAR(50));", + "sql": "SELECT MentalHealthConditions.Condition, AVG(Patients.Age) FROM Patients INNER JOIN MentalHealthConditions ON Patients.ConditionID \u003d MentalHealthConditions.ConditionID GROUP BY MentalHealthConditions.Condition;", + "sql_explanation": "The SQL query joins the Patients table and the MentalHealthConditions table on the ConditionID field, which is a common field in both tables. This allows us to calculate the average age of patients for each mental health condition." 
+}, { + "id": "624", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the therapists in Germany that provide interpersonal psychotherapy (IPT)?", + "sql_context": "CREATE TABLE mental_health.therapists (therapist_id INT, first_name VARCHAR(50), last_name VARCHAR(50), gender VARCHAR(50), country VARCHAR(50)); CREATE TABLE mental_health.treatments (treatment_id INT, patient_id INT, therapist_id INT, treatment_type VARCHAR(50), country VARCHAR(50)); INSERT INTO mental_health.therapists (therapist_id, first_name, last_name, gender, country) VALUES (7, \u0027Anna\u0027, \u0027Schmidt\u0027, \u0027Female\u0027, \u0027Germany\u0027); INSERT INTO mental_health.treatments (treatment_id, patient_id, therapist_id, treatment_type, country) VALUES (8, 1005, 7, \u0027IPT\u0027, \u0027Germany\u0027);", + "sql": "SELECT t.therapist_id, t.first_name, t.last_name FROM mental_health.therapists t JOIN mental_health.treatments tr ON t.therapist_id \u003d tr.therapist_id WHERE tr.treatment_type \u003d \u0027IPT\u0027 AND t.country \u003d \u0027Germany\u0027;", + "sql_explanation": "This query joins the therapists and treatments tables, filters for IPT type treatments in Germany, and retrieves the therapist information." 
+}, { + "id": "811", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients have been treated for anxiety in Hawaii?", + "sql_context": "CREATE TABLE condition_records (patient_id INT, condition VARCHAR(50)); INSERT INTO condition_records (patient_id, condition) VALUES (1, \u0027Depression\u0027), (2, \u0027Anxiety\u0027), (3, \u0027PTSD\u0027), (4, \u0027Anxiety\u0027), (5, \u0027Anxiety\u0027), (6, \u0027Bipolar Disorder\u0027), (7, \u0027Depression\u0027), (8, \u0027Anxiety\u0027); CREATE TABLE patient_location (patient_id INT, location VARCHAR(50)); INSERT INTO patient_location (patient_id, location) VALUES (1, \u0027California\u0027), (2, \u0027Hawaii\u0027), (3, \u0027Texas\u0027), (4, \u0027Hawaii\u0027), (5, \u0027Hawaii\u0027), (6, \u0027Florida\u0027), (7, \u0027California\u0027), (8, \u0027Hawaii\u0027);", + "sql": "SELECT COUNT(DISTINCT patient_id) FROM condition_records JOIN patient_location ON condition_records.patient_id \u003d patient_location.patient_id WHERE condition \u003d \u0027Anxiety\u0027 AND location \u003d \u0027Hawaii\u0027;", + "sql_explanation": "This query joins the condition_records table with the patient_location table, filters for patients treated for anxiety in Hawaii, and calculates the number of unique patients treated for anxiety in Hawaii." 
+}, { + "id": "1062", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who have received psychotherapy in California?", + "sql_context": "CREATE TABLE patients (id INT, name TEXT, age INT, state TEXT);CREATE TABLE treatments (id INT, patient_id INT, therapy TEXT);INSERT INTO patients (id, name, age, state) VALUES (1, \u0027John Doe\u0027, 45, \u0027California\u0027);INSERT INTO treatments (id, patient_id, therapy) VALUES (1, 1, \u0027Psychotherapy\u0027);", + "sql": "SELECT AVG(patients.age) FROM patients INNER JOIN treatments ON patients.id \u003d treatments.patient_id WHERE patients.state \u003d \u0027California\u0027 AND treatments.therapy \u003d \u0027Psychotherapy\u0027;", + "sql_explanation": "This query calculates the average age of patients who received psychotherapy treatment in California by joining the patients and treatments tables on the id and patient_id columns, respectively, and filtering for patients from California who received psychotherapy treatment." 
+}, { + "id": "1249", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum age of patients diagnosed with depression in Japan?", + "sql_context": "CREATE TABLE patients (id INT, age INT, gender TEXT, country TEXT); CREATE TABLE diagnoses (id INT, patient_id INT, disorder_name TEXT); INSERT INTO diagnoses (id, patient_id, disorder_name) VALUES (1, 101, \u0027Depression\u0027);", + "sql": "SELECT MIN(patients.age) FROM patients JOIN diagnoses ON patients.id \u003d diagnoses.patient_id WHERE diagnoses.disorder_name \u003d \u0027Depression\u0027 AND patients.country \u003d \u0027Japan\u0027;", + "sql_explanation": "We calculate the minimum age of patients in Japan diagnosed with depression. We join the patients and diagnoses tables on the patient_id field. Then, we filter for patients diagnosed with depression in Japan. Lastly, we calculate the minimum age of these patients." 
+}, { + "id": "1277", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of patients in India who received group therapy?", + "sql_context": "CREATE TABLE patients (id INT, country VARCHAR(20)); INSERT INTO patients (id, country) VALUES (1, \u0027India\u0027), (2, \u0027USA\u0027); CREATE TABLE treatments (id INT, patient_id INT, treatment VARCHAR(20)); INSERT INTO treatments (id, patient_id, treatment) VALUES (1, 1, \u0027Group Therapy\u0027), (2, 2, \u0027Individual Therapy\u0027);", + "sql": "SELECT COUNT(*) FROM patients INNER JOIN treatments ON patients.id \u003d treatments.patient_id WHERE treatments.treatment \u003d \u0027Group Therapy\u0027 AND patients.country \u003d \u0027India\u0027;", + "sql_explanation": "First, we join the patients and treatments tables on the id and patient_id columns, respectively. Then, we filter the results to only include rows where the treatment is group therapy and the country is India. Lastly, we calculate the number of rows in this filtered dataset." 
+}, { + "id": "1491", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all mental health conditions and their corresponding treatment approaches that have been implemented in the African region.", + "sql_context": "CREATE TABLE conditions (id INT, name VARCHAR(50)); CREATE TABLE treatments (id INT, condition_id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO conditions (id, name) VALUES (1, \u0027Anxiety Disorder\u0027); INSERT INTO treatments (id, condition_id, name, region) VALUES (1, 1, \u0027Cognitive Behavioral Therapy\u0027, \u0027Africa\u0027);", + "sql": "SELECT conditions.name, treatments.name FROM conditions INNER JOIN treatments ON conditions.id \u003d treatments.condition_id WHERE treatments.region \u003d \u0027Africa\u0027;", + "sql_explanation": "The SQL query joins the \u0027conditions\u0027 and \u0027treatments\u0027 tables on the \u0027id\u0027 column and filters the results by the \u0027region\u0027 column in the \u0027treatments\u0027 table. The INNER JOIN keyword returns only the matching rows from both tables. The WHERE keyword is used to filter the results based on a specified condition." 
+}, { + "id": "1580", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of patients who received therapy from counselors named \"James\" or \"Michelle\"?", + "sql_context": "CREATE TABLE counselors (counselor_id INT, name TEXT, age INT); INSERT INTO counselors (counselor_id, name, age) VALUES (1, \u0027James\u0027, 35), (2, \u0027Michelle\u0027, 40); CREATE TABLE patients (patient_id INT, counselor_id INT, age INT); INSERT INTO patients (patient_id, counselor_id, age) VALUES (1, 1, 25), (2, 1, 30), (3, 2, 45), (4, 2, 50);", + "sql": "SELECT AVG(patients.age) FROM patients JOIN counselors ON patients.counselor_id \u003d counselors.counselor_id WHERE counselors.name IN (\u0027James\u0027, \u0027Michelle\u0027);", + "sql_explanation": "Joins the patients and counselors tables on the counselor_id foreign key. Filters for counselors named \"James\" or \"Michelle\" and calculates the average age of their patients." 
+}, { + "id": "2020", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many patients have participated in public awareness campaigns in the past year?", + "sql_context": "CREATE TABLE campaigns (id INT, patient_id INT); CREATE TABLE patients (id INT, age INT, country VARCHAR(255)); INSERT INTO campaigns (id, patient_id) VALUES (1, 1), (2, 3), (3, 5); INSERT INTO patients (id, age, country) VALUES (1, 35, \u0027USA\u0027), (2, 42, \u0027Canada\u0027), (3, 28, \u0027Mexico\u0027), (4, 31, \u0027Brazil\u0027), (5, 45, \u0027Argentina\u0027);", + "sql": "SELECT COUNT(*) FROM campaigns JOIN patients ON campaigns.patient_id \u003d patients.id WHERE patients.country IN (\u0027USA\u0027, \u0027Canada\u0027, \u0027Mexico\u0027);", + "sql_explanation": "This query identifies the number of patients who have participated in public awareness campaigns in the past year by joining the campaigns and patients tables on the patient_id column and filtering the results to only include patients from the USA, Canada, and Mexico. The query then counts the number of rows in the resulting table using the COUNT function." 
+}, { + "id": "41", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique healthcare centers in the USA where at least 5 infectious disease cases were reported in the last month.", + "sql_context": "CREATE TABLE healthcare_centers (id INT, name TEXT, country TEXT, created_at TIMESTAMP); INSERT INTO healthcare_centers (id, name, country, created_at) VALUES (1, \u0027St. John Hospital\u0027, \u0027USA\u0027, \u00272021-01-01 10:00:00\u0027), (2, \u0027Montreal General Hospital\u0027, \u0027Canada\u0027, \u00272021-01-02 12:00:00\u0027); CREATE TABLE infectious_disease_reports (id INT, patient_id INT, healthcare_center_id INT, report_date TIMESTAMP); INSERT INTO infectious_disease_reports (id, patient_id, healthcare_center_id, report_date) VALUES (1, 1, 1, \u00272021-07-10 14:30:00\u0027), (2, 2, 1, \u00272021-06-15 09:00:00\u0027), (3, 3, 2, \u00272021-07-16 11:00:00\u0027);", + "sql": "SELECT COUNT(DISTINCT healthcare_centers.id) FROM healthcare_centers JOIN infectious_disease_reports ON healthcare_centers.id \u003d infectious_disease_reports.healthcare_center_id WHERE infectious_disease_reports.report_date \u003e\u003d DATEADD(month, -1, CURRENT_TIMESTAMP) GROUP BY healthcare_centers.id HAVING COUNT(infectious_disease_reports.id) \u003e\u003d 5 AND healthcare_centers.country \u003d \u0027USA\u0027;", + "sql_explanation": "This query finds the number of unique healthcare centers in the USA with at least 5 infectious disease cases reported in the last month. 
It does so by first joining the healthcare_centers and infectious_disease_reports tables, then filtering based on report date and country. Next, it groups the records by healthcare_center_id and filters based on the number of records in each group. Finally, it counts the number of distinct healthcare_center_id records in the filtered group." +}, { + "id": "627", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many dentists are there in rural areas per state?", + "sql_context": "CREATE TABLE areas (area_type VARCHAR(10)); INSERT INTO areas (area_type) VALUES (\u0027Rural\u0027); CREATE TABLE providers (provider_id INT, provider_type VARCHAR(20), area_id INT); INSERT INTO providers (provider_id, provider_type, area_id) VALUES (1, \u0027Dentist\u0027, 1), (2, \u0027Surgeon\u0027, 2); CREATE TABLE area_lookup (area_id INT, area_name VARCHAR(20), state_abbr VARCHAR(2)); INSERT INTO area_lookup (area_id, area_name, state_abbr) VALUES (1, \u0027Alabama\u0027, \u0027AL\u0027), (2, \u0027Alaska\u0027, \u0027AK\u0027);", + "sql": "SELECT al.state_abbr, COUNT(p.provider_id) as dentist_count FROM providers p JOIN area_lookup al ON p.area_id \u003d al.area_id WHERE p.provider_type \u003d \u0027Dentist\u0027 AND al.area_type \u003d \u0027Rural\u0027 GROUP BY al.state_abbr;", + "sql_explanation": "This query joins the providers and area_lookup tables, filters for dentists in rural areas, groups the results by state_abbr, and calculates the count of dentists per state." 
+}, { + "id": "1437", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of primary care physicians per 100,000 population for each state in the physicians table?", + "sql_context": "CREATE TABLE physicians (state TEXT, specialty TEXT, num_physicians INT); INSERT INTO physicians (state, specialty, num_physicians) VALUES (\u0027California\u0027, \u0027Primary Care\u0027, 15000), (\u0027Texas\u0027, \u0027Primary Care\u0027, 12000), (\u0027New York\u0027, \u0027Primary Care\u0027, 18000), (\u0027Florida\u0027, \u0027Primary Care\u0027, 14000);", + "sql": "SELECT state, (num_physicians * 100000) / population AS physicians_per_100k FROM physicians JOIN state_population ON physicians.state \u003d state_population.state;", + "sql_explanation": "This query calculates the number of primary care physicians per 100,000 population for each state in the physicians table. It uses the formula (num_physicians * 100000) / population to calculate the number of physicians per 100,000 population and returns the state and the number of physicians per 100,000 population for each state. The query joins the physicians table with the state_population table on the state column." 
+}, { + "id": "2107", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hospitals are there in each province of China?", + "sql_context": "CREATE TABLE china_provinces (id INT, province VARCHAR(50)); CREATE TABLE hospitals (id INT, name VARCHAR(50), province_id INT); INSERT INTO china_provinces (id, province) VALUES (1, \u0027Anhui\u0027), (2, \u0027Beijing\u0027), (3, \u0027Chongqing\u0027); INSERT INTO hospitals (id, name, province_id) VALUES (1, \u0027Anqing Hospital\u0027, 1), (2, \u0027Beijing Hospital\u0027, 2), (3, \u0027Chongqing Central Hospital\u0027, 3);", + "sql": "SELECT p.province, COUNT(h.id) AS total_hospitals FROM hospitals h JOIN china_provinces p ON h.province_id \u003d p.id GROUP BY p.province;", + "sql_explanation": "This SQL query joins the hospitals and china_provinces tables based on their respective province_id and id columns. It then groups the results by the province column and calculates the count of hospitals within each group." 
+}, { + "id": "2882", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many COVID-19 cases have been reported in each province of Canada?", + "sql_context": "CREATE TABLE canada_provinces (id INT, name VARCHAR(255)); CREATE TABLE covid_cases (id INT, province_id INT, cases INT); INSERT INTO canada_provinces (id, name) VALUES (1, \u0027Ontario\u0027), (2, \u0027Quebec\u0027), (3, \u0027British Columbia\u0027), (4, \u0027Alberta\u0027), (5, \u0027Manitoba\u0027);", + "sql": "SELECT p.name, SUM(c.cases) FROM covid_cases c JOIN canada_provinces p ON c.province_id \u003d p.id GROUP BY p.name;", + "sql_explanation": "This query joins the covid_cases and canada_provinces tables on the province_id column. It then groups the results by the name column of the canada_provinces table and calculates the sum of COVID-19 cases for each province." 
+}, { + "id": "343", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency incidents reported by each neighborhood in district 3?", + "sql_context": "CREATE TABLE districts (id INT, name VARCHAR(255)); INSERT INTO districts (id, name) VALUES (3, \u0027Downtown\u0027); CREATE TABLE neighborhoods (id INT, district_id INT, name VARCHAR(255)); INSERT INTO neighborhoods (id, district_id, name) VALUES (101, 3, \u0027Central Park\u0027); INSERT INTO neighborhoods (id, district_id, name) VALUES (102, 3, \u0027Riverview\u0027); CREATE TABLE emergency_incidents (id INT, neighborhood_id INT, reported_date DATE); INSERT INTO emergency_incidents (id, neighborhood_id, reported_date) VALUES (1001, 101, \u00272021-01-01\u0027); INSERT INTO emergency_incidents (id, neighborhood_id, reported_date) VALUES (1002, 101, \u00272021-01-02\u0027); INSERT INTO emergency_incidents (id, neighborhood_id, reported_date) VALUES (1003, 102, \u00272021-01-01\u0027);", + "sql": "SELECT neighborhoods.name, COUNT(emergency_incidents.id) AS total_incidents FROM neighborhoods JOIN emergency_incidents ON neighborhoods.id \u003d emergency_incidents.neighborhood_id WHERE neighborhoods.district_id \u003d 3 GROUP BY neighborhoods.name;", + "sql_explanation": "This query joins the neighborhoods table and emergency_incidents table on the neighborhood_id column, filters for the neighborhoods in district 3 using the WHERE clause, and then groups the results by neighborhoods.name. The COUNT function is used to count the total number of incidents for each neighborhood." 
+}, { + "id": "684", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for fire incidents in the city of Chicago, categorized by incident type?", + "sql_context": "CREATE TABLE emergency_responses (id INT, incident_id INT, response_time INT); CREATE TABLE emergency_incidents (id INT, incident_type VARCHAR(255), report_date DATE); INSERT INTO emergency_incidents (id, incident_type, report_date) VALUES (1, \u0027Fire\u0027, \u00272022-01-01\u0027), (2, \u0027Medical Emergency\u0027, \u00272022-01-02\u0027); INSERT INTO emergency_responses (id, incident_id, response_time) VALUES (1, 1, 10), (2, 1, 12), (3, 2, 20);", + "sql": "SELECT incident_type, AVG(response_time) FROM emergency_responses JOIN emergency_incidents ON emergency_responses.incident_id \u003d emergency_incidents.id WHERE incident_type \u003d \u0027Fire\u0027 GROUP BY incident_type;", + "sql_explanation": "This query joins the emergency_responses table with the emergency_incidents table based on the incident ID and filters the records for fire incidents. It then groups the records by incident type and returns the average response time for each group." 
+}, { + "id": "783", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of police officers in each division?", + "sql_context": "CREATE TABLE divisions (divid INT, name TEXT);CREATE TABLE police_officers (pid INT, divid INT, active BOOLEAN);", + "sql": "SELECT divisions.name, COUNT(police_officers.pid) FROM divisions INNER JOIN police_officers ON divisions.divid \u003d police_officers.divid WHERE police_officers.active \u003d TRUE GROUP BY divisions.name;", + "sql_explanation": "This SQL query counts the number of active police officers in each division. It performs an inner join between the divisions and police_officers tables, using the division ID (divid) as the common key. It then filters the results to only include active police officers (police_officers.active \u003d TRUE). Finally, it groups the results by division name (divisions.name) and counts the number of police officers (COUNT(police_officers.pid)) in each group." 
+}, { + "id": "1065", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total response time for emergency calls in the city of Chicago, broken down by hour of the day?", + "sql_context": "CREATE TABLE emergency_calls (id INT, call_time TIMESTAMP, response_time INT, city VARCHAR(20)); INSERT INTO emergency_calls VALUES (1, \u00272022-01-01 10:00:00\u0027, 15, \u0027Chicago\u0027); CREATE VIEW hours AS SELECT DATEPART(hour, call_time) as hour, 1 as hour_num FROM emergency_calls WHERE city \u003d \u0027Chicago\u0027;", + "sql": "SELECT h.hour, SUM(response_time) as total_response_time FROM hours h JOIN emergency_calls ec ON h.hour \u003d DATEPART(hour, ec.call_time) WHERE ec.city \u003d \u0027Chicago\u0027 GROUP BY h.hour;", + "sql_explanation": "This query calculates the total response time for emergency calls in the city of Chicago, broken down by hour of the day. It first creates a view of hours with their respective hour numbers. Then, it joins the hours view with the emergency_calls table based on the hour of the call_time field. After that, it calculates the total response time for each hour of the day by grouping by hour." 
+}, { + "id": "1123", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency incidents in the city of Los Angeles, categorized by incident type?", + "sql_context": "CREATE TABLE emergency_responses (id INT, incident_id INT, response_time INT); CREATE TABLE emergency_incidents (id INT, incident_type VARCHAR(255), report_date DATE); INSERT INTO emergency_incidents (id, incident_type, report_date) VALUES (1, \u0027Medical Emergency\u0027, \u00272022-01-01\u0027), (2, \u0027Fire\u0027, \u00272022-01-02\u0027); INSERT INTO emergency_responses (id, incident_id, response_time) VALUES (1, 1, 10), (2, 1, 12), (3, 2, 20);", + "sql": "SELECT incident_type, AVG(response_time) FROM emergency_responses JOIN emergency_incidents ON emergency_responses.incident_id \u003d emergency_incidents.id GROUP BY incident_type;", + "sql_explanation": "This query joins the emergency_responses table with the emergency_incidents table based on the incident ID. It then groups the records by incident type and returns the average response time for each group." 
+}, { + "id": "1348", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average emergency response time by region for fire incidents?", + "sql_context": "CREATE TABLE Regions (RegionID INT, RegionName VARCHAR(255)); CREATE TABLE Incidents (IncidentID INT, IncidentType VARCHAR(255), RegionID INT, ResponseTime INT);", + "sql": "SELECT AVG(ResponseTime) as AvgResponseTime, RegionName FROM Incidents i JOIN Regions r ON i.RegionID \u003d r.RegionID WHERE IncidentType \u003d \u0027Fire\u0027 GROUP BY RegionName;", + "sql_explanation": "The SQL query calculates the average response time for fire incidents by joining the Incidents table with the Regions table on the RegionID. It filters for fire incidents and groups the results by region using the GROUP BY clause. Finally, it calculates the average response time per region using the AVG function." 
+}, { + "id": "1474", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency medical service (EMS) calls in each borough of New York City in 2021?", + "sql_context": "CREATE TABLE NYCEMSCalls (Borough VARCHAR(255), Year INT, Calls INT); INSERT INTO NYCEMSCalls (Borough, Year, Calls) VALUES (\u0027Manhattan\u0027, 2021, 2500), (\u0027Brooklyn\u0027, 2021, 3000), (\u0027Queens\u0027, 2021, 2800), (\u0027Bronx\u0027, 2021, 2300), (\u0027Staten Island\u0027, 2021, 1800);", + "sql": "SELECT B.Borough, SUM(EC.Calls) as TotalEMSCalls FROM NYCEMSCalls EC INNER JOIN Boroughs B ON EC.Borough \u003d B.Borough WHERE EC.Year \u003d 2021 GROUP BY B.Borough;", + "sql_explanation": "This SQL query calculates the total number of emergency medical service (EMS) calls in each borough of New York City in 2021 by joining the NYCEMSCalls and Boroughs tables on the Borough column and grouping the result by the Borough column. The SUM function is applied to the Calls column to calculate the total number of EMS calls in each group." 
+}, { + "id": "1577", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for fire incidents in each precinct?", + "sql_context": "CREATE TABLE precincts (precinct_id INT, precinct_name TEXT, total_population INT); INSERT INTO precincts (precinct_id, precinct_name, total_population) VALUES (1, \u00271st Precinct\u0027, 50000), (2, \u00272nd Precinct\u0027, 60000), (3, \u00273rd Precinct\u0027, 40000); CREATE TABLE fire_incidents (incident_id INT, precinct_id INT, response_time INT); INSERT INTO fire_incidents (incident_id, precinct_id, response_time) VALUES (1, 1, 8), (2, 1, 10), (3, 2, 6), (4, 2, 7), (5, 3, 12), (6, 3, 14);", + "sql": "SELECT precinct_name, AVG(response_time) FROM fire_incidents JOIN precincts ON fire_incidents.precinct_id \u003d precincts.precinct_id GROUP BY precinct_name;", + "sql_explanation": "The SQL query joins the precincts table and fire_incidents table on precinct_id. It then calculates the average response_time for each precinct using the AVG() function and groups the results by precinct_name." 
+}, { + "id": "1654", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many emergency calls were made in each borough in January 2021?", + "sql_context": "CREATE TABLE emergency_calls_3 (id INT, borough_id INT, call_time TIMESTAMP); INSERT INTO emergency_calls_3 (id, borough_id, call_time) VALUES (1, 1, \u00272021-01-01 01:00:00\u0027), (2, 1, \u00272021-01-02 02:00:00\u0027), (3, 2, \u00272021-01-03 23:00:00\u0027), (4, 2, \u00272021-01-04 04:00:00\u0027), (5, 3, \u00272021-01-05 05:00:00\u0027), (6, 3, \u00272021-01-06 06:00:00\u0027), (7, 4, \u00272021-01-07 07:00:00\u0027), (8, 4, \u00272021-01-08 08:00:00\u0027), (9, 5, \u00272021-01-09 09:00:00\u0027), (10, 5, \u00272021-01-10 10:00:00\u0027), (11, 1, \u00272021-01-11 11:00:00\u0027), (12, 1, \u00272021-01-12 12:00:00\u0027);", + "sql": "SELECT b.name, COUNT(ec.id) FROM borough b JOIN emergency_calls_3 ec ON b.id \u003d ec.borough_id WHERE EXTRACT(MONTH FROM ec.call_time) \u003d 1 GROUP BY b.id;", + "sql_explanation": "First, we join the borough table with the emergency_calls_3 table on the borough_id column. Then, we filter the resulting table using the WHERE clause to only include rows where the month of the call_time column is equal to 1 (January). Then, we group the results by the name column of the borough table, and finally, we count the number of calls for each borough using the COUNT function." 
+}, { + "id": "1705", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the community policing initiatives in the northern region.", + "sql_context": "CREATE TABLE regions (region_id INT, region_name VARCHAR(255)); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027Northern\u0027), (2, \u0027Southern\u0027), (3, \u0027Eastern\u0027), (4, \u0027Western\u0027); CREATE TABLE community_policing (initiative_id INT, region_id INT, initiative_name VARCHAR(255));", + "sql": "SELECT r.region_name, c.initiative_name FROM community_policing c INNER JOIN regions r ON c.region_id \u003d r.region_id WHERE r.region_name \u003d \u0027Northern\u0027;", + "sql_explanation": "This SQL query lists all the community policing initiatives in the northern region by joining the community_policing and regions tables on the region_id column. It then filters the results to only include initiatives in the northern region." 
+}, { + "id": "1745", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of emergency responders in each region?", + "sql_context": "CREATE TABLE Regions (RegionID INT, Name VARCHAR(50)); CREATE TABLE EmergencyResponders (ResponderID INT, RegionID INT);", + "sql": "SELECT R.Name, COUNT(ER.ResponderID) as NumResponders FROM Regions R INNER JOIN EmergencyResponders ER ON R.RegionID \u003d ER.RegionID GROUP BY R.Name;", + "sql_explanation": "The SQL query joins the Regions and EmergencyResponders tables on the RegionID. It then groups the results by the Name of the Region and counts the number of Responders for each group." 
+}, { + "id": "2729", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of crime types across different time periods (morning, afternoon, evening)?", + "sql_context": "CREATE TABLE time_periods (id INT, period TEXT); CREATE TABLE crime_stats (id INT, time_period_id INT, crime_type TEXT, frequency INT);", + "sql": "SELECT tp.period, cs.crime_type, cs.frequency FROM time_periods tp JOIN crime_stats cs ON tp.id \u003d cs.time_period_id;", + "sql_explanation": "This query joins the time_periods table with the crime_stats table based on the time_period_id, and returns the crime_type and frequency for each time_period." +}, { + "id": "3064", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many crimes were committed in each type of location?", + "sql_context": "CREATE TABLE Locations (LocId INT, Type VARCHAR(50)); CREATE TABLE Crimes (CrimeId INT, LocId INT, Date DATE);", + "sql": "SELECT L.Type, COUNT(C.CrimeId) FROM Locations L INNER JOIN Crimes C ON L.LocId \u003d C.LocId GROUP BY L.Type;", + "sql_explanation": "We are joining the Locations table with the Crimes table on the LocId column, which is a foreign key in the Crimes table. 
Then, we group the results by location type (L.Type) and count the number of crimes (COUNT(C.CrimeId)) for each group." +}, { + "id": "697", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of climate finance provided to countries in the Asia-Pacific region for climate adaptation initiatives in the last 5 years?", + "sql_context": "CREATE TABLE climate_finance (country VARCHAR(50), initiative VARCHAR(50), funding DECIMAL(10,2), year INT); INSERT INTO climate_finance (country, initiative, funding, year) VALUES (\u0027Fiji\u0027, \u0027Coastal Protection\u0027, 1000000, 2018), (\u0027Indonesia\u0027, \u0027Water Management\u0027, 2000000, 2019), (\u0027Nepal\u0027, \u0027Disaster Risk Reduction\u0027, 1500000, 2020), (\u0027Bangladesh\u0027, \u0027Ecosystem Restoration\u0027, 2500000, 2021); CREATE TABLE regions (country VARCHAR(50), region VARCHAR(50)); INSERT INTO regions (country, region) VALUES (\u0027Fiji\u0027, \u0027Asia-Pacific\u0027), (\u0027Indonesia\u0027, \u0027Asia-Pacific\u0027), (\u0027Nepal\u0027, \u0027Asia-Pacific\u0027), (\u0027Bangladesh\u0027, \u0027Asia-Pacific\u0027);", + "sql": "SELECT SUM(cf.funding) FROM climate_finance cf INNER JOIN regions r ON cf.country \u003d r.country WHERE r.region \u003d \u0027Asia-Pacific\u0027 AND cf.initiative \u003d \u0027climate adaptation\u0027 AND cf.year BETWEEN 2017 AND 2021;", + "sql_explanation": "Joins the climate_finance and regions tables on the country column. Filters the results based on the region and initiative columns, and the year range. 
Calculates the sum of the funding column for the matching records." +}, { + "id": "1261", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What organizations are working on mitigation actions in the USA?", + "sql_context": "CREATE TABLE organizations (id INT, name VARCHAR(255), country VARCHAR(255), type VARCHAR(255)); INSERT INTO organizations (id, name, country, type) VALUES (4, \u0027Sunrise Movement\u0027, \u0027USA\u0027, \u0027NGO\u0027); CREATE TABLE mitigation_actions (id INT, project_id INT, organization_id INT, date DATE, type VARCHAR(255)); INSERT INTO mitigation_actions (id, project_id, organization_id, date, type) VALUES (4, 3, 4, \u00272022-04-01\u0027, \u0027Electric Vehicles\u0027);", + "sql": "SELECT organizations.name FROM organizations INNER JOIN mitigation_actions ON organizations.id \u003d mitigation_actions.organization_id WHERE organizations.country \u003d \u0027USA\u0027;", + "sql_explanation": "Join the \u0027organizations\u0027 and \u0027mitigation_actions\u0027 tables on the \u0027organization_id\u0027 and retrieve the names of organizations from the USA." 
+}, { + "id": "348", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of each fabric type used in the production of sustainable clothing items in 2021?", + "sql_context": "CREATE TABLE FabricInventory (InventoryID INT, ItemID INT, FabricType TEXT, Quantity INT); INSERT INTO FabricInventory (InventoryID, ItemID, FabricType, Quantity) VALUES (1, 1, \u0027Organic Cotton\u0027, 3), (2, 1, \u0027Hemp\u0027, 2), (3, 2, \u0027Recycled Polyester\u0027, 4), (4, 3, \u0027Organic Cotton\u0027, 5), (5, 4, \u0027Tencel\u0027, 6), (6, 5, \u0027Conventional Cotton\u0027, 7), (7, 6, \u0027Organic Cotton\u0027, 8), (8, 6, \u0027Hemp\u0027, 3), (9, 7, \u0027Recycled Polyester\u0027, 9); CREATE TABLE ClothingItems (ItemID INT, ItemName TEXT, ProductionDate DATE, Sustainable BOOLEAN); INSERT INTO ClothingItems (ItemID, ItemName, ProductionDate, Sustainable) VALUES (1, \u0027Organic Cotton Dress\u0027, \u00272021-01-01\u0027, true), (2, \u0027Recycled Polyester Skirt\u0027, \u00272021-02-01\u0027, true), (3, \u0027Organic Cotton Pants\u0027, \u00272021-03-01\u0027, true), (4, \u0027Tencel Jacket\u0027, \u00272021-04-01\u0027, true), (5, \u0027Conventional Cotton Shirt\u0027, \u00272021-05-01\u0027, false), (6, \u0027Sustainable Denim Jeans\u0027, \u00272021-06-01\u0027, true);", + "sql": "SELECT FabricType, SUM(Quantity) as TotalQuantity FROM FabricInventory JOIN ClothingItems ON FabricInventory.ItemID \u003d ClothingItems.ItemID WHERE ProductionDate BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 AND Sustainable \u003d true GROUP BY FabricType;", + "sql_explanation": "The SQL 
query joins FabricInventory and ClothingItems tables based on ItemID, filters for sustainable clothing items produced in 2021, calculates the sum of fabric quantities by grouping by FabricType, and returns the total quantity of each fabric type used in the production of sustainable clothing items in 2021." +}, { + "id": "953", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all factories in countries with a high percentage of sustainable textile sourcing, ordered alphabetically by factory name.", + "sql_context": "CREATE TABLE Factories (FactoryID int, FactoryName varchar(50), Country varchar(50)); INSERT INTO Factories (FactoryID, FactoryName, Country) VALUES (1, \u0027EcoFactory\u0027, \u0027Bangladesh\u0027); INSERT INTO Factories (FactoryID, FactoryName, Country) VALUES (2, \u0027GreenManufacturing\u0027, \u0027India\u0027); CREATE TABLE Sourcing (FactoryID int, SustainableSourcePercentage decimal(5,2)); INSERT INTO Sourcing (FactoryID, SustainableSourcePercentage) VALUES (1, 0.85); INSERT INTO Sourcing (FactoryID, SustainableSourcePercentage) VALUES (2, 0.90);", + "sql": "SELECT f.FactoryName FROM Factories f INNER JOIN Sourcing s ON f.FactoryID \u003d s.FactoryID WHERE s.SustainableSourcePercentage \u003e\u003d 0.80 GROUP BY f.FactoryName ORDER BY f.FactoryName ASC;", + "sql_explanation": "This query joins the Factories table and the Sourcing table on FactoryID. It filters for factories with a sustainable source percentage of at least 80% and groups by factory name. Finally, it orders the results alphabetically by factory name." 
+}, { + "id": "1754", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which size-inclusive garments have the highest revenue?", + "sql_context": "CREATE TABLE garments (id INT, size TEXT, revenue DECIMAL); INSERT INTO garments (id, size, revenue) VALUES (1, \u0027XS\u0027, 200), (2, \u0027S\u0027, 300), (3, \u0027M\u0027, 500), (4, \u0027L\u0027, 700), (5, \u0027XL\u0027, 800), (6, \u0027XXL\u0027, 900); CREATE TABLE sizes (id INT, size TEXT, description TEXT); INSERT INTO sizes (id, size, description) VALUES (1, \u0027XS\u0027, \u0027Extra Small\u0027), (2, \u0027S\u0027, \u0027Small\u0027), (3, \u0027M\u0027, \u0027Medium\u0027), (4, \u0027L\u0027, \u0027Large\u0027), (5, \u0027XL\u0027, \u0027Extra Large\u0027), (6, \u0027XXL\u0027, \u0027Extra Extra Large\u0027);", + "sql": "SELECT g.size, SUM(g.revenue) as total_revenue FROM garments g JOIN sizes s ON g.size \u003d s.size GROUP BY g.size ORDER BY total_revenue DESC LIMIT 1;", + "sql_explanation": "This query joins the \u0027garments\u0027 and \u0027sizes\u0027 tables based on the size column, calculates the total revenue for each size, orders them in descending order, and returns the size with the highest total revenue." 
+}, { + "id": "2088", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all fashion trends that are popular in both Europe and Asia.", + "sql_context": "CREATE TABLE European_Trends (Trend_ID INT, Trend_Name TEXT); INSERT INTO European_Trends (Trend_ID, Trend_Name) VALUES (1, \u0027Minimalist Style\u0027), (2, \u0027Bohemian Style\u0027); CREATE TABLE Asian_Trends (Trend_ID INT, Trend_Name TEXT); INSERT INTO Asian_Trends (Trend_ID, Trend_Name) VALUES (1, \u0027K-Pop Fashion\u0027), (2, \u0027Japanese Streetwear\u0027);", + "sql": "SELECT European_Trends.Trend_Name FROM European_Trends INNER JOIN Asian_Trends ON European_Trends.Trend_Name \u003d Asian_Trends.Trend_Name;", + "sql_explanation": "This SQL query lists all the fashion trends that are popular in both Europe and Asia by using an inner join to combine the European_Trends and Asian_Trends tables based on the Trend_Name column." 
+}, { + "id": "2997", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of orders for each product category?", + "sql_context": "CREATE TABLE Products (id INT, category VARCHAR(20), price DECIMAL(5,2)); CREATE TABLE Orders (id INT, product_id INT, quantity INT, order_date DATE); INSERT INTO Products (id, category, price) VALUES (1, \u0027Tops\u0027, 15.99), (2, \u0027Bottoms\u0027, 29.99), (3, \u0027Outerwear\u0027, 49.99), (4, \u0027Accessories\u0027, 7.99), (5, \u0027Tops\u0027, 12.99); INSERT INTO Orders (id, product_id, quantity, order_date) VALUES (1, 1, 10, \u00272021-01-01\u0027), (2, 2, 5, \u00272021-01-02\u0027), (3, 3, 2, \u00272021-01-03\u0027), (4, 4, 15, \u00272021-01-04\u0027), (5, 5, 8, \u00272021-01-05\u0027);", + "sql": "SELECT p.category, SUM(o.quantity) FROM Products p JOIN Orders o ON p.id \u003d o.product_id GROUP BY p.category;", + "sql_explanation": "Summarize the quantity of orders for each product category." 
+}, { + "id": "378", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the attorney with the most cases won in the \u0027Seoul\u0027 office?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, office VARCHAR(50)); INSERT INTO attorneys VALUES (1, \u0027Seoul\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, case_outcome VARCHAR(10));", + "sql": "SELECT attorneys.name, COUNT(*) AS cases_won FROM attorneys INNER JOIN cases ON attorneys.attorney_id \u003d cases.attorney_id WHERE attorneys.office \u003d \u0027Seoul\u0027 AND case_outcome \u003d \u0027won\u0027 GROUP BY attorneys.name ORDER BY cases_won DESC LIMIT 1;", + "sql_explanation": "This query finds the attorney with the most cases won in the \u0027Seoul\u0027 office. It performs an inner join between the attorneys and cases tables on the attorney_id column, filters the results to only include rows where the attorney\u0027s office is \u0027Seoul\u0027 and the case was won, groups the results by attorney name, and then orders the results by the count of won cases in descending order. It then selects the top row (i.e., the attorney with the most cases won) using the LIMIT clause." 
+}, { + "id": "679", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all cases where the attorney\u0027s first name starts with \u0027S\u0027 and the case is still open.", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, first_name VARCHAR(20)); INSERT INTO attorneys (attorney_id, first_name) VALUES (1, \u0027Samantha\u0027), (2, \u0027Jacob\u0027), (3, \u0027Steven\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, open_date DATE, close_date DATE); INSERT INTO cases (case_id, attorney_id, open_date, close_date) VALUES (1, 1, \u00272022-04-01\u0027, NULL), (2, 2, \u00272022-05-01\u0027, \u00272022-05-15\u0027), (3, 3, \u00272022-02-01\u0027, \u00272022-02-28\u0027);", + "sql": "SELECT cases.case_id, attorneys.first_name, cases.open_date FROM cases INNER JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id WHERE attorneys.first_name LIKE \u0027S%\u0027 AND cases.close_date IS NULL;", + "sql_explanation": "Joins the cases and attorneys tables on the attorney_id column, filters for rows where the attorney\u0027s first name starts with \u0027S\u0027 and the case is still open, and returns the case ID, attorney\u0027s first name, and open date." 
+}, { + "id": "810", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of cases and count of distinct case types for each attorney, based on the \u0027attorney_id\u0027 column in the \u0027attorneys\u0027 table?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, attorney_state VARCHAR(255)); CREATE TABLE cases (case_id INT, case_number INT, attorney_id INT, case_type VARCHAR(255));", + "sql": "SELECT a.attorney_id, COUNT(c.case_id) as case_count, COUNT(DISTINCT c.case_type) as case_type_count FROM attorneys a INNER JOIN cases c ON a.attorney_id \u003d c.attorney_id GROUP BY a.attorney_id;", + "sql_explanation": "This query joins the \u0027attorneys\u0027 and \u0027cases\u0027 tables on the \u0027attorney_id\u0027 column. It then groups the results by the \u0027attorney_id\u0027 column from the \u0027attorneys\u0027 table and calculates the count of \u0027case_id\u0027 and count of distinct \u0027case_type\u0027 from the \u0027cases\u0027 table for each group." 
+}, { + "id": "844", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were won by each attorney in the Northeast region?", + "sql_context": "CREATE TABLE Cases ( CaseID INT, AttorneyID INT, Region VARCHAR(50), CaseOutcome VARCHAR(50) ); INSERT INTO Cases (CaseID, AttorneyID, Region, CaseOutcome) VALUES (1, 1, \u0027Northeast\u0027, \u0027Won\u0027), (2, 1, \u0027Northeast\u0027, \u0027Lost\u0027), (3, 2, \u0027Northeast\u0027, \u0027Won\u0027), (4, 2, \u0027Northeast\u0027, \u0027Won\u0027), (5, 3, \u0027Midwest\u0027, \u0027Won\u0027), (6, 3, \u0027Midwest\u0027, \u0027Lost\u0027), (7, 4, \u0027Southwest\u0027, \u0027Won\u0027), (8, 4, \u0027Southwest\u0027, \u0027Won\u0027), (9, 5, \u0027West\u0027, \u0027Lost\u0027), (10, 5, \u0027West\u0027, \u0027Lost\u0027);", + "sql": "SELECT a.Name AS AttorneyName, COUNT(c.CaseID) AS WonCases FROM Attorneys a JOIN Cases c ON a.AttorneyID \u003d c.AttorneyID WHERE a.Region \u003d \u0027Northeast\u0027 AND c.CaseOutcome \u003d \u0027Won\u0027 GROUP BY a.Name;", + "sql_explanation": "This SQL query calculates the number of cases won by each attorney in the Northeast region. It uses a JOIN clause to combine the data from the Attorneys and Cases tables based on the AttorneyID. The WHERE clause filters the records to only include cases from the Northeast region that were won. Finally, the GROUP BY clause groups the records by attorney name and the COUNT function calculates the number of cases won for each attorney." 
+}, { + "id": "1005", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the attorney with the highest billing amount in the \u0027New York\u0027 office?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, office VARCHAR(50)); INSERT INTO attorneys VALUES (1, \u0027New York\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, billing_amount DECIMAL(10,2));", + "sql": "SELECT attorneys.name, MAX(billing_amount) FROM attorneys INNER JOIN cases ON attorneys.attorney_id \u003d cases.attorney_id WHERE attorneys.office \u003d \u0027New York\u0027 GROUP BY attorneys.name;", + "sql_explanation": "This query finds the attorney with the highest billing amount in the \u0027New York\u0027 office. It performs an inner join between the attorneys and cases tables on the attorney_id column, filters the results to only include rows where the attorney\u0027s office is \u0027New York\u0027, and then groups the results by attorney name. It then selects the attorney with the highest billing amount by taking the maximum billing_amount for each group." 
+}, { + "id": "1006", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 legal precedents cited in cases handled by attorneys from New York?", + "sql_context": "CREATE TABLE Cases (CaseID INT, AttorneyID INT, Precedent VARCHAR(255)); CREATE TABLE Attorneys (AttorneyID INT, City VARCHAR(255));", + "sql": "SELECT Precedent, COUNT(*) FROM Cases JOIN Attorneys ON Cases.AttorneyID \u003d Attorneys.AttorneyID WHERE Attorneys.City \u003d \u0027New York\u0027 GROUP BY Precedent ORDER BY COUNT(*) DESC LIMIT 3;", + "sql_explanation": "The SQL query identifies the top 3 legal precedents cited in cases handled by attorneys from New York by performing an INNER JOIN on the \u0027AttorneyID\u0027 column and grouping the results by the \u0027Precedent\u0027 column. It orders the results by the count of the \u0027Precedent\u0027 column in descending order and limits the output to the top 3." 
+}, { + "id": "1386", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique legal precedents cited in cases handled by attorneys with the last name \u0027Johnson\u0027 or \u0027Johnston\u0027?", + "sql_context": "CREATE TABLE cases (case_id INT, case_outcome VARCHAR(20), precedent_cited VARCHAR(50)); CREATE TABLE attorneys (attorney_id INT, first_name VARCHAR(20), last_name VARCHAR(20)); INSERT INTO attorneys (attorney_id, first_name, last_name) VALUES (1, \u0027John\u0027, \u0027Doe\u0027); INSERT INTO attorneys (attorney_id, first_name, last_name) VALUES (2, \u0027Jane\u0027, \u0027Smith\u0027); INSERT INTO attorneys (attorney_id, first_name, last_name) VALUES (3, \u0027Robert\u0027, \u0027Johnson\u0027); INSERT INTO attorneys (attorney_id, first_name, last_name) VALUES (4, \u0027Laura\u0027, \u0027Johnston\u0027); INSERT INTO cases (case_id, case_outcome, precedent_cited) VALUES (1, \u0027Won\u0027, \u0027Precedent A\u0027); INSERT INTO cases (case_id, case_outcome, precedent_cited) VALUES (2, \u0027Lost\u0027, \u0027Precedent B\u0027); INSERT INTO cases (case_id, case_outcome, precedent_cited) VALUES (3, \u0027Won\u0027, \u0027Precedent A\u0027); INSERT INTO cases (case_id, case_outcome, precedent_cited) VALUES (4, \u0027Lost\u0027, \u0027Precedent C\u0027);", + "sql": "SELECT DISTINCT precedent_cited FROM cases INNER JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id WHERE attorneys.last_name IN (\u0027Johnson\u0027, \u0027Johnston\u0027);", + "sql_explanation": "This SQL query first joins the cases table with the attorneys table on the attorney_id field. 
It then filters the results to only include rows where the last_name field in the attorneys table is either \u0027Johnson\u0027 or \u0027Johnston\u0027. Finally, it selects the precedent_cited field from the resulting rows and removes any duplicate values using the DISTINCT keyword." +}, { + "id": "1541", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum billing amount for cases handled by attorneys who identify as LGBTQ+?", + "sql_context": "CREATE TABLE AttorneyIdentity (AttorneyID INT, Identity VARCHAR(20)); INSERT INTO AttorneyIdentity (AttorneyID, Identity) VALUES (1, \u0027Straight\u0027), (2, \u0027LGBTQ+\u0027), (3, \u0027Straight\u0027);", + "sql": "SELECT MAX(BillingAmount) FROM AttorneyBilling JOIN AttorneyIdentity ON AttorneyBilling.AttorneyID \u003d AttorneyIdentity.AttorneyID WHERE Identity \u003d \u0027LGBTQ+\u0027;", + "sql_explanation": "This query calculates the maximum billing amount for cases handled by attorneys who identify as LGBTQ+ by joining the AttorneyBilling and AttorneyIdentity tables, filtering for attorneys who identify as LGBTQ+, and using the MAX function." 
+}, { + "id": "1550", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for each client?", + "sql_context": "CREATE TABLE clients (client_id INT, client_name VARCHAR(255)); INSERT INTO clients (client_id, client_name) VALUES (1, \u0027Acme Inc\u0027), (2, \u0027Beta Corp\u0027); CREATE TABLE billing (bill_id INT, client_id INT, amount DECIMAL(10, 2)); INSERT INTO billing (bill_id, client_id, amount) VALUES (1, 1, 500.00), (2, 1, 250.00), (3, 2, 750.00);", + "sql": "SELECT c.client_name, SUM(b.amount) as total_billing FROM clients c INNER JOIN billing b ON c.client_id \u003d b.client_id GROUP BY c.client_id, c.client_name;", + "sql_explanation": "The SQL query performs an inner join between the clients and billing tables, joining on the client_id field. It then calculates the total billing amount for each client by grouping the results by the client_id and client_name fields and summing the amount field." 
+}, { + "id": "1562", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clients who have paid their bills in full for cases with a billing amount greater than $10,000.", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT); INSERT INTO clients (client_id, name) VALUES (1, \u0027Jane Doe\u0027), (2, \u0027John Smith\u0027), (3, \u0027Sara Connor\u0027), (4, \u0027Tom Williams\u0027); CREATE TABLE cases (case_id INT, client_id INT, billing_amount INT, paid_in_full BOOLEAN); INSERT INTO cases (case_id, client_id, billing_amount, paid_in_full) VALUES (1, 1, 12000, TRUE), (2, 2, 8000, FALSE), (3, 3, 20000, TRUE), (4, 4, 5000, FALSE);", + "sql": "SELECT clients.name FROM clients INNER JOIN cases ON clients.client_id \u003d cases.client_id WHERE cases.billing_amount \u003e 10000 AND cases.paid_in_full \u003d TRUE;", + "sql_explanation": "This query lists all clients who have paid their bills in full for cases with a billing amount greater than $10,000. It performs an inner join on the \u0027clients\u0027 and \u0027cases\u0027 tables, matching the client_id in both tables. It then filters for rows where the billing_amount is greater than $10,000 and the \u0027paid_in_full\u0027 column in the \u0027cases\u0027 table is TRUE." 
+}, { + "id": "1628", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average billable hours per case for attorneys in the New York office?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, attorney_name TEXT, office_location TEXT); INSERT INTO attorneys (attorney_id, attorney_name, office_location) VALUES (1, \u0027John Doe\u0027, \u0027San Francisco\u0027), (2, \u0027Jane Smith\u0027, \u0027New York\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, billable_hours INT); INSERT INTO cases (case_id, attorney_id, billable_hours) VALUES (1, 1, 10), (2, 1, 15), (3, 2, 20), (4, 2, 25);", + "sql": "SELECT AVG(c.billable_hours) as avg_billable_hours FROM attorneys a JOIN cases c ON a.attorney_id \u003d c.attorney_id WHERE a.office_location \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query calculates the average billable hours per case for attorneys in the New York office. It first joins the attorneys and cases tables on the attorney_id field. Then, it filters the results to only include attorneys in the New York office. Lastly, it calculates the average of billable_hours for all cases." 
+}, { + "id": "1809", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average billing amount for cases in the \u0027Family Law\u0027 category, per attorney?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, AttorneyName VARCHAR(255)); CREATE TABLE Cases (CaseID INT, AttorneyID INT, CaseType VARCHAR(255), BillingAmount DECIMAL);", + "sql": "SELECT AVG(BillingAmount) FROM Cases JOIN Attorneys ON Cases.AttorneyID \u003d Attorneys.AttorneyID WHERE CaseType \u003d \u0027Family Law\u0027 GROUP BY AttorneyID;", + "sql_explanation": "The SQL query calculates the average billing amount from the \u0027Cases\u0027 table for entries where the \u0027CaseType\u0027 column has a value of \u0027Family Law\u0027, grouping by the \u0027AttorneyID\u0027 column." 
+}, { + "id": "1877", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were won by attorneys with less than 3 years of experience?", + "sql_context": "CREATE TABLE attorneys (id INT, years_of_experience INT); CREATE TABLE cases (id INT, attorney_id INT, case_outcome VARCHAR(10));", + "sql": "SELECT COUNT(*) FROM cases JOIN attorneys ON cases.attorney_id \u003d attorneys.id WHERE attorneys.years_of_experience \u003c 3 AND case_outcome \u003d \u0027won\u0027;", + "sql_explanation": "This query joins the attorneys and cases tables on the attorney_id field. It then filters the results to only include attorneys with less than 3 years of experience and cases that were won. Lastly, it counts the number of rows that meet these conditions." 
+}, { + "id": "1886", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cases for each attorney?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, Name VARCHAR(50)); INSERT INTO Attorneys (AttorneyID, Name) VALUES (1, \u0027Jose Garcia\u0027), (2, \u0027Lee Kim\u0027); CREATE TABLE Cases (CaseID INT, AttorneyID INT); INSERT INTO Cases (CaseID, AttorneyID) VALUES (1, 1), (2, 1), (3, 2), (4, 2), (5, 2);", + "sql": "SELECT Attorneys.Name, COUNT(Cases.CaseID) FROM Attorneys INNER JOIN Cases ON Attorneys.AttorneyID \u003d Cases.AttorneyID GROUP BY Attorneys.Name;", + "sql_explanation": "This query calculates the total number of cases for each attorney by joining the Attorneys table and the Cases table on the AttorneyID column. It then calculates the count of cases for each attorney and groups the results by attorney name." 
+}, { + "id": "2065", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of cases handled by attorneys who have passed the bar exam in California.", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, name TEXT, passed_bar_exam_ca BOOLEAN); INSERT INTO attorneys (attorney_id, name, passed_bar_exam_ca) VALUES (1, \u0027Jane Doe\u0027, TRUE), (2, \u0027John Smith\u0027, FALSE), (3, \u0027Sara Connor\u0027, TRUE), (4, \u0027Tom Williams\u0027, FALSE); CREATE TABLE cases (case_id INT, attorney_id INT); INSERT INTO cases (case_id, attorney_id) VALUES (1, 1), (2, 2), (3, 3), (4, 3), (5, 4);", + "sql": "SELECT COUNT(*) FROM cases INNER JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id WHERE attorneys.passed_bar_exam_ca \u003d TRUE;", + "sql_explanation": "This query identifies the number of cases handled by attorneys who have passed the bar exam in California. It performs an inner join on the \u0027attorneys\u0027 and \u0027cases\u0027 tables, matching the attorney_id in both tables. It then filters for rows where the \u0027passed_bar_exam_ca\u0027 column in the \u0027attorneys\u0027 table is TRUE and counts the number of rows." 
+}, { + "id": "2293", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were handled by attorneys in the \u0027Associate\u0027 title category?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, name TEXT, title TEXT); INSERT INTO attorneys (attorney_id, name, title) VALUES (1, \u0027Smith\u0027, \u0027Senior Partner\u0027), (2, \u0027Johnson\u0027, \u0027Associate\u0027), (3, \u0027Williams\u0027, \u0027Associate\u0027); CREATE TABLE cases (case_id INT, attorney_id INT); INSERT INTO cases (case_id, attorney_id) VALUES (1, 1), (2, 2), (3, 3), (4, 3);", + "sql": "SELECT COUNT(*) FROM attorneys INNER JOIN cases ON attorneys.attorney_id \u003d cases.attorney_id WHERE attorneys.title \u003d \u0027Associate\u0027;", + "sql_explanation": "Joins the attorneys and cases tables on attorney_id, filters for rows where the attorney\u0027s title is \u0027Associate\u0027, and counts the number of matching rows." 
+}, { + "id": "2465", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were won by attorneys from the New York office?", + "sql_context": "CREATE TABLE attorneys (id INT, office_location VARCHAR(50), name VARCHAR(50), years_of_experience INT, specialty VARCHAR(50)); INSERT INTO attorneys (id, office_location, name, years_of_experience, specialty) VALUES (1, \u0027New York\u0027, \u0027John Doe\u0027, 12, \u0027Criminal Law\u0027); INSERT INTO attorneys (id, office_location, name, years_of_experience, specialty) VALUES (2, \u0027Los Angeles\u0027, \u0027Jane Smith\u0027, 5, \u0027Family Law\u0027); CREATE TABLE cases (id INT, attorney_id INT, case_status VARCHAR(50)); INSERT INTO cases (id, attorney_id, case_status) VALUES (1, 1, \u0027Won\u0027); INSERT INTO cases (id, attorney_id, case_status) VALUES (2, 2, \u0027Lost\u0027);", + "sql": "SELECT COUNT(*) FROM cases JOIN attorneys ON cases.attorney_id \u003d attorneys.id WHERE attorneys.office_location \u003d \u0027New York\u0027;", + "sql_explanation": "This SQL query counts the number of cases won by attorneys from the New York office. It does so by joining the cases and attorneys tables on the attorney_id column, filtering for attorneys from the New York office, and then counting the number of rows using the COUNT() function." 
+}, { + "id": "2648", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for cases with a specific outcome?", + "sql_context": "CREATE TABLE Cases (CaseID int, Outcome varchar(10)); INSERT INTO Cases VALUES (1, \u0027Won\u0027), (2, \u0027Lost\u0027), (3, \u0027Won\u0027); CREATE TABLE Billing (BillingID int, CaseID int, Amount decimal(10,2)); INSERT INTO Billing VALUES (1, 1, 5000.00), (2, 1, 7000.00), (3, 2, 3000.00), (4, 3, 4000.00);", + "sql": "SELECT SUM(B.Amount) as TotalBilling FROM Billing B INNER JOIN Cases C ON B.CaseID \u003d C.CaseID WHERE C.Outcome \u003d \u0027Won\u0027;", + "sql_explanation": "This SQL query performs an inner join on the Billing and Cases tables, based on the CaseID. It then calculates the total billing amount for cases with a specific outcome by using the SUM function on the Amount column in the Billing table, and filters the result based on the Outcome column in the Cases table." 
+}, { + "id": "2659", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum billing amount for cases in each region?", + "sql_context": "CREATE TABLE Regions (RegionID INT, Region VARCHAR(20)); INSERT INTO Regions (RegionID, Region) VALUES (1, \u0027Northeast\u0027), (2, \u0027Southeast\u0027), (3, \u0027Midwest\u0027), (4, \u0027Southwest\u0027), (5, \u0027West\u0027); CREATE TABLE Cases (CaseID INT, RegionID INT, BillingAmount DECIMAL(10,2)); INSERT INTO Cases (CaseID, RegionID, BillingAmount) VALUES (1, 1, 5000.00), (2, 2, 4000.00), (3, 3, 6000.00), (4, 1, 5500.00), (5, 2, 4500.00);", + "sql": "SELECT Region, MIN(BillingAmount) FROM Cases INNER JOIN Regions ON Cases.RegionID \u003d Regions.RegionID GROUP BY Region;", + "sql_explanation": "The SQL query calculates the minimum billing amount for cases in each region by performing an inner join between the Cases and Regions tables on the RegionID column, and then applying the MIN function to the BillingAmount column while grouping by the Region column." 
+}, { + "id": "2714", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the names of attorneys with a \u0027civil\u0027 case", + "sql_context": "CREATE TABLE attorneys (id INT, name VARCHAR(50), department VARCHAR(20)); CREATE TABLE cases (id INT, attorney_id INT, case_number VARCHAR(20), case_type VARCHAR(10)); INSERT INTO attorneys (id, name, department) VALUES (1, \u0027John Doe\u0027, \u0027criminal\u0027); INSERT INTO attorneys (id, name, department) VALUES (2, \u0027Jane Smith\u0027, \u0027civil\u0027); INSERT INTO cases (id, attorney_id, case_number, case_type) VALUES (1, 1, \u002712345\u0027, \u0027criminal\u0027); INSERT INTO cases (id, attorney_id, case_number, case_type) VALUES (2, 2, \u002754321\u0027, \u0027civil\u0027);", + "sql": "SELECT attorneys.name FROM attorneys JOIN cases ON attorneys.id \u003d cases.attorney_id WHERE cases.case_type \u003d \u0027civil\u0027;", + "sql_explanation": "This SQL query gets the names of attorneys with a \u0027civil\u0027 case by using the JOIN clause to combine the attorneys and cases tables on the attorney_id column, and filtering the data using the WHERE clause for the \u0027civil\u0027 case type. The SELECT clause is used to get the name column from the attorneys table." 
+}, { + "id": "3100", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cases were handled by attorneys with less than 5 years of experience?", + "sql_context": "CREATE TABLE Cases (CaseID int, AttorneyID int); INSERT INTO Cases (CaseID, AttorneyID) VALUES (1, 1), (2, 3), (3, 2), (4, 1), (5, 3), (6, 2), (7, 1); INSERT INTO Attorneys (AttorneyID, ExperienceYears) VALUES (1, 12), (2, 8), (3, 4);", + "sql": "SELECT COUNT(*) FROM Cases c JOIN Attorneys a ON c.AttorneyID \u003d a.AttorneyID WHERE a.ExperienceYears \u003c 5;", + "sql_explanation": "A join between Cases and Attorneys is performed to filter attorneys with less than 5 years of experience, and the count of cases is calculated." 
+}, { + "id": "3122", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clients who have not paid their bills in full.", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT); CREATE TABLE bills (bill_id INT, client_id INT, amount DECIMAL(10, 2), paid BOOLEAN);", + "sql": "SELECT c.client_id, c.name FROM clients c JOIN bills b ON c.client_id \u003d b.client_id WHERE b.paid \u003d FALSE;", + "sql_explanation": "This SQL query joins the clients and bills tables on the client_id column, and then filters for clients who have not paid their bills in full by filtering for bills where paid is false." 
+}, { + "id": "246", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the conservation status and number of protected areas for marine species in the Indian Ocean.", + "sql_context": "CREATE TABLE marine_species_indian_ocean (species_name VARCHAR(255), habitat VARCHAR(255)); INSERT INTO marine_species_indian_ocean (species_name, habitat) VALUES (\u0027Whale Shark\u0027, \u0027Indian Ocean\u0027), (\u0027Dugong\u0027, \u0027Indian Ocean\u0027); CREATE TABLE conservation_efforts_indian_ocean (species_name VARCHAR(255), conservation_status VARCHAR(255), protected_areas INT); INSERT INTO conservation_efforts_indian_ocean (species_name, conservation_status, protected_areas) VALUES (\u0027Whale Shark\u0027, \u0027Vulnerable\u0027, 25), (\u0027Dugong\u0027, \u0027Endangered\u0027, 15);", + "sql": "SELECT m.species_name, c.conservation_status, COUNT(c.protected_areas) AS protected_areas_count FROM marine_species_indian_ocean m INNER JOIN conservation_efforts_indian_ocean c ON m.species_name \u003d c.species_name GROUP BY m.species_name, c.conservation_status;", + "sql_explanation": "Joining marine_species_indian_ocean and conservation_efforts_indian_ocean tables on species_name, selecting species_name, conservation_status, and the count of protected areas for marine species in the Indian Ocean." 
+}, { + "id": "560", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the impact of climate change on marine mammals in the Arctic region.", + "sql_context": "CREATE TABLE marine_mammals (id INT, species_name VARCHAR(255), region VARCHAR(255)); CREATE TABLE climate_change_impact (id INT, species_id INT, impact VARCHAR(255)); INSERT INTO marine_mammals (id, species_name, region) VALUES (1, \u0027Polar Bear\u0027, \u0027Arctic\u0027), (2, \u0027Narwhal\u0027, \u0027Arctic\u0027); INSERT INTO climate_change_impact (id, species_id, impact) VALUES (1, 1, \u0027Habitat loss\u0027), (2, 2, \u0027Changes in food availability\u0027);", + "sql": "SELECT marine_mammals.species_name, climate_change_impact.impact FROM marine_mammals INNER JOIN climate_change_impact ON marine_mammals.id \u003d climate_change_impact.species_id WHERE marine_mammals.region \u003d \u0027Arctic\u0027;", + "sql_explanation": "This query performs an inner join on the \u0027marine_mammals\u0027 and \u0027climate_change_impact\u0027 tables, using the \u0027id\u0027 column in the \u0027marine_mammals\u0027 table and the \u0027species_id\u0027 column in the \u0027climate_change_impact\u0027 table as the join key. It then filters the results to only include rows where the \u0027region\u0027 column in the \u0027marine_mammals\u0027 table is \u0027Arctic\u0027, and returns the \u0027species_name\u0027 and \u0027impact\u0027 columns for those rows." 
+}, { + "id": "839", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the climate change impacts on marine species in the Southern Ocean?", + "sql_context": "CREATE TABLE marine_species_southern_ocean (species_name VARCHAR(255), habitat VARCHAR(255)); INSERT INTO marine_species_southern_ocean (species_name, habitat) VALUES (\u0027Ross Seal\u0027, \u0027Southern Ocean\u0027), (\u0027Crabeater Seal\u0027, \u0027Southern Ocean\u0027); CREATE TABLE climate_change_impacts_southern_ocean (species_name VARCHAR(255), impacts TEXT); INSERT INTO climate_change_impacts_southern_ocean (species_name, impacts) VALUES (\u0027Ross Seal\u0027, \u0027Sea ice reduction\u0027), (\u0027Crabeater Seal\u0027, \u0027Changes in prey distribution\u0027);", + "sql": "SELECT m.species_name, c.impacts FROM marine_species_southern_ocean m INNER JOIN climate_change_impacts_southern_ocean c ON m.species_name \u003d c.species_name WHERE m.habitat \u003d \u0027Southern Ocean\u0027;", + "sql_explanation": "Joining marine_species_southern_ocean and climate_change_impacts_southern_ocean tables on species_name, filtering for Southern Ocean habitats, and selecting species_name and impacts." 
+}, { + "id": "1233", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many research projects were conducted for each marine species in 2022?", + "sql_context": "CREATE TABLE marine_species (id INT, species VARCHAR(255), population INT, habitat VARCHAR(255));CREATE TABLE research_projects (id INT, researcher VARCHAR(255), species VARCHAR(255), project_name VARCHAR(255), start_date DATE, end_date DATE);CREATE VIEW species_by_habitat AS SELECT habitat, species FROM marine_species;", + "sql": "SELECT species, COUNT(*) FROM research_projects JOIN marine_species ON research_projects.species \u003d marine_species.species WHERE YEAR(start_date) \u003d 2022 GROUP BY species;", + "sql_explanation": "This SQL query counts the number of research projects for each marine species in 2022 by joining the research_projects table with the marine_species table on the species column, filtering it by the year 2022, and grouping it by the species column." 
+}, { + "id": "1469", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the change in ocean temperatures for each year compared to avg.", + "sql_context": "CREATE TABLE temperature (year INT, avg_temp FLOAT, avg_ocean_temp FLOAT);", + "sql": "SELECT t1.year, t1.avg_temp - t2.avg_ocean_temp AS temp_change FROM temperature t1 INNER JOIN temperature t2 ON t1.year \u003d t2.year WHERE t2.year \u003d t1.year - 1;", + "sql_explanation": "This query performs an inner join on the temperature table to compare each year\u0027s average temperature to the overall average ocean temperature in the previous year." 
+}, { + "id": "1750", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the common diseases among different marine species in the Pacific Ocean?", + "sql_context": "CREATE TABLE marine_species (id INT, name VARCHAR(50), region VARCHAR(50), common_disease VARCHAR(50)); INSERT INTO marine_species (id, name, region, common_disease) VALUES (1, \u0027Clownfish\u0027, \u0027Pacific Ocean\u0027, \u0027Skin Disease\u0027); CREATE TABLE diseases (id INT, name VARCHAR(50));", + "sql": "SELECT marine_species.name, marine_species.common_disease FROM marine_species INNER JOIN diseases ON marine_species.common_disease \u003d diseases.name;", + "sql_explanation": "The SQL query joins the marine_species and diseases tables on the common_disease column, retrieving the names of marine species and their common diseases in the Pacific Ocean." 
+}, { + "id": "2872", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum recorded depth for a marine species habitat?", + "sql_context": "CREATE TABLE marine_species (id INT, name VARCHAR(255), habitat_type VARCHAR(255), average_depth FLOAT); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (1, \u0027Clownfish\u0027, \u0027Coral Reef\u0027, 20.0); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (2, \u0027Blue Whale\u0027, \u0027Open Ocean\u0027, 200.0); CREATE TABLE ocean_depths (location VARCHAR(255), depth FLOAT); INSERT INTO ocean_depths (location, depth) VALUES (\u0027Mariana Trench\u0027, 10994.0); INSERT INTO ocean_depths (location, depth) VALUES (\u0027Sierra Leone Rise\u0027, 5791.0);", + "sql": "SELECT MAX(od.depth) as max_depth FROM marine_species ms JOIN ocean_depths od ON ms.habitat_type \u003d od.location;", + "sql_explanation": "This SQL query retrieves the maximum recorded depth for a marine species habitat. It does so by joining the marine_species and ocean_depths tables on the habitat_type and location columns, respectively. The query then uses the MAX() function to calculate the maximum depth across all records in the joined tables." 
+}, { + "id": "341", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for concerts in Canada for artists who identify as non-binary and are from Asia in 2023?", + "sql_context": "CREATE TABLE concert_events (event_id INT, artist_id INT, event_date DATE, event_location VARCHAR(255), attendance INT, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO concert_events (event_id, artist_id, event_date, event_location, attendance, revenue, country) VALUES (1, 1, \u00272023-01-01\u0027, \u0027NYC\u0027, 15000, 500000.00, \u0027Canada\u0027); CREATE TABLE artist_demographics (artist_id INT, artist_name VARCHAR(255), gender VARCHAR(50), ethnicity VARCHAR(50), country VARCHAR(50)); INSERT INTO artist_demographics (artist_id, artist_name, gender, ethnicity, country) VALUES (1, \u0027Li Zhang\u0027, \u0027non-binary\u0027, \u0027Asian\u0027, \u0027Canada\u0027);", + "sql": "SELECT SUM(revenue) FROM concert_events ce JOIN artist_demographics ad ON ce.artist_id \u003d ad.artist_id WHERE ce.country \u003d \u0027Canada\u0027 AND ad.gender \u003d \u0027non-binary\u0027 AND ad.ethnicity \u003d \u0027Asian\u0027 AND ce.event_date BETWEEN \u00272023-01-01\u0027 AND \u00272023-12-31\u0027;", + "sql_explanation": "The query calculates the total revenue for concerts in Canada for artists who identify as non-binary and are from Asia in 2023. It joins the concert_events and artist_demographics tables on artist_id, filters for concerts in Canada, artists who identify as non-binary and are from Asia, and concerts in 2023, and then sums the revenue for those concerts." 
+}, { + "id": "483", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify users who streamed the same song on consecutive days in \u0027music_streaming\u0027 table?", + "sql_context": "CREATE TABLE music_streaming (user_id INT, song_id INT, duration FLOAT, date DATE);", + "sql": "SELECT a.user_id, a.song_id, a.date as first_date, b.date as second_date FROM music_streaming a INNER JOIN music_streaming b ON a.user_id \u003d b.user_id AND a.song_id \u003d b.song_id AND b.date \u003d DATE_ADD(a.date, INTERVAL 1 DAY);", + "sql_explanation": "Find users who have streamed the same song on consecutive days by performing a self-join operation on the \u0027music_streaming\u0027 table and filtering records where the user_id, song_id, and date difference are equal." 
+}, { + "id": "519", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of concerts for artists who identify as female and are from South America in 2025?", + "sql_context": "CREATE TABLE concert_events (event_id INT, artist_id INT, event_date DATE, event_location VARCHAR(255), attendance INT, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO concert_events (event_id, artist_id, event_date, event_location, attendance, revenue, country) VALUES (1, 1, \u00272025-01-01\u0027, \u0027NYC\u0027, 15000, 500000.00, \u0027USA\u0027); CREATE TABLE artist_demographics (artist_id INT, artist_name VARCHAR(255), gender VARCHAR(50), ethnicity VARCHAR(50), country VARCHAR(50)); INSERT INTO artist_demographics (artist_id, artist_name, gender, ethnicity, country) VALUES (1, \u0027Maria Rodriguez\u0027, \u0027female\u0027, \u0027South American\u0027, \u0027BRA\u0027);", + "sql": "SELECT COUNT(*) FROM concert_events ce JOIN artist_demographics ad ON ce.artist_id \u003d ad.artist_id WHERE ad.gender \u003d \u0027female\u0027 AND ad.ethnicity \u003d \u0027South American\u0027 AND ce.event_date BETWEEN \u00272025-01-01\u0027 AND \u00272025-12-31\u0027;", + "sql_explanation": "The query calculates the number of concerts for artists who identify as female and are from South America in 2025. It joins the concert_events and artist_demographics tables on artist_id, filters for artists who identify as female and are from South America, and concerts in 2025, and then counts the number of concerts for those artists." 
+}, { + "id": "1054", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of concerts per month for artist \u0027Ariana Grande\u0027", + "sql_context": "CREATE TABLE artists (id INT, name TEXT); INSERT INTO artists (id, name) VALUES (1, \u0027Ariana Grande\u0027);", + "sql": "SELECT MONTH(concert_date) AS month, COUNT(*) AS concerts_count FROM concerts JOIN artists ON concerts.artist_id \u003d artists.id WHERE artists.name \u003d \u0027Ariana Grande\u0027 GROUP BY month;", + "sql_explanation": "This query joins the \u0027concerts\u0027 and \u0027artists\u0027 tables on the artist_id field, filters for concerts by Ariana Grande, groups the results by concert month, and counts the number of concerts for each month." 
+}, { + "id": "1268", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique users who have streamed songs from artists who have released a new album in the last year?", + "sql_context": "CREATE TABLE user_streams (user_id INT, artist_id INT, stream_date DATE); CREATE TABLE artist_albums (artist_id INT, release_date DATE);", + "sql": "SELECT COUNT(DISTINCT u.user_id) FROM user_streams u JOIN artist_albums a ON u.artist_id \u003d a.artist_id WHERE a.release_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "The SQL query joins the user_streams and artist_albums tables on the artist_id column, filters the records based on the release_date column, and then calculates the number of unique users who have streamed songs from artists who have released a new album in the last year." 
+}, { + "id": "1928", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of songs streamed per user for a specific genre in a given year?", + "sql_context": "CREATE TABLE Artists (id INT, name VARCHAR(100), genre VARCHAR(50)); CREATE TABLE Users (id INT, name VARCHAR(100)); CREATE TABLE Streams (id INT, user_id INT, artist_id INT, songs_streamed INT, year INT);", + "sql": "SELECT genre, AVG(songs_streamed) AS avg_songs_per_user FROM Streams s JOIN Artists a ON s.artist_id \u003d a.id WHERE year \u003d 2021 GROUP BY genre;", + "sql_explanation": "We calculate the average number of songs streamed per user for a specific genre in a given year by joining the Streams and Artists tables on the artist_id column, filtering for the desired year, and then grouping by the genre column and calculating the average songs per user using the AVG function." 
+}, { + "id": "2114", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of artists who performed at festivals in 2020 and 2021?", + "sql_context": "CREATE TABLE artists (id INT, name VARCHAR(255), age INT), festivals (id INT, artist_id INT, year INT); INSERT INTO artists (id, name, age) VALUES (1, \u0027ArtistA\u0027, 30), (2, \u0027ArtistB\u0027, 35), (3, \u0027ArtistC\u0027, 28), (4, \u0027ArtistD\u0027, 40); INSERT INTO festivals (id, artist_id, year) VALUES (1, 1, 2020), (2, 2, 2020), (3, 3, 2021), (4, 4, 2021);", + "sql": "SELECT AVG(age) AS avg_age FROM artists JOIN festivals ON artists.id \u003d festivals.artist_id WHERE festivals.year BETWEEN 2020 AND 2021;", + "sql_explanation": "This query joins the artists and festivals tables on the artist id and filters the data for festivals in 2020 and 2021. It then calculates the average age of the artists who performed at festivals in those years." 
+}, { + "id": "2414", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from concert ticket sales for artists from Australia?", + "sql_context": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(255), country VARCHAR(255)); CREATE TABLE concerts (concert_id INT, artist_id INT, concert_name VARCHAR(255), country VARCHAR(255), revenue INT); INSERT INTO artists (artist_id, artist_name, country) VALUES (101, \u0027Taylor Swift\u0027, \u0027USA\u0027), (102, \u0027BTS\u0027, \u0027South Korea\u0027), (103, \u0027Olivia Newton-John\u0027, \u0027Australia\u0027); INSERT INTO concerts (concert_id, artist_id, concert_name, country, revenue) VALUES (1, 101, \u0027Pop Music Festival\u0027, \u0027UK\u0027, 500000), (2, 102, \u0027K-Pop Music Festival\u0027, \u0027Japan\u0027, 700000), (3, 103, \u0027Country Music Festival\u0027, \u0027Australia\u0027, 600000), (4, 101, \u0027Rock Music Festival\u0027, \u0027USA\u0027, 800000);", + "sql": "SELECT SUM(revenue) FROM concerts JOIN artists ON concerts.artist_id \u003d artists.artist_id WHERE artists.country \u003d \u0027Australia\u0027;", + "sql_explanation": "This query uses a JOIN to combine the \u0027artists\u0027 and \u0027concerts\u0027 tables based on the \u0027artist_id\u0027 column, then filters the results to only include rows where the \u0027country\u0027 column in the \u0027artists\u0027 table is \u0027Australia\u0027. After that, it calculates the total revenue for all concerts by summing the \u0027revenue\u0027 column." 
+}, { + "id": "304", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of containers shipped from the US to Canada by sea in 2020?", + "sql_context": "CREATE TABLE shipping_routes (id INT PRIMARY KEY, origin_country VARCHAR(50), destination_country VARCHAR(50), year INT); INSERT INTO shipping_routes (id, origin_country, destination_country, year) VALUES (1, \u0027USA\u0027, \u0027Canada\u0027, 2020); CREATE TABLE cargo (id INT PRIMARY KEY, container_count INT, shipping_route_id INT, FOREIGN KEY (shipping_route_id) REFERENCES shipping_routes(id));", + "sql": "SELECT SUM(cargo.container_count) FROM cargo INNER JOIN shipping_routes ON cargo.shipping_route_id \u003d shipping_routes.id WHERE shipping_routes.origin_country \u003d \u0027USA\u0027 AND shipping_routes.destination_country \u003d \u0027Canada\u0027 AND shipping_routes.year \u003d 2020;", + "sql_explanation": "This query calculates the total number of containers shipped from the US to Canada by sea in 2020. It does so by joining the \u0027cargo\u0027 table with the \u0027shipping_routes\u0027 table based on the \u0027shipping_route_id\u0027 and filtering the results by \u0027origin_country\u0027, \u0027destination_country\u0027 and \u0027year\u0027. Finally, it sums up the container counts." 
+}, { + "id": "390", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of containers shipped from the Port of Kolkata to India in 2019?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name TEXT, country TEXT);CREATE TABLE shipments (shipment_id INT, shipment_weight INT, ship_date DATE, port_id INT); INSERT INTO ports VALUES (1, \u0027Port of Kolkata\u0027, \u0027India\u0027), (2, \u0027Port of Chennai\u0027, \u0027India\u0027); INSERT INTO shipments VALUES (1, 2000, \u00272019-01-01\u0027, 1), (2, 1500, \u00272019-02-15\u0027, 2);", + "sql": "SELECT AVG(shipment_weight) FROM shipments JOIN ports ON shipments.port_id \u003d ports.port_id WHERE ports.country \u003d \u0027India\u0027 AND ports.port_name IN (\u0027Port of Kolkata\u0027, \u0027Port of Chennai\u0027) AND ship_date BETWEEN \u00272019-01-01\u0027 AND \u00272019-12-31\u0027;", + "sql_explanation": "This SQL query calculates the average weight of containers shipped from the Port of Kolkata to India by joining the shipments and ports tables on the port_id column. It then filters the records based on the conditions specified in the WHERE clause and finally calculates the average weight of the shipments using the AVG function." 
+}, { + "id": "402", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of vessels that docked in the Port of Rotterdam in Q3 2019 and had a maintenance date in Q4 2019.", + "sql_context": "CREATE TABLE vessels (vessel_id INT, vessel_name TEXT, last_maintenance_date DATE); INSERT INTO vessels VALUES (1, \u0027Vessel A\u0027, \u00272019-10-15\u0027), (2, \u0027Vessel B\u0027, \u00272019-12-10\u0027), (3, \u0027Vessel C\u0027, \u00272019-11-25\u0027), (5, \u0027Vessel E\u0027, \u00272019-08-10\u0027); CREATE TABLE port_visits (visit_id INT, vessel_id INT, port_id INT, visit_date DATE); INSERT INTO port_visits VALUES (1, 1, 6, \u00272019-07-01\u0027), (2, 2, 6, \u00272019-08-01\u0027), (3, 3, 6, \u00272019-09-01\u0027), (4, 5, 6, \u00272019-10-01\u0027); CREATE TABLE ports (port_id INT, port_name TEXT, country TEXT); INSERT INTO ports VALUES (6, \u0027Port of Rotterdam\u0027, \u0027Netherlands\u0027);", + "sql": "SELECT COUNT(DISTINCT vessels.vessel_id) FROM vessels JOIN port_visits ON vessels.vessel_id \u003d port_visits.vessel_id WHERE port_visits.port_id \u003d 6 AND QUARTER(port_visits.visit_date) \u003d 3 AND QUARTER(vessels.last_maintenance_date) \u003d 4;", + "sql_explanation": "This SQL query lists the number of vessels that docked in the Port of Rotterdam in Q3 2019 and had a maintenance date in Q4 2019. It first joins the \u0027vessels\u0027 table with the \u0027port_visits\u0027 table based on the vessel_id. It then filters the records based on the quarter of visit_date and last_maintenance_date, and port_id. 
Finally, it counts the number of distinct vessel_id that meet the conditions." +}, { + "id": "738", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cargo incidents recorded for vessels owned by companies from Asia, grouped by the year in which they occurred?", + "sql_context": "CREATE TABLE companies (company_id INT, company_name TEXT, country TEXT); INSERT INTO companies VALUES (1, \u0027Oceanic Maritime Enterprise\u0027, \u0027Singapore\u0027), (2, \u0027Asian Shipping Lines\u0027, \u0027China\u0027), (3, \u0027Pacific Ocean Navigation\u0027, \u0027Japan\u0027); CREATE TABLE incidents (incident_id INT, company_id INT, incident_type TEXT, incident_date DATE); INSERT INTO incidents VALUES (1, 1, \u0027Collision\u0027, \u00272021-10-15\u0027), (2, 1, \u0027Mechanical Failure\u0027, \u00272022-03-04\u0027), (3, 2, \u0027Grounding\u0027, \u00272021-06-21\u0027), (4, 3, \u0027Fire\u0027, \u00272022-08-10\u0027);", + "sql": "SELECT COUNT(incidents.incident_id) FROM incidents JOIN companies ON incidents.company_id \u003d companies.company_id WHERE companies.country \u003d \u0027Asia\u0027 GROUP BY EXTRACT(YEAR FROM incidents.incident_date);", + "sql_explanation": "This query calculates the total number of cargo incidents recorded for vessels owned by companies from Asia, grouped by the year in which they occurred. 
It does this by joining the incidents and companies tables on the company_id column, filtering for rows where the country is \u0027Asia\u0027, and then grouping the results by the year of the incident_date and calculating the count of the incident_id column." +}, { + "id": "1008", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo weight handled by each port, including their corresponding cargo type?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(100)); CREATE TABLE cargo (cargo_id INT, cargo_type VARCHAR(50), port_id INT); INSERT INTO ports VALUES (1, \u0027Port of Los Angeles\u0027); INSERT INTO ports VALUES (2, \u0027Port of Long Beach\u0027); INSERT INTO cargo VALUES (1, \u0027Container\u0027, 1); INSERT INTO cargo VALUES (2, \u0027Bulk\u0027, 2);", + "sql": "SELECT ports.port_name, cargo.cargo_type, SUM(cargo.weight) as total_weight FROM cargo INNER JOIN ports ON cargo.port_id \u003d ports.port_id GROUP BY ports.port_name, cargo.cargo_type;", + "sql_explanation": "This query performs an inner join between the cargo and ports tables, using the port_id column. It then groups the results by port_name and cargo_type, and calculates the sum of the weight column for each group." 
+}, { + "id": "1031", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total tonnage of cargo handled by vessels with the \u0027Bulk Carrier\u0027 type in the Pacific region?", + "sql_context": "CREATE TABLE vessels(id INT, name VARCHAR(50), type VARCHAR(50)); CREATE TABLE cargo_handling(vessel_id INT, cargo_type VARCHAR(50), tonnage INT, handling_date DATE, port_region VARCHAR(50)); INSERT INTO vessels VALUES (1, \u0027Vessel1\u0027, \u0027Bulk Carrier\u0027); INSERT INTO cargo_handling VALUES (1, \u0027Iron Ore\u0027, 12000, \u00272022-02-10\u0027, \u0027Pacific\u0027);", + "sql": "SELECT SUM(cargo_handling.tonnage) FROM cargo_handling INNER JOIN vessels ON cargo_handling.vessel_id \u003d vessels.id WHERE vessels.type \u003d \u0027Bulk Carrier\u0027 AND port_region \u003d \u0027Pacific\u0027;", + "sql_explanation": "This query calculates the total tonnage of cargo handled by vessels with the \u0027Bulk Carrier\u0027 type in the Pacific region by joining the vessels and cargo_handling tables and filtering on the vessel type and port region." 
+}, { + "id": "1140", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the average age (in years) of all vessels in each fleet, excluding fleets with no vessels, and show the results for the top 2 fleets with the oldest average vessel age.", + "sql_context": "CREATE TABLE fleets(fleet_id INT, company TEXT, launch_year INT);CREATE TABLE vessels(vessel_id INT, fleet_id INT, name TEXT, launch_year INT);INSERT INTO fleets VALUES (1,\u0027Company A\u0027,2000),(2,\u0027Company B\u0027,2010),(3,\u0027Company C\u0027,2005);INSERT INTO vessels VALUES (1,1,\u0027Vessel 1\u0027,2001),(2,1,\u0027Vessel 2\u0027,2002),(3,2,\u0027Vessel 3\u0027,2012),(4,3,\u0027Vessel 4\u0027,2006),(5,3,\u0027Vessel 5\u0027,2007);", + "sql": "SELECT f.company, AVG(v.launch_year) as avg_age FROM fleets f JOIN vessels v ON f.fleet_id \u003d v.fleet_id GROUP BY f.fleet_id HAVING avg_age \u003e 0 ORDER BY avg_age DESC LIMIT 2;", + "sql_explanation": "This query performs an inner join between the fleets and vessels tables on the fleet_id column, which combines all fleets with their corresponding vessels. The AVG function is then used to determine the average launch year for each fleet, which represents the average age of the vessels in each fleet. The HAVING clause filters out fleets with no vessels (i.e., with an average age of 0). Finally, the ORDER BY and LIMIT clauses sort the results and display the top 2 fleets with the oldest average vessel age." 
+}, { + "id": "1154", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cargo weight handled by port \u0027A\u0027 in Q1 2022?", + "sql_context": "CREATE TABLE port (port_id VARCHAR(10), port_name VARCHAR(20)); INSERT INTO port VALUES (\u0027P1\u0027, \u0027A\u0027), (\u0027P2\u0027, \u0027B\u0027); CREATE TABLE handling (handling_id INT, port_id VARCHAR(10), cargo_weight INT, handling_date DATE); INSERT INTO handling VALUES (1, \u0027P1\u0027, 5000, \u00272022-01-01\u0027), (2, \u0027P1\u0027, 6000, \u00272022-02-01\u0027), (3, \u0027P1\u0027, 7000, \u00272022-03-01\u0027), (4, \u0027P2\u0027, 8000, \u00272022-01-01\u0027);", + "sql": "SELECT SUM(cargo_weight) FROM handling INNER JOIN port ON handling.port_id \u003d port.port_id WHERE port.port_name \u003d \u0027A\u0027 AND handling_date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027;", + "sql_explanation": "The SQL query performs an inner join between the \u0027handling\u0027 and \u0027port\u0027 tables using the \u0027port_id\u0027 column. It then filters the data based on the port name \u0027A\u0027 and the date range for Q1 2022. Finally, it calculates the total cargo weight using the SUM function." 
+}, { + "id": "1304", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all regulatory violations for vessels in the \u0027Oceanic\u0027 fleet.", + "sql_context": "CREATE TABLE vessels (vessel_id INT, fleet VARCHAR(50), CO2_emission_level FLOAT); CREATE TABLE violations (violation_id INT, vessel_id INT, violation_description VARCHAR(255), violation_date DATE);", + "sql": "SELECT v.vessel_name, v.fleet, vv.violation_description, vv.violation_date FROM vessels v JOIN violations vv ON v.vessel_id \u003d vv.vessel_id WHERE v.fleet \u003d \u0027Oceanic\u0027;", + "sql_explanation": "The SQL query joins the \u0027vessels\u0027 and \u0027violations\u0027 tables and filters the results based on the fleet. It then returns all columns from both tables for the resulting records." 
+}, { + "id": "1354", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total container weight handled in \u0027January\u0027 2022?", + "sql_context": "CREATE TABLE time_table (time_id DATE); INSERT INTO time_table VALUES (\u00272022-01-01\u0027), (\u00272022-01-02\u0027), (\u00272022-01-03\u0027); CREATE TABLE cargo (cargo_id INT, time_id DATE, container_weight FLOAT); INSERT INTO cargo VALUES (1, \u00272022-01-01\u0027, 2000.5), (2, \u00272022-01-02\u0027, 3000.2), (3, \u00272022-01-03\u0027, 1500.3);", + "sql": "SELECT SUM(container_weight) FROM cargo INNER JOIN time_table ON cargo.time_id \u003d time_table.time_id WHERE time_table.time_id BETWEEN \u00272022-01-01\u0027 AND \u00272022-01-31\u0027;", + "sql_explanation": "The SQL query calculates the total container weight for January 2022 by joining the cargo and time_table tables on the time_id column and summing the container_weight column for records between \u00272022-01-01\u0027 and \u00272022-01-31\u0027." 
+}, { + "id": "1472", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of containers loaded on vessels in the port of Oakland for the year 2020?", + "sql_context": "CREATE TABLE port (port_id INT, port_name VARCHAR(50)); INSERT INTO port (port_id, port_name) VALUES (1, \u0027Oakland\u0027), (2, \u0027Seattle\u0027); CREATE TABLE vessels (vessel_id INT, port_id INT, quantity_containers INT); INSERT INTO vessels (vessel_id, port_id, quantity_containers) VALUES (101, 1, 500), (102, 1, 700), (103, 2, 800);", + "sql": "SELECT SUM(quantity_containers) FROM vessels JOIN port ON vessels.port_id \u003d port.port_id WHERE port.port_name \u003d \u0027Oakland\u0027 AND YEAR(vessels.vessel_id) \u003d 2020;", + "sql_explanation": "The SQL query calculates the total quantity of containers loaded on vessels in the port of Oakland for the year 2020 by joining the vessels and port tables on the port_id column. It then filters the records for the port name \u0027Oakland\u0027 and the year 2020. Finally, it calculates the sum of the quantity_containers column to get the total quantity." 
+}, { + "id": "1517", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total cargo handling volume and number of handling operations for each type of vessel in the \u0027shipping\u0027 and \u0027port_operations\u0027 schemas.", + "sql_context": "CREATE TABLE shipping.vessels (id INT, name VARCHAR(50), type VARCHAR(50), capacity INT); CREATE TABLE port_operations.cargo_handling (id INT, port_id INT, volume INT, vessel_type VARCHAR(50));", + "sql": "SELECT s.type, COUNT(co.id), SUM(co.volume) FROM shipping.vessels s INNER JOIN port_operations.cargo_handling co ON s.type \u003d co.vessel_type GROUP BY s.type;", + "sql_explanation": "This SQL query lists the total cargo handling volume and number of handling operations for each type of vessel by joining the \u0027vessels\u0027 and \u0027cargo_handling\u0027 tables on the \u0027type\u0027 column and grouping the result by the type of vessels. The number of handling operations is calculated by counting the number of rows in the \u0027cargo_handling\u0027 table and the total cargo handling volume is calculated by summing the \u0027volume\u0027 column in the \u0027cargo_handling\u0027 table." 
+}, { + "id": "1940", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all ports where the company Seaspeed has at least one cargo operation.", + "sql_context": "CREATE TABLE ports (id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE cargo_operations (id INT, port_id INT, company VARCHAR(50), type VARCHAR(50)); INSERT INTO ports (id, name, country) VALUES (1, \u0027Port of Oakland\u0027, \u0027USA\u0027), (2, \u0027Port of Singapore\u0027, \u0027Singapore\u0027); INSERT INTO cargo_operations (id, port_id, company, type) VALUES (1, 1, \u0027Seaspeed\u0027, \u0027import\u0027), (2, 1, \u0027Seaspeed\u0027, \u0027export\u0027), (3, 2, \u0027Windstar Shipping\u0027, \u0027import\u0027);", + "sql": "SELECT ports.name FROM ports INNER JOIN cargo_operations ON ports.id \u003d cargo_operations.port_id WHERE cargo_operations.company \u003d \u0027Seaspeed\u0027;", + "sql_explanation": "This SQL query lists all ports where the company Seaspeed has at least one cargo operation. It does so by performing an inner join between ports and cargo_operations on the port_id column, then filtering the results to only show rows where the company is \u0027Seaspeed\u0027." 
+}, { + "id": "2163", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cargo weight handled by each port in Spain?", + "sql_context": "CREATE TABLE ports (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO ports (id, name, country) VALUES (1, \u0027Algeciras\u0027, \u0027Spain\u0027), (2, \u0027Valencia\u0027, \u0027Spain\u0027), (3, \u0027Barcelona\u0027, \u0027Spain\u0027); CREATE TABLE cargo (id INT, port_id INT, weight INT); INSERT INTO cargo (id, port_id, weight) VALUES (1, 1, 1000), (2, 1, 2000), (3, 2, 1500), (4, 3, 2500);", + "sql": "SELECT p.name, SUM(c.weight) as total_weight FROM ports p JOIN cargo c ON p.id \u003d c.port_id WHERE p.country \u003d \u0027Spain\u0027 GROUP BY p.name;", + "sql_explanation": "The SQL query joins the ports and cargo tables on the port_id column. It then filters for ports in Spain and calculates the total weight for each port using the SUM function and GROUP BY clause." +}, { + "id": "2324", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of cargo items handled at each port, including ports that have not handled any cargo. 
Display the port name and its corresponding total cargo count, even if the count is zero.", + "sql_context": "CREATE TABLE ports(port_id INT, port_name TEXT);CREATE TABLE cargo(cargo_id INT, port_id INT);INSERT INTO ports VALUES (1,\u0027Port A\u0027),(2,\u0027Port B\u0027),(3,\u0027Port C\u0027),(4,\u0027Port D\u0027),(5,\u0027Port E\u0027);INSERT INTO cargo VALUES (1,1),(2,1),(3,2),(4,3),(5,5);", + "sql": "SELECT p.port_name, COUNT(c.cargo_id) as total_cargo FROM ports p LEFT JOIN cargo c ON p.port_id \u003d c.port_id GROUP BY p.port_id;", + "sql_explanation": "This query performs a left join between the ports and cargo tables on the port_id column, which combines all ports with their corresponding cargo information. The COUNT function is then used with the cargo_id column to determine the total number of cargo items for each port. Finally, the GROUP BY clause groups the results by port." +}, { + "id": "2354", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of ports where the Kota Pertama has docked.", + "sql_context": "CREATE TABLE Ports (PortID INT, PortName VARCHAR(100), City VARCHAR(100), Country VARCHAR(100)); INSERT INTO Ports (PortID, PortName, City, Country) VALUES (1, \u0027Port of Singapore\u0027, \u0027Singapore\u0027, \u0027Singapore\u0027); INSERT INTO Ports (PortID, PortName, City, Country) VALUES (2, \u0027Port of Rotterdam\u0027, \u0027Rotterdam\u0027, \u0027Netherlands\u0027); CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(100), VesselType VARCHAR(100), PortID INT); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (1, 
\u0027Kota Pertama\u0027, \u0027Container Ship\u0027, 1);", + "sql": "SELECT Ports.PortName FROM Ports INNER JOIN Vessels ON Ports.PortID \u003d Vessels.PortID WHERE Vessels.VesselName \u003d \u0027Kota Pertama\u0027;", + "sql_explanation": "This query performs an INNER JOIN between the Ports and Vessels tables on the PortID column, then filters the results for rows where the VesselName is \u0027Kota Pertama\u0027. Finally, it selects the PortName column from the filtered results." +}, { + "id": "319", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles were sold in each city in 2020?", + "sql_context": "CREATE TABLE cities (id INT, name VARCHAR(50), population INT); INSERT INTO cities (id, name, population) VALUES (1, \u0027San Francisco\u0027, 874000); INSERT INTO cities (id, name, population) VALUES (2, \u0027New York\u0027, 8601000); CREATE TABLE electric_vehicles (id INT, city_id INT, make VARCHAR(50), model VARCHAR(50), year INT, sales INT); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (1, 1, \u0027Tesla\u0027, \u0027Model S\u0027, 2020, 5000); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (2, 1, \u0027Tesla\u0027, \u0027Model 3\u0027, 2020, 8000); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (3, 2, \u0027Chevrolet\u0027, \u0027Bolt\u0027, 2020, 3000);", + "sql": "SELECT cities.name, electric_vehicles.year, SUM(electric_vehicles.sales) as total_sales FROM cities JOIN electric_vehicles ON cities.id \u003d electric_vehicles.city_id GROUP BY 
cities.name, electric_vehicles.year HAVING electric_vehicles.year \u003d 2020;", + "sql_explanation": "The SQL query selects the name column from the cities table, and the year and sales columns from the electric_vehicles table. It joins the two tables on the city_id column, and groups the results by the name column and the year column. It then filters the results to only include rows where the year is 2020, and calculates the sum of the sales column for each group." +}, { + "id": "836", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of electric vehicle charging stations in the top 10 most populous cities in the US.", + "sql_context": "CREATE TABLE cities (city_name TEXT, population INT);CREATE TABLE charging_stations (station_id INT, station_name TEXT, city_name TEXT, num_charging_points INT);", + "sql": "SELECT c.city_name, COUNT(cs.station_id) AS num_charging_stations FROM cities c JOIN charging_stations cs ON c.city_name \u003d cs.city_name GROUP BY c.city_name ORDER BY population DESC LIMIT 10;", + "sql_explanation": "This query shows the number of electric vehicle charging stations in the top 10 most populous cities in the US by joining the cities and charging_stations tables on the city_name column. It groups the rows by city_name using the GROUP BY clause and orders them by population in descending order using the ORDER BY clause. The LIMIT clause restricts the results to the top 10 cities. The final SELECT clause counts the number of charging stations in each city." 
+}, { + "id": "1563", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance traveled by electric vehicles in rural areas?", + "sql_context": "CREATE TABLE ElectricVehicleTransportation (EVID INT, Mode VARCHAR(50), Distance DECIMAL(5,2)); CREATE TABLE Areas (EVID INT, Area VARCHAR(50));", + "sql": "SELECT AVG(Distance) AS AvgDistance FROM ElectricVehicleTransportation JOIN Areas ON ElectricVehicleTransportation.EVID \u003d Areas.EVID WHERE Area \u003d \u0027Rural\u0027;", + "sql_explanation": "This query calculates the average distance traveled by electric vehicles in rural areas. It does this by joining the \u0027ElectricVehicleTransportation\u0027 and \u0027Areas\u0027 tables on the \u0027EVID\u0027 column, then filtering the joined table to only include rows where the \u0027Area\u0027 column is \u0027Rural\u0027 and calculating the average using the AVG function." 
+}, { + "id": "1710", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicles are owned by women in each state?", + "sql_context": "CREATE TABLE states (id INT, name VARCHAR(50)); CREATE TABLE electric_vehicles (id INT, state_id INT, owner_gender VARCHAR(10), vehicle_count INT); INSERT INTO states (id, name) VALUES (1, \u0027California\u0027), (2, \u0027Texas\u0027), (3, \u0027Florida\u0027); INSERT INTO electric_vehicles (id, state_id, owner_gender, vehicle_count) VALUES (1, 1, \u0027Female\u0027, 30000), (2, 2, \u0027Male\u0027, 40000), (3, 3, \u0027Female\u0027, 20000);", + "sql": "SELECT s.name, SUM(ev.vehicle_count) FROM states s JOIN electric_vehicles ev ON s.id \u003d ev.state_id WHERE ev.owner_gender \u003d \u0027Female\u0027 GROUP BY s.name;", + "sql_explanation": "This query joins the states and electric_vehicles tables based on their IDs and groups the results by state. It then calculates the total number of electric vehicles owned by women in each state." 
+}, { + "id": "147", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in temperature for each Arctic station between 2020 and 2021?", + "sql_context": "CREATE TABLE arctic_weather (id INT, station_id INT, time TIMESTAMP, temperature DECIMAL(5,2)); INSERT INTO arctic_weather (id, station_id, time, temperature) VALUES (1, 1, \u00272020-01-01 00:00:00\u0027, 1.2), (2, 1, \u00272021-01-01 00:00:00\u0027, 1.5), (3, 2, \u00272020-01-01 00:00:00\u0027, -4.8), (4, 2, \u00272021-01-01 00:00:00\u0027, -4.5);", + "sql": "SELECT station_id, name, (MAX(CASE WHEN YEAR(time) \u003d 2021 THEN temperature ELSE NULL END) - MAX(CASE WHEN YEAR(time) \u003d 2020 THEN temperature ELSE NULL END)) as temp_diff FROM arctic_weather JOIN arctic_stations ON arctic_weather.station_id \u003d arctic_stations.id GROUP BY station_id, name", + "sql_explanation": "Calculate the temperature change (temp_diff) between 2020 and 2021 for each Arctic station (station_id, name) using window functions." 
+}, { + "id": "524", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of community projects managed by indigenous communities in the \u0027community_projects\u0027 table, and how many of those projects are still ongoing?", + "sql_context": "CREATE TABLE community_projects (project_id INT, project_name TEXT, status TEXT, managed_by TEXT); CREATE TABLE indigenous_communities (community_id INT, community_name TEXT);", + "sql": "SELECT COUNT(cp.project_id) as total_projects, SUM(CASE WHEN cp.status \u003d \u0027ongoing\u0027 THEN 1 ELSE 0 END) as ongoing_projects FROM community_projects cp JOIN indigenous_communities ic ON cp.managed_by \u003d ic.community_name;", + "sql_explanation": "The SQL query uses a JOIN to combine data from both tables based on the managed_by column. The COUNT function calculates the total number of projects, and the SUM function with a CASE statement calculates the number of ongoing projects." 
+}, { + "id": "1034", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all expeditions led by researchers with \u0027indigenous_knowledge\u0027 expertise since 2015.", + "sql_context": "CREATE TABLE researchers ( id INT PRIMARY KEY, name VARCHAR(255), expertise VARCHAR(255) ); INSERT INTO researchers (id, name, expertise) VALUES (1, \u0027Juanita Martinez\u0027, \u0027indigenous_knowledge\u0027); CREATE TABLE expeditions ( id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), year INT, leader_id INT, FOREIGN KEY (leader_id) REFERENCES researchers(id) ); INSERT INTO expeditions (id, name, location, year, leader_id) VALUES (1, \u0027Ice Edge Expedition\u0027, \u0027Arctic\u0027, 2016, 1);", + "sql": "SELECT e.name, e.location, e.year, r.name AS leader_name FROM expeditions e JOIN researchers r ON e.leader_id \u003d r.id WHERE r.expertise \u003d \u0027indigenous_knowledge\u0027 AND e.year \u003e\u003d 2015;", + "sql_explanation": "This SQL query selects the expedition name, location, and year, along with the leader\u0027s name from the \u0027expeditions\u0027 and \u0027researchers\u0027 tables, filtered for leaders with \u0027indigenous_knowledge\u0027 expertise and expeditions since 2015." 
+}, { + "id": "358", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 cosmetic products with the highest customer reviews, considering only those that are labeled as \u0027Clean Beauty\u0027.", + "sql_context": "CREATE TABLE products (product_id INT, product_name TEXT, clean_beauty BOOLEAN); CREATE TABLE reviews (review_id INT, product_id INT, review_rating INT); INSERT INTO products (product_id, product_name, clean_beauty) VALUES (1, \u0027Product 1\u0027, true), (2, \u0027Product 2\u0027, false), (3, \u0027Product 3\u0027, true), (4, \u0027Product 4\u0027, true), (5, \u0027Product 5\u0027, false); INSERT INTO reviews (review_id, product_id, review_rating) VALUES (1, 1, 5), (2, 1, 4), (3, 2, 3), (4, 3, 5), (5, 4, 5), (6, 4, 4), (7, 5, 3);", + "sql": "SELECT products.product_name, AVG(reviews.review_rating) AS avg_rating FROM products JOIN reviews ON products.product_id \u003d reviews.product_id WHERE products.clean_beauty \u003d true GROUP BY products.product_id ORDER BY avg_rating DESC LIMIT 3;", + "sql_explanation": "This SQL query calculates the average review rating for each product, filters the results for only those products labeled as \u0027Clean Beauty\u0027, and returns the top 3 products with the highest average ratings." 
+}, { + "id": "362", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for organic skincare products in the US?", + "sql_context": "CREATE TABLE skincare_sales (product_id INT, product_name VARCHAR(255), sale_price DECIMAL(10,2), is_organic BOOLEAN, country VARCHAR(255)); CREATE TABLE products (product_id INT, product_name VARCHAR(255), category VARCHAR(255)); INSERT INTO skincare_sales (product_id, product_name, sale_price, is_organic, country) VALUES (1, \u0027Cleanser\u0027, 25.99, true, \u0027USA\u0027); INSERT INTO products (product_id, product_name, category) VALUES (1, \u0027Cleanser\u0027, \u0027Skincare\u0027);", + "sql": "SELECT SUM(skincare_sales.sale_price) FROM skincare_sales INNER JOIN products ON skincare_sales.product_id \u003d products.product_id WHERE skincare_sales.is_organic \u003d true AND skincare_sales.country \u003d \u0027USA\u0027 AND products.category \u003d \u0027Skincare\u0027;", + "sql_explanation": "The SQL query calculates the total revenue for organic skincare products in the US by summing the sale_price from skincare_sales table where is_organic is true and country is \u0027USA\u0027. It then joins the products table to ensure the category is \u0027Skincare\u0027." 
+}, { + "id": "477", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue of organic skincare products sold in the US in Q1 2021?", + "sql_context": "CREATE TABLE sales(product_id INT, sale_date DATE, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO sales VALUES (1, \u00272021-01-01\u0027, 50.00, \u0027US\u0027); INSERT INTO sales VALUES (2, \u00272021-01-02\u0027, 75.00, \u0027US\u0027); CREATE TABLE products(product_id INT, product_name VARCHAR(50), is_organic BOOLEAN); INSERT INTO products VALUES (1, \u0027Aloe Vera Moisturizer\u0027, TRUE); INSERT INTO products VALUES (2, \u0027Vitamin C Serum\u0027, FALSE);", + "sql": "SELECT SUM(sales.revenue) FROM sales INNER JOIN products ON sales.product_id \u003d products.product_id WHERE products.is_organic \u003d TRUE AND YEAR(sales.sale_date) \u003d 2021 AND QUARTER(sales.sale_date) \u003d 1 AND sales.country \u003d \u0027US\u0027;", + "sql_explanation": "The query calculates the total revenue of organic skincare products sold in the US during Q1 2021 by summing the revenue column in the sales table. It joins the sales table with the products table using the product_id. The WHERE clause filters for organic products (is_organic \u003d TRUE) and sales in the US during Q1 2021." 
+}, { + "id": "717", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales of refillable skincare products by month?", + "sql_context": "CREATE TABLE months (month_id INT, month_name VARCHAR(255)); CREATE TABLE products (product_id INT, product_name VARCHAR(255), is_refillable BOOLEAN, sales INT, month_id INT);", + "sql": "SELECT m.month_name, SUM(p.sales) as total_sales FROM months m INNER JOIN products p ON m.month_id \u003d p.month_id WHERE p.is_refillable \u003d TRUE AND p.product_name LIKE \u0027%skincare%\u0027 GROUP BY m.month_name;", + "sql_explanation": "The SQL query first joins the months and products tables on the month_id column. It then filters the products table to only include refillable skincare items using the is_refillable and product_name columns. Finally, it calculates the total sales for refillable skincare products for each month and groups the results by month_name." 
+}, { + "id": "837", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest sales of organic skincare products?", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, country VARCHAR(50), sales_volume INT); CREATE TABLE products (product_id INT, product_name VARCHAR(100), is_organic BOOLEAN);", + "sql": "SELECT country, SUM(sales_volume) as total_sales FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE is_organic \u003d TRUE GROUP BY country ORDER BY total_sales DESC LIMIT 5;", + "sql_explanation": "The SQL query finds the highest sales of organic skincare products by joining the sales and products table on the product_id column and filtering where the is_organic column is true. Then it groups the results by country, sums the sales volume, and orders the results in descending order to find the top 5 countries with the highest sales volume." 
+}, { + "id": "983", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many new skincare products were launched in the second half of 2021 by brands that are certified B Corporations?", + "sql_context": "CREATE TABLE skincare_products (product_id INT, brand_id INT, launch_date DATE); CREATE TABLE brands (brand_id INT, name VARCHAR(255), is_b_corp BOOLEAN);", + "sql": "SELECT COUNT(*) FROM skincare_products INNER JOIN brands ON skincare_products.brand_id \u003d brands.brand_id WHERE EXTRACT(MONTH FROM launch_date) BETWEEN 7 AND 12 AND is_b_corp \u003d TRUE;", + "sql_explanation": "The SQL query calculates the number of new skincare products launched in the second half of 2021 by brands that are certified B Corporations by joining the skincare_products table with the brands table based on the brand_id column, filtering the joined table based on the launch_date and is_b_corp columns using the EXTRACT function and the BETWEEN operator, and then using the COUNT function to count the number of matching records." 
+}, { + "id": "1090", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average sustainability score of cosmetics brands that use recycled packaging in the EU.", + "sql_context": "CREATE TABLE CosmeticsBrands (brand_id INT, brand TEXT, sustainability_score DECIMAL(3,1), uses_recycled_packaging BOOLEAN); INSERT INTO CosmeticsBrands (brand_id, brand, sustainability_score, uses_recycled_packaging) VALUES (1, \u0027Lush\u0027, 4.8, true); CREATE TABLE BrandCountry (brand_id INT, country TEXT); INSERT INTO BrandCountry (brand_id, country) VALUES (1, \u0027Germany\u0027);", + "sql": "SELECT AVG(cb.sustainability_score) FROM CosmeticsBrands cb JOIN BrandCountry bc ON cb.brand_id \u003d bc.brand_id WHERE cb.uses_recycled_packaging \u003d true AND bc.country LIKE \u0027EU%\u0027;", + "sql_explanation": "Join CosmeticsBrands and BrandCountry tables where brand_id is the same and uses_recycled_packaging is true and country starts with \u0027EU\u0027. Then, calculate the average sustainability score for these brands." 
+}, { + "id": "1097", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the top 5 brands by sales of natural products", + "sql_context": "CREATE TABLE sales (id INT, product_id INT, brand_id INT, quantity INT); CREATE TABLE brands (id INT, name VARCHAR(255), is_natural BOOLEAN);", + "sql": "SELECT b.name, SUM(s.quantity) as total_sales FROM brands b INNER JOIN sales s ON b.id \u003d s.brand_id WHERE b.is_natural \u003d TRUE GROUP BY b.name ORDER BY total_sales DESC LIMIT 5;", + "sql_explanation": "This query shows the top 5 brands by sales of natural products. It first calculates the total sales for each brand using a GROUP BY clause and filters for is_natural \u003d TRUE. Then, it orders the results by total sales in descending order and limits the results to the top 5." 
+}, { + "id": "1213", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales revenue of organic skincare products in the European market?", + "sql_context": "CREATE TABLE SkincareSales (productID INT, productName VARCHAR(50), region VARCHAR(50), revenue DECIMAL(10,2)); INSERT INTO SkincareSales (productID, productName, region, revenue) VALUES (1, \u0027Nourishing Cream\u0027, \u0027Europe\u0027, 5000.00), (2, \u0027Soothing Lotion\u0027, \u0027Europe\u0027, 7000.00), (3, \u0027Regenerating Serum\u0027, \u0027Europe\u0027, 8000.00); CREATE TABLE ProductIngredients (productID INT, ingredient VARCHAR(50), organic BOOLEAN); INSERT INTO ProductIngredients (productID, ingredient, organic) VALUES (1, \u0027Aloe Vera\u0027, true), (2, \u0027Chamomile\u0027, true), (3, \u0027Retinol\u0027, false);", + "sql": "SELECT SUM(revenue) FROM SkincareSales INNER JOIN ProductIngredients ON SkincareSales.productID \u003d ProductIngredients.productID WHERE organic \u003d true AND region \u003d \u0027Europe\u0027;", + "sql_explanation": "This query calculates the total sales revenue of organic skincare products in the European market by joining the SkincareSales table with the ProductIngredients table on the productID. It then filters for organic products and the European region, and finally calculates the sum of revenue for these records." 
+}, { + "id": "1250", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales volume of makeup products with a cruelty-free label in the US?", + "sql_context": "CREATE TABLE sales (sale_id INT, product_id INT, product_type VARCHAR(50), country VARCHAR(50), sales_volume INT); CREATE TABLE products (product_id INT, product_name VARCHAR(100), is_cruelty_free BOOLEAN);", + "sql": "SELECT SUM(sales_volume) FROM sales JOIN products ON sales.product_id \u003d products.product_id WHERE is_cruelty_free \u003d TRUE AND product_type \u003d \u0027makeup\u0027 AND country \u003d \u0027US\u0027;", + "sql_explanation": "The SQL query calculates the total sales volume of makeup products with a cruelty-free label in the US by joining the sales and products table on the product_id column and filtering where the is_cruelty_free column is true, the product_type is makeup, and the country is US. Then it uses the SUM function to find the total sales volume." 
+}, { + "id": "2116", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of organic skincare products sold in France", + "sql_context": "CREATE TABLE sales (id INT, product_id INT, quantity INT, country VARCHAR(50)); CREATE TABLE products (id INT, name VARCHAR(50), organic BOOLEAN);", + "sql": "SELECT COUNT(*) FROM sales JOIN products ON sales.product_id \u003d products.id WHERE products.organic \u003d TRUE AND sales.country \u003d \u0027France\u0027;", + "sql_explanation": "This SQL query counts the number of organic skincare products sold in France by joining the \u0027sales\u0027 and \u0027products\u0027 table on the \u0027product_id\u0027 column, filtering for organic products and sales in France, and then using the COUNT function to count the number of rows." 
+}, { + "id": "8", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "How many orders were placed by new and returning customers in each month of the year 2021?\u0027", + "sql_context": "CREATE TABLE customer (id INT, first_order_date DATE, last_order_date DATE);", + "sql": "INSERT INTO customer (id, first_order_date, last_order_date) SELECT customer_id, MIN(order_date) AS first_order_date, MAX(order_date) AS last_order_date FROM orders GROUP BY customer_id; SELECT YEAR(order_date) AS year, MONTH(order_date) AS month, CASE WHEN DATEDIFF(last_order_date, first_order_date) \u003e 30 THEN \u0027returning\u0027 ELSE \u0027new\u0027 END AS customer_type, COUNT(DISTINCT id) AS num_orders FROM orders JOIN customer ON orders.customer_id \u003d customer.id WHERE YEAR(order_date) \u003d 2021 GROUP BY year, month, customer_type;", + "sql_explanation": "The first SQL query creates a customer table by selecting the minimum and maximum order dates for each customer from the orders table. The second query calculates the number of orders placed by new and returning customers, grouped by year, month, and customer type. The CASE statement is used to determine whether a customer is new or returning based on the difference between their first and last order dates." 
+}, { + "id": "21", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the average prices of recycled material products in the European market?", + "sql_context": "CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(50), price DECIMAL(5,2), material VARCHAR(50), country VARCHAR(50)); CREATE TABLE materials (id INT PRIMARY KEY, name VARCHAR(50), recycled BOOLEAN);", + "sql": "SELECT AVG(products.price) AS avg_price FROM products INNER JOIN materials ON products.material \u003d materials.name WHERE materials.recycled \u003d TRUE AND products.country IN (\u0027Austria\u0027, \u0027Belgium\u0027, \u0027Czech Republic\u0027, \u0027Denmark\u0027, \u0027Estonia\u0027, \u0027Finland\u0027, \u0027France\u0027, \u0027Germany\u0027, \u0027Greece\u0027, \u0027Hungary\u0027, \u0027Ireland\u0027, \u0027Italy\u0027, \u0027Latvia\u0027, \u0027Lithuania\u0027, \u0027Luxembourg\u0027, \u0027Malta\u0027, \u0027Netherlands\u0027, \u0027Poland\u0027, \u0027Portugal\u0027, \u0027Slovakia\u0027, \u0027Slovenia\u0027, \u0027Spain\u0027, \u0027Sweden\u0027);", + "sql_explanation": "This query calculates the average prices of recycled material products in the European market by joining the products and materials tables, and filtering for recycled materials and European countries." 
+}, { + "id": "149", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 consumers of sustainable products in North America by total expenditure in 2020.", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(255), location VARCHAR(255));CREATE TABLE orders (order_id INT, customer_id INT, order_date DATE, total_expenditure DECIMAL(5,2));INSERT INTO customers (customer_id, name, location) VALUES (1, \u0027John Smith\u0027, \u0027USA\u0027), (2, \u0027Jane Doe\u0027, \u0027Canada\u0027), (3, \u0027Mike Johnson\u0027, \u0027USA\u0027), (4, \u0027Sarah Lee\u0027, \u0027Mexico\u0027), (5, \u0027David Kim\u0027, \u0027Canada\u0027);INSERT INTO orders (order_id, customer_id, order_date, total_expenditure) VALUES (1, 1, \u00272020-01-01\u0027, 100.00), (2, 2, \u00272020-02-01\u0027, 150.00), (3, 3, \u00272020-03-01\u0027, 200.00), (4, 1, \u00272020-04-01\u0027, 250.00), (5, 5, \u00272020-05-01\u0027, 300.00);", + "sql": "SELECT customers.name, SUM(orders.total_expenditure) as total_expenditure FROM customers JOIN orders ON customers.customer_id \u003d orders.customer_id WHERE customers.location LIKE \u0027North%\u0027 AND YEAR(orders.order_date) \u003d 2020 GROUP BY customers.name ORDER BY total_expenditure DESC LIMIT 5;", + "sql_explanation": "First, we join the customers and orders tables. Then, we filter the records where the customer location is in North America and the order date is in 2020. Next, we group the records by the customer\u0027s name and calculate the total expenditure for each customer using the SUM function. 
Finally, we return the top 5 consumers of sustainable products in North America by total expenditure in 2020." +}, { + "id": "222", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 materials by quantity in the inventory and supplier_orders tables.", + "sql_context": "CREATE TABLE inventory (id INT PRIMARY KEY, material_id INT, location VARCHAR(255), quantity INT); INSERT INTO inventory (id, material_id, location, quantity) VALUES (1, 1, \u0027London\u0027, 600), (2, 2, \u0027Paris\u0027, 700); CREATE TABLE supplier_orders (id INT PRIMARY KEY, supplier_id INT, material_id INT, quantity INT); INSERT INTO supplier_orders (id, supplier_id, material_id, quantity) VALUES (1, 1, 1, 1000), (2, 2, 2, 800);", + "sql": "SELECT i.material_id, SUM(i.quantity) AS total_inventory, SUM(so.quantity) AS total_supplier_orders FROM inventory i INNER JOIN supplier_orders so ON i.material_id \u003d so.material_id GROUP BY i.material_id ORDER BY total_inventory + total_supplier_orders DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 materials by quantity by joining the inventory and supplier_orders tables on material_id, and grouping by material_id. It calculates the sum of the quantity for each group in both tables and orders by the sum in descending order, limiting the results to the top 3." 
+}, { + "id": "499", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total quantity of orders for each supplier on each date, excluding orders from suppliers with ethical certification.", + "sql_context": "CREATE TABLE orders (id INT PRIMARY KEY, supplier_id INT, material_id INT, quantity INT, order_date DATE); INSERT INTO orders (id, supplier_id, material_id, quantity, order_date) VALUES (1, 1, 1, 500, \u00272021-01-01\u0027), (2, 2, 2, 300, \u00272021-01-01\u0027), (3, 3, 3, 400, \u00272021-01-02\u0027), (4, 1, 4, 600, \u00272021-01-02\u0027); CREATE TABLE ethical_certifications (id INT PRIMARY KEY, supplier_id INT, certification_name VARCHAR(255), certification_date DATE); INSERT INTO ethical_certifications (id, supplier_id, certification_name, certification_date) VALUES (1, 1, \u0027Fair Trade\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT o.supplier_id, SUM(o.quantity) as total_quantity, o.order_date FROM orders o LEFT JOIN ethical_certifications ec ON o.supplier_id \u003d ec.supplier_id WHERE ec.supplier_id IS NULL GROUP BY o.supplier_id, o.order_date;", + "sql_explanation": "The SQL query joins the orders table with the ethical_certifications table and excludes orders from suppliers that have an ethical certification. It then calculates the total quantity of orders for each supplier on each date." 
+}, { + "id": "676", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all companies and their circular economy initiatives in the \u0027Africa\u0027 region.", + "sql_context": "CREATE TABLE Companies (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO Companies (id, name, region) VALUES (1, \u0027CompanyA\u0027, \u0027Africa\u0027), (2, \u0027CompanyB\u0027, \u0027Europe\u0027), (3, \u0027CompanyC\u0027, \u0027Asia-Pacific\u0027); CREATE TABLE CircularEconomy (id INT, company_id INT, initiative VARCHAR(255)); INSERT INTO CircularEconomy (id, company_id, initiative) VALUES (1, 1, \u0027Recycling program\u0027), (2, 1, \u0027Product repair services\u0027), (3, 2, \u0027Recyclable packaging\u0027), (4, 3, \u0027Product remanufacturing\u0027);", + "sql": "SELECT Companies.name, GROUP_CONCAT(CircularEconomy.initiative) FROM Companies JOIN CircularEconomy ON Companies.id \u003d CircularEconomy.company_id WHERE Companies.region \u003d \u0027Africa\u0027 GROUP BY Companies.name;", + "sql_explanation": "Join the Companies and CircularEconomy tables based on company_id, list all companies and their circular economy initiatives in the \u0027Africa\u0027 region, and return the company names and initiatives." 
+}, { + "id": "881", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delivery time for recycled packaging orders?", + "sql_context": "CREATE TABLE OrderDetails (order_id INT, product_id INT, delivery_date DATE, packaging_type VARCHAR(255)); CREATE TABLE PackagingInfo (packaging_id INT, packaging_type VARCHAR(255), is_recycled BOOLEAN, avg_delivery_time INT); INSERT INTO PackagingInfo (packaging_id, packaging_type, is_recycled, avg_delivery_time) VALUES (1, \u0027Recycled Cardboard\u0027, true, 5), (2, \u0027Plastic Wrap\u0027, false, 3); INSERT INTO OrderDetails (order_id, product_id, delivery_date, packaging_type) VALUES (1, 1, \u00272021-01-05\u0027, \u0027Recycled Cardboard\u0027), (2, 2, \u00272021-01-06\u0027, \u0027Plastic Wrap\u0027);", + "sql": "SELECT AVG(PackagingInfo.avg_delivery_time) FROM PackagingInfo INNER JOIN OrderDetails ON PackagingInfo.packaging_type \u003d OrderDetails.packaging_type WHERE PackagingInfo.is_recycled \u003d true;", + "sql_explanation": "This query calculates the average delivery time for orders that used recycled packaging. It does so by joining the OrderDetails table with the PackagingInfo table on the packaging_type column, filtering for rows where is_recycled is true, and then calculating the average of the avg_delivery_time column." 
+}, { + "id": "948", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference between the maximum and minimum quantities of \u0027hemp\u0027 used in products for each brand?", + "sql_context": "CREATE TABLE brands(brand_id INT, brand_name TEXT); INSERT INTO brands(brand_id, brand_name) VALUES (1, \u0027BrandA\u0027), (2, \u0027BrandB\u0027), (3, \u0027BrandC\u0027); CREATE TABLE products(product_id INT, brand_id INT, material TEXT, quantity INT); INSERT INTO products(product_id, brand_id, material, quantity) VALUES (1, 1, \u0027hemp\u0027, 10), (2, 1, \u0027hemp\u0027, 20), (3, 1, \u0027hemp\u0027, 30), (4, 2, \u0027hemp\u0027, 40), (5, 2, \u0027hemp\u0027, 50), (6, 2, \u0027hemp\u0027, 60), (7, 3, \u0027hemp\u0027, 70), (8, 3, \u0027hemp\u0027, 80), (9, 3, \u0027hemp\u0027, 90);", + "sql": "SELECT brand_id, brand_name, MAX(quantity) - MIN(quantity) as difference FROM brands b JOIN products p ON b.brand_id \u003d p.brand_id WHERE material \u003d \u0027hemp\u0027 GROUP BY brand_id, brand_name;", + "sql_explanation": "Calculate the difference between the maximum and minimum quantities of \u0027hemp\u0027 for each brand by grouping by brand_id, brand_name and applying aggregate functions MAX, MIN to quantity, then subtracting the two results." 
+}, { + "id": "1059", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average order value for purchases made using sustainable payment methods?", + "sql_context": "CREATE TABLE Orders (order_id INT, total_value FLOAT, payment_method VARCHAR(20)); CREATE TABLE Payment_Methods (payment_method_id INT, payment_type VARCHAR(20)); INSERT INTO Payment_Methods (payment_method_id, payment_type) VALUES (1, \u0027Sustainable\u0027), (2, \u0027Non-Sustainable\u0027);", + "sql": "SELECT AVG(Orders.total_value) FROM Orders INNER JOIN Payment_Methods ON Orders.payment_method \u003d Payment_Methods.payment_type WHERE Payment_Methods.payment_type \u003d \u0027Sustainable\u0027;", + "sql_explanation": "The SQL query calculates the average total value of orders with a sustainable payment method by performing an inner join between the Orders and Payment_Methods tables. It filters the results to only include orders with sustainable payment types and calculates the average total value of those orders." 
+}, { + "id": "1145", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of working hours for workers in factories with fair labor practices in Africa?", + "sql_context": "CREATE TABLE FairLaborFactories (factory_id INT, region VARCHAR(20)); INSERT INTO FairLaborFactories (factory_id, region) VALUES (1, \u0027Africa\u0027), (2, \u0027South America\u0027), (3, \u0027Europe\u0027); CREATE TABLE Workers (worker_id INT, factory_id INT, hours_worked INT); INSERT INTO Workers (worker_id, factory_id, hours_worked) VALUES (1, 1, 40), (2, 1, 45), (3, 2, 35), (4, 3, 42);", + "sql": "SELECT MAX(Workers.hours_worked) FROM Workers INNER JOIN FairLaborFactories ON Workers.factory_id \u003d FairLaborFactories.factory_id WHERE FairLaborFactories.region \u003d \u0027Africa\u0027;", + "sql_explanation": "This query determines the maximum number of working hours for workers in factories with fair labor practices in Africa. It does this by performing a max of the hours_worked column in the Workers table, where the factory_id in the Workers table matches a factory_id in the FairLaborFactories table, and the region is \u0027Africa\u0027. The result is the maximum number of working hours for workers in factories that meet these criteria." 
+}, { + "id": "1224", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the average production usage of sustainable materials per factory for factories located in urban areas.", + "sql_context": "CREATE TABLE Factory_Location (id INT, factory_id INT, area VARCHAR(255)); INSERT INTO Factory_Location (id, factory_id, area) VALUES (1, 1001, \u0027Urban\u0027), (2, 1002, \u0027Rural\u0027); CREATE TABLE Material_Production (id INT, factory_id INT, material VARCHAR(255), production_usage INT); INSERT INTO Material_Production (id, factory_id, material, production_usage) VALUES (1, 1001, \u0027Organic Cotton\u0027, 500), (2, 1002, \u0027Recycled Polyester\u0027, 750);", + "sql": "SELECT f.area, AVG(mp.production_usage) FROM Factory_Location f INNER JOIN Material_Production mp ON f.factory_id \u003d mp.factory_id WHERE f.area \u003d \u0027Urban\u0027 GROUP BY f.area;", + "sql_explanation": "This query calculates the average production usage of sustainable materials per factory for factories located in urban areas by using the AVG function, joining the Factory_Location and Material_Production tables, filtering for factories in urban areas, and grouping by the area column." 
+}, { + "id": "1369", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average production cost of organic cotton garments across all factories in India?", + "sql_context": "CREATE TABLE factories (factory_id INT, factory_name TEXT, country TEXT); INSERT INTO factories (factory_id, factory_name, country) VALUES (1, \u0027Organic Textile Co\u0027, \u0027India\u0027), (2, \u0027Eco-Friendly Fashion\u0027, \u0027India\u0027); CREATE TABLE garments (garment_id INT, garment_name TEXT, production_cost FLOAT, factory_id INT); INSERT INTO garments (garment_id, garment_name, production_cost, factory_id) VALUES (1, \u0027Organic Cotton Tee\u0027, 15.50, 1), (2, \u0027Cotton Tote Bag\u0027, 8.25, 1), (3, \u0027Recycled Polyester Hoodie\u0027, 28.99, 2), (4, \u0027Organic Cotton Dress\u0027, 22.00, 1);", + "sql": "SELECT AVG(g.production_cost) FROM garments g JOIN factories f ON g.factory_id \u003d f.factory_id WHERE f.country \u003d \u0027India\u0027 AND g.garment_name LIKE \u0027%Organic Cotton%\u0027;", + "sql_explanation": "Join garments and factories tables, filter for organic cotton garments produced in India, and calculate the average production cost." 
+}, { + "id": "1415", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of labor violations in factories in Southeast Asia?", + "sql_context": "CREATE TABLE LaborViolations (id INT, factory_id INT, num_violations INT, violation_date DATE); INSERT INTO LaborViolations (id, factory_id, num_violations, violation_date) VALUES (1, 1, 3, \u00272021-01-01\u0027), (2, 2, 5, \u00272021-02-01\u0027), (3, 3, 1, \u00272021-01-15\u0027), (4, 4, 2, \u00272021-03-01\u0027); CREATE TABLE Factories (id INT, factory_location TEXT); INSERT INTO Factories (id, factory_location) VALUES (1, \u0027Southeast Asia\u0027), (2, \u0027South America\u0027), (3, \u0027Europe\u0027), (4, \u0027North America\u0027);", + "sql": "SELECT MAX(num_violations) FROM LaborViolations JOIN Factories ON LaborViolations.factory_id \u003d Factories.id WHERE Factories.factory_location \u003d \u0027Southeast Asia\u0027;", + "sql_explanation": "This query returns the maximum number of labor violations in factories located in Southeast Asia. It does this by joining the LaborViolations and Factories tables on the factory_id column, and then using the MAX function to find the maximum value of the num_violations column for factories located in Southeast Asia." 
+}, { + "id": "1657", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of eco-friendly products in the products and inventory tables?", + "sql_context": "CREATE TABLE products (product_id INT, product_name TEXT, is_eco_friendly BOOLEAN); INSERT INTO products VALUES (1, \u0027Organic Cotton Shirt\u0027, TRUE); INSERT INTO products VALUES (2, \u0027Regular Shirt\u0027, FALSE); CREATE TABLE inventory (product_id INT, quantity INT); INSERT INTO inventory VALUES (1, 100); INSERT INTO inventory VALUES (2, 200);", + "sql": "SELECT SUM(inventory.quantity) FROM products INNER JOIN inventory ON products.product_id \u003d inventory.product_id WHERE products.is_eco_friendly \u003d TRUE;", + "sql_explanation": "This query calculates the total quantity of eco-friendly products by selecting the quantity column from the inventory table and joining it with the products table on the product_id column, then filtering the results to only include eco-friendly products using the WHERE clause and finally finding the sum of records in the resulting set using the SUM() function." 
+}, { + "id": "1719", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sustainability score of suppliers that provide linen?", + "sql_context": "CREATE TABLE supplier_sustainability (supplier_id INT, name TEXT, sustainability_score INT); INSERT INTO supplier_sustainability (supplier_id, name, sustainability_score) VALUES (1, \u0027Supplier A\u0027, 85), (2, \u0027Supplier B\u0027, 90), (3, \u0027Supplier C\u0027, 70), (4, \u0027Supplier D\u0027, 60), (5, \u0027Supplier E\u0027, 50), (6, \u0027Supplier F\u0027, 95), (7, \u0027Supplier G\u0027, 80); CREATE TABLE supplier_materials (supplier_id INT, material TEXT); INSERT INTO supplier_materials (supplier_id, material) VALUES (3, \u0027linen\u0027), (6, \u0027linen\u0027), (7, \u0027linen\u0027);", + "sql": "SELECT AVG(sustainability_score) FROM supplier_sustainability s JOIN supplier_materials m ON s.supplier_id \u003d m.supplier_id WHERE material \u003d \u0027linen\u0027;", + "sql_explanation": "This query calculates the average sustainability score of suppliers that provide linen by joining the supplier_sustainability and supplier_materials tables on the supplier_id field, filtering the results based on the material field, and using the AVG function on the sustainability_score field." 
+}, { + "id": "1929", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of recycled materials used by each brand?", + "sql_context": "CREATE TABLE Brands (BrandID INT, BrandName VARCHAR(50)); INSERT INTO Brands (BrandID, BrandName) VALUES (1, \u0027BrandX\u0027), (2, \u0027BrandY\u0027), (3, \u0027BrandZ\u0027); CREATE TABLE Products (ProductID INT, ProductName VARCHAR(50), BrandID INT, RecycledMaterials INT); INSERT INTO Products (ProductID, ProductName, BrandID, RecycledMaterials) VALUES (1, \u0027ProductA\u0027, 1, 25), (2, \u0027ProductB\u0027, 1, 30), (3, \u0027ProductC\u0027, 2, 15), (4, \u0027ProductD\u0027, 2, 0), (5, \u0027ProductE\u0027, 3, 35), (6, \u0027ProductF\u0027, 3, 40);", + "sql": "SELECT BrandName, SUM(RecycledMaterials) as TotalRecycledMaterials FROM Brands b JOIN Products p ON b.BrandID \u003d p.BrandID GROUP BY BrandName;", + "sql_explanation": "We join the Brands table with the Products table using the BrandID. Then, we use the GROUP BY clause to group the results by BrandName, and the SUM function to calculate the total quantity of recycled materials used by each brand." 
+}, { + "id": "2110", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the water usage of \u0027Ethical Supplies\u0027?", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(100)); INSERT INTO suppliers (id, name) VALUES (1, \u0027Ethical Supplies\u0027); CREATE TABLE sustainability_reports (id INT PRIMARY KEY, company_id INT, year INT, water_usage INT); INSERT INTO sustainability_reports (id, company_id, year, water_usage) VALUES (1, 1, 2021, 3000);", + "sql": "SELECT sr.water_usage FROM suppliers s INNER JOIN sustainability_reports sr ON s.id \u003d sr.company_id WHERE s.name \u003d \u0027Ethical Supplies\u0027;", + "sql_explanation": "The SQL query performs an inner join on the suppliers and sustainability_reports tables, selecting the water_usage column from the sustainability_reports table. The query filters the results where the name of the supplier is \u0027Ethical Supplies\u0027. This allows users to see the water usage for a specific supplier." 
+}, { + "id": "2338", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many items are produced in each country?", + "sql_context": "CREATE TABLE CountryProduction (id INT, country VARCHAR(50)); CREATE TABLE CountryItems (id INT, country INT, quantity INT); INSERT INTO CountryProduction (id, country) VALUES (1, \u0027Country A\u0027), (2, \u0027Country B\u0027), (3, \u0027Country C\u0027); INSERT INTO CountryItems (id, country, quantity) VALUES (1, 1, 500), (2, 1, 700), (3, 2, 300), (4, 2, 800), (5, 3, 900);", + "sql": "SELECT c.country, SUM(ci.quantity) FROM CountryProduction c INNER JOIN CountryItems ci ON c.id \u003d ci.country GROUP BY c.country;", + "sql_explanation": "This SQL query lists the number of items produced in each country by joining the CountryProduction and CountryItems tables on the id column and using the SUM function on the quantity column, grouped by the country column in the CountryProduction table." 
+}, { + "id": "2387", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of sustainable materials used by each supplier?", + "sql_context": "CREATE TABLE supplier_materials (supplier_id INT, material VARCHAR(50), quantity INT); INSERT INTO supplier_materials (supplier_id, material, quantity) VALUES (1, \u0027Recycled Polyester\u0027, 5000), (1, \u0027Organic Cotton\u0027, 2000), (2, \u0027Recycled Polyester\u0027, 3000), (2, \u0027Bamboo Viscose\u0027, 1000), (3, \u0027Cotton\u0027, 4000), (3, \u0027Hemp\u0027, 1000), (4, \u0027Recycled Polyester\u0027, 6000), (5, \u0027Organic Cotton\u0027, 7000), (5, \u0027Tencel\u0027, 3000); CREATE TABLE suppliers (supplier_id INT, name VARCHAR(255)); INSERT INTO suppliers (supplier_id, name) VALUES (1, \u0027Green Fabrics\u0027), (2, \u0027Eco Yarns\u0027), (3, \u0027Blue Textiles\u0027), (4, \u0027Sustainable Threads\u0027), (5, \u0027Natural Fibers\u0027);", + "sql": "SELECT s.name, SUM(sm.quantity) FROM supplier_materials sm JOIN suppliers s ON sm.supplier_id \u003d s.supplier_id GROUP BY s.name;", + "sql_explanation": "This query calculates the total quantity of sustainable materials used by each supplier. It does this by using the JOIN clause to combine the supplier_materials and suppliers tables on the supplier_id column. The SUM function is used to add up the quantity column, and the GROUP BY clause is used to group the rows by the name column in the suppliers table." 
+}, { + "id": "3104", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average price of ethically made items", + "sql_context": "CREATE TABLE sales (id INT, item_id INT, price INT, purchase_date DATE); CREATE TABLE standards (id INT, item_id INT, standard VARCHAR(255)); INSERT INTO sales (id, item_id, price, purchase_date) VALUES (1, 1, 100, \u00272022-01-01\u0027), (2, 2, 75, \u00272022-01-02\u0027), (3, 1, 120, \u00272022-01-03\u0027); INSERT INTO standards (id, item_id, standard) VALUES (1, 1, \u0027ethical\u0027), (2, 2, \u0027sustainable\u0027);", + "sql": "SELECT AVG(price) FROM sales s JOIN standards st ON s.item_id \u003d st.item_id WHERE st.standard \u003d \u0027ethical\u0027;", + "sql_explanation": "This SQL query calculates the average price of ethically made items by joining the sales and standards tables on the item_id and then selecting the average price where the standard is ethical." 
+}, { + "id": "254", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which \u0027Racing\u0027 game has the highest number of active players in India?", + "sql_context": "CREATE TABLE player_profiles (player_id INT, player_country VARCHAR(50)); INSERT INTO player_profiles (player_id, player_country) VALUES (1, \u0027USA\u0027), (2, \u0027Canada\u0027), (3, \u0027India\u0027), (4, \u0027Brazil\u0027), (5, \u0027Germany\u0027); CREATE TABLE player_games (player_id INT, game_name VARCHAR(100), game_type VARCHAR(50)); INSERT INTO player_games (player_id, game_name, game_type) VALUES (1, \u0027GameE\u0027, \u0027Racing\u0027), (2, \u0027GameF\u0027, \u0027Shooter\u0027), (3, \u0027GameG\u0027, \u0027Racing\u0027), (4, \u0027GameH\u0027, \u0027Strategy\u0027), (5, \u0027GameI\u0027, \u0027Racing\u0027);", + "sql": "SELECT game_name, COUNT(player_id) AS active_players FROM player_profiles JOIN player_games ON player_profiles.player_id \u003d player_games.player_id WHERE player_country \u003d \u0027India\u0027 AND game_type \u003d \u0027Racing\u0027 GROUP BY game_name ORDER BY active_players DESC LIMIT 1;", + "sql_explanation": "This query identifies the Racing game with the highest number of active players in India by counting the number of players from India who play Racing games and returning the game with the highest count." 
+}, { + "id": "337", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries with the highest average player spending on mobile games in the Asian market", + "sql_context": "CREATE TABLE country_codes (country_code CHAR(2), country VARCHAR(50), PRIMARY KEY (country_code)); INSERT INTO country_codes VALUES (\u0027US\u0027, \u0027United States\u0027), (\u0027CN\u0027, \u0027China\u0027), (\u0027JP\u0027, \u0027Japan\u0027), (\u0027IN\u0027, \u0027India\u0027), (\u0027KR\u0027, \u0027South Korea\u0027); CREATE TABLE player_spending (player_id INT, country_code CHAR(2), amount DECIMAL(10, 2), PRIMARY KEY (player_id, country_code)); INSERT INTO player_spending VALUES (1, \u0027CN\u0027, 500.00), (2, \u0027CN\u0027, 600.00), (3, \u0027JP\u0027, 400.00), (4, \u0027JP\u0027, 700.00), (5, \u0027KR\u0027, 800.00), (6, \u0027KR\u0027, 900.00);", + "sql": "SELECT c.country, AVG(ps.amount) as avg_spending FROM country_codes c INNER JOIN player_spending ps ON c.country_code \u003d ps.country_code WHERE c.country IN (\u0027China\u0027, \u0027Japan\u0027, \u0027South Korea\u0027) GROUP BY c.country ORDER BY avg_spending DESC LIMIT 3;", + "sql_explanation": "The SQL query joins the country_codes table and player_spending table on the country_code column. It calculates the average spending for each country and groups them. The WHERE clause filters the results to only include China, Japan, and South Korea. Finally, it orders the results by average spending in descending order and limits the results to the top 3 countries." 
+}, { + "id": "441", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of active players in each country, defined as players who have played a game in the last month, grouped by country.", + "sql_context": "CREATE TABLE players(id INT, name VARCHAR(50), country VARCHAR(50), last_login DATETIME); CREATE TABLE game_sessions(id INT, player_id INT, game_name VARCHAR(50), start_time DATETIME);", + "sql": "SELECT players.country, COUNT(DISTINCT players.id) as active_players FROM players JOIN game_sessions ON players.id \u003d game_sessions.player_id WHERE start_time \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY players.country;", + "sql_explanation": "The query joins the players and game_sessions tables and filters for sessions that started in the last month. It then groups the data by player country and calculates the number of distinct active players for each country." 
+}, { + "id": "607", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the average number of daily active users (DAU) for the last 30 days in \u0027Fortnite\u0027", + "sql_context": "CREATE TABLE daily_usage (date DATE, game_id INT, daus INT, PRIMARY KEY (date, game_id)); INSERT INTO daily_usage VALUES (\u00272022-01-01\u0027, 1, 1000), (\u00272022-01-01\u0027, 2, 2000), (\u00272022-01-01\u0027, 3, 3000), (\u00272022-01-02\u0027, 1, 1100), (\u00272022-01-02\u0027, 2, 2100), (\u00272022-01-02\u0027, 3, 3100); CREATE TABLE game_titles (game_id INT, title VARCHAR(50), PRIMARY KEY (game_id)); INSERT INTO game_titles VALUES (1, \u0027Fortnite\u0027), (2, \u0027Minecraft\u0027), (3, \u0027Among Us\u0027);", + "sql": "SELECT AVG(du.daus) as avg_dau FROM daily_usage du INNER JOIN game_titles gt ON du.game_id \u003d gt.game_id WHERE gt.title \u003d \u0027Fortnite\u0027 AND du.date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY) AND CURRENT_DATE;", + "sql_explanation": "The SQL query joins the daily_usage table and game_titles table on the game_id column. It calculates the average number of daily active users (DAU) for the last 30 days in \u0027Fortnite\u0027 and groups them. The WHERE clause filters the results to only include rows where the game title is \u0027Fortnite\u0027 and the date is within the last 30 days. Finally, it calculates the average DAU for the specified time period." 
+}, { + "id": "933", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total playtime of each player, ordered by the total playtime in descending order.", + "sql_context": "CREATE TABLE players (id INT, name VARCHAR(255)); INSERT INTO players (id, name) VALUES (1, \u0027Player1\u0027), (2, \u0027Player2\u0027), (3, \u0027Player3\u0027), (4, \u0027Player4\u0027), (5, \u0027Player5\u0027); CREATE TABLE player_games (player_id INT, game_id INT, playtime INT); INSERT INTO player_games (player_id, game_id, playtime) VALUES (1, 1, 100), (2, 2, 200), (3, 3, 300), (4, 1, 400), (5, 2, 500), (1, 4, 600), (2, 3, 700), (3, 1, 800), (4, 2, 900), (5, 3, 1000); CREATE TABLE games (id INT, name VARCHAR(255)); INSERT INTO games (id, name) VALUES (1, \u0027Game1\u0027), (2, \u0027Game2\u0027), (3, \u0027Game3\u0027), (4, \u0027Game4\u0027);", + "sql": "SELECT players.name, SUM(player_games.playtime) as total_playtime FROM players JOIN player_games ON players.id \u003d player_games.player_id GROUP BY players.id ORDER BY total_playtime DESC;", + "sql_explanation": "List the total playtime of each player, ordered by the total playtime in descending order." 
+}, { + "id": "1333", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Remove any duplicate records in the games table", + "sql_context": "CREATE TABLE games (id INT PRIMARY KEY, player_id INT, game_name VARCHAR(100), last_played TIMESTAMP); INSERT INTO games VALUES (1, 1001, \u0027GameA\u0027, \u00272021-01-01 12:00:00\u0027), (2, 1002, \u0027GameB\u0027, \u00272021-02-15 14:30:00\u0027), (3, 1003, \u0027GameA\u0027, \u00272021-06-20 09:15:00\u0027), (4, 1001, \u0027GameA\u0027, \u00272021-01-01 12:00:00\u0027);", + "sql": "DELETE t1 FROM games t1 INNER JOIN games t2 WHERE t1.id \u003c t2.id AND t1.player_id \u003d t2.player_id AND t1.game_name \u003d t2.game_name AND t1.last_played \u003d t2.last_played;", + "sql_explanation": "The DELETE statement uses a self-join to find records with duplicate player_id, game_name, and last_played values and deletes the records with the lower id." 
+}, { + "id": "1464", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all games and their total duration in the \u0027GameSessions\u0027 and \u0027GameDetails\u0027 tables", + "sql_context": "CREATE TABLE GameSessions (GameID INT, SessionDuration TIME); CREATE TABLE GameDetails (GameID INT, GameName VARCHAR(100));", + "sql": "SELECT gd.GameName, SUM(gs.SessionDuration) as TotalGameDuration FROM GameSessions gs INNER JOIN GameDetails gd ON gs.GameID \u003d gd.GameID GROUP BY gd.GameName;", + "sql_explanation": "This SQL query performs an inner join between GameSessions and GameDetails tables on GameID and calculates the sum of SessionDuration for each GameID to find the total duration for each game." 
+}, { + "id": "1686", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many games were released in each year, and what was the total number of games released?", + "sql_context": "CREATE TABLE Games (GameID INT PRIMARY KEY, Name VARCHAR(50), Genre VARCHAR(50), ReleaseDate DATE); CREATE VIEW GameYears AS SELECT YEAR(ReleaseDate) AS Year, COUNT(*) AS GamesReleased FROM Games GROUP BY YEAR(ReleaseDate); CREATE VIEW TotalGames AS SELECT COUNT(*) AS TotalGames FROM Games;", + "sql": "SELECT GameYears.Year, GameYears.GamesReleased, TotalGames.TotalGames FROM GameYears INNER JOIN TotalGames ON GameYears.Year \u003d TotalGames.TotalGames;", + "sql_explanation": "The SQL query joins the GameYears view and the TotalGames view on the Year column, selecting all years, the number of games released in each year, and the total number of games released." 
+}, { + "id": "1937", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique players have played the game \"StarshipWarriors\" across all regions?", + "sql_context": "CREATE TABLE Games (GameID INT, GameName VARCHAR(255));CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(255), GameID INT);", + "sql": "SELECT COUNT(DISTINCT p.PlayerID) as UniquePlayers FROM Games g JOIN Players p ON g.GameID \u003d p.GameID WHERE g.GameName \u003d \u0027StarshipWarriors\u0027;", + "sql_explanation": "This query joins the \"Games\" table with the \"Players\" table using the GameID. It then filters for the game \"StarshipWarriors\" and calculates the number of unique players who have played this game." 
+}, { + "id": "2606", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display player names and their average score for \u0027Overwatch\u0027.", + "sql_context": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(50)); INSERT INTO Players (PlayerID, Name) VALUES (1, \u0027John Doe\u0027); INSERT INTO Players (PlayerID, Name) VALUES (2, \u0027Jane Smith\u0027); CREATE TABLE Overwatch_Scores (PlayerID INT, Score INT); INSERT INTO Overwatch_Scores (PlayerID, Score) VALUES (1, 100); INSERT INTO Overwatch_Scores (PlayerID, Score) VALUES (1, 120); INSERT INTO Overwatch_Scores (PlayerID, Score) VALUES (2, 150); INSERT INTO Overwatch_Scores (PlayerID, Score) VALUES (2, 160);", + "sql": "SELECT p.Name, AVG(os.Score) FROM Players p INNER JOIN Overwatch_Scores os ON p.PlayerID \u003d os.PlayerID GROUP BY p.Name;", + "sql_explanation": "The SQL query joins the Players and Overwatch_Scores tables, and then uses the GROUP BY clause to group the results by player name, and the AVG function to calculate the average score for each player." 
+}, { + "id": "35", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new employee records from a recent diverse hiring wave.", + "sql_context": "CREATE TABLE Employees (Employee_ID INT, First_Name VARCHAR(20), Last_Name VARCHAR(20), Department VARCHAR(20), Country VARCHAR(20), Join_Date DATE);", + "sql": "INSERT INTO Employees (Employee_ID, First_Name, Last_Name, Department, Country, Join_Date) VALUES (101, \u0027Sophia\u0027, \u0027Mendoza\u0027, \u0027Marketing\u0027, \u0027Mexico\u0027, \u00272022-06-15\u0027), (102, \u0027Tariq\u0027, \u0027Ahmed\u0027, \u0027IT\u0027, \u0027Pakistan\u0027, \u00272022-07-02\u0027), (103, \u0027Emi\u0027, \u0027Watanabe\u0027, \u0027Finance\u0027, \u0027Japan\u0027, \u00272022-07-10\u0027), (104, \u0027Alex\u0027, \u0027Shaw\u0027, \u0027Sales\u0027, \u0027Canada\u0027, \u00272022-07-15\u0027), (105, \u0027Biyu\u0027, \u0027Zhang\u0027, \u0027Engineering\u0027, \u0027China\u0027, \u00272022-07-25\u0027);", + "sql_explanation": "Add new employee records from a recent diverse hiring wave." 
+}, { + "id": "183", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of employees who have completed technical and non-technical training, by department?", + "sql_context": "CREATE TABLE departments (dept_id INT, dept_name TEXT); INSERT INTO departments (dept_id, dept_name) VALUES (1, \u0027HR\u0027), (2, \u0027IT\u0027), (3, \u0027Sales\u0027); CREATE TABLE employees (employee_id INT, name TEXT, dept_id INT, technical_training BOOLEAN, non_technical_training BOOLEAN); INSERT INTO employees (employee_id, name, dept_id, technical_training, non_technical_training) VALUES (1, \u0027Alice\u0027, 1, TRUE, TRUE), (2, \u0027Bob\u0027, 2, FALSE, TRUE), (3, \u0027Charlie\u0027, 1, TRUE, FALSE), (4, \u0027Dave\u0027, 2, TRUE, TRUE), (5, \u0027Eve\u0027, 1, FALSE, FALSE);", + "sql": "SELECT dept_name, SUM(CASE WHEN technical_training THEN 1 ELSE 0 END) AS num_technical_trained, SUM(CASE WHEN non_technical_training THEN 1 ELSE 0 END) AS num_non_technical_trained FROM employees JOIN departments ON employees.dept_id \u003d departments.dept_id GROUP BY dept_name;", + "sql_explanation": "The SQL query joins the employees and departments tables based on the department ID. It then pivots the technical and non-technical training data using the CASE statement and counts the number of employees who have completed each type of training in each department. The results are grouped by department name." 
+}, { + "id": "239", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of developers in the IT department, grouped by their employment type?", + "sql_context": "CREATE TABLE Employees (EmployeeID int, FirstName varchar(50), LastName varchar(50), Department varchar(50), EmploymentType varchar(50), Salary int); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, EmploymentType, Salary) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027IT\u0027, \u0027Full-time\u0027, 70000), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027IT\u0027, \u0027Part-time\u0027, 35000), (3, \u0027Jim\u0027, \u0027Smith\u0027, \u0027IT\u0027, \u0027Contractor\u0027, 90000); CREATE TABLE Departments (Department varchar(50), Manager varchar(50)); INSERT INTO Departments (Department, Manager) VALUES (\u0027IT\u0027, \u0027John Doe\u0027);", + "sql": "SELECT Employees.EmploymentType, AVG(Employees.Salary) as Avg_Salary FROM Employees INNER JOIN Departments ON Employees.Department \u003d Departments.Department WHERE Employees.Department \u003d \u0027IT\u0027 AND Employees.JobTitle \u003d \u0027Developer\u0027 GROUP BY Employees.EmploymentType;", + "sql_explanation": "The SQL query joins the Employees and Departments tables on the Department column. It filters for the IT department and developers, groups the results by employment type, and calculates the average salary for each group." 
+}, { + "id": "321", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the recruiters that have hired the most employees in the HR department?", + "sql_context": "CREATE TABLE Employees (Employee_ID INT PRIMARY KEY, First_Name VARCHAR(30), Last_Name VARCHAR(30), Department VARCHAR(30), Recruiter_ID INT); CREATE TABLE Recruiters (Recruiter_ID INT PRIMARY KEY, First_Name VARCHAR(30), Last_Name VARCHAR(30)); INSERT INTO Employees (Employee_ID, First_Name, Last_Name, Department, Recruiter_ID) VALUES (1, \u0027Grace\u0027, \u0027Thomas\u0027, \u0027HR\u0027, 1001); INSERT INTO Employees (Employee_ID, First_Name, Last_Name, Department, Recruiter_ID) VALUES (2, \u0027Harry\u0027, \u0027Miller\u0027, \u0027HR\u0027, 1002); INSERT INTO Recruiters (Recruiter_ID, First_Name, Last_Name) VALUES (1001, \u0027Ivan\u0027, \u0027Taylor\u0027); INSERT INTO Recruiters (Recruiter_ID, First_Name, Last_Name) VALUES (1002, \u0027Judy\u0027, \u0027Washington\u0027);", + "sql": "SELECT Recruiters.First_Name, Recruiters.Last_Name, COUNT(*) as Number_of_Hires FROM Employees INNER JOIN Recruiters ON Employees.Recruiter_ID \u003d Recruiters.Recruiter_ID WHERE Department \u003d \u0027HR\u0027 GROUP BY Recruiter_ID ORDER BY Number_of_Hires DESC;", + "sql_explanation": "This SQL query identifies the recruiters that have hired the most employees in the HR department." 
+}, { + "id": "329", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all managers and their respective departments who have not completed diversity and inclusion training.", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50)); CREATE TABLE DiversityTraining (EmployeeID INT, TrainingID INT, Completed DATE); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Position) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027IT\u0027, \u0027Manager\u0027), (2, \u0027Jane\u0027, \u0027Smith\u0027, \u0027HR\u0027, \u0027Manager\u0027); INSERT INTO DiversityTraining (EmployeeID, TrainingID, Completed) VALUES (1, 1, \u00272021-06-01\u0027);", + "sql": "SELECT Employees.FirstName, Employees.LastName, Employees.Department FROM Employees LEFT JOIN DiversityTraining ON Employees.EmployeeID \u003d DiversityTraining.EmployeeID WHERE Employees.Position \u003d \u0027Manager\u0027 AND DiversityTraining.Completed IS NULL;", + "sql_explanation": "This query lists all managers and their respective departments who have not completed diversity and inclusion training. It first creates two tables: Employees and DiversityTraining. Then, the query performs a LEFT JOIN on these two tables based on the EmployeeID. The WHERE clause filters the results to only show managers (Position\u003d\u0027Manager\u0027) who have not completed the training (DiversityTraining.Completed IS NULL)." 
+}, { + "id": "1101", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage of female employees in each department.", + "sql_context": "CREATE TABLE departments (dept_id INT, dept_name VARCHAR(255));CREATE TABLE employees (emp_id INT, emp_name VARCHAR(255), dept_id INT, gender VARCHAR(10)); INSERT INTO departments (dept_id, dept_name) VALUES (1, \u0027HR\u0027), (2, \u0027IT\u0027); INSERT INTO employees (emp_id, emp_name, dept_id, gender) VALUES (1, \u0027John Doe\u0027, 1, \u0027Male\u0027), (2, \u0027Jane Smith\u0027, 1, \u0027Female\u0027), (3, \u0027Alice Johnson\u0027, 2, \u0027Female\u0027), (4, \u0027Bob Brown\u0027, 2, \u0027Male\u0027);", + "sql": "SELECT dept_name, (COUNT(*) FILTER (WHERE gender \u003d \u0027Female\u0027) * 100.0 / COUNT(*)) as pct_female FROM departments d JOIN employees e ON d.dept_id \u003d e.dept_id GROUP BY dept_name;", + "sql_explanation": "This query calculates the percentage of female employees in each department. It joins the departments and employees tables, groups the results by department and gender, and calculates the percentage of female employees in each department." 
+}, { + "id": "1229", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the job titles with the lowest average salaries", + "sql_context": "CREATE TABLE Employees (id INT, job_title VARCHAR(50), salary DECIMAL(10, 2)); CREATE TABLE Departments (id INT, employee_id INT, department_name VARCHAR(50));", + "sql": "SELECT job_title, AVG(salary) AS avg_salary FROM Employees JOIN Departments ON Employees.id \u003d Departments.employee_id GROUP BY job_title ORDER BY avg_salary ASC LIMIT 5;", + "sql_explanation": "The SQL query joins the Employees and Departments table on the employee_id column. It then calculates the average salary for each job_title and orders the results in ascending order, returning the top 5 job titles with the lowest average salaries." 
+}, { + "id": "1426", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of employees by department and job level?", + "sql_context": "CREATE TABLE departments (dept_id INT, dept_name TEXT); INSERT INTO departments (dept_id, dept_name) VALUES (1, \u0027HR\u0027), (2, \u0027IT\u0027), (3, \u0027Sales\u0027); CREATE TABLE employees (employee_id INT, name TEXT, salary INT, dept_id INT, job_level TEXT); INSERT INTO employees (employee_id, name, salary, dept_id, job_level) VALUES (1, \u0027Alice\u0027, 50000, 1, \u0027Manager\u0027), (2, \u0027Bob\u0027, 60000, 2, \u0027Senior\u0027), (3, \u0027Charlie\u0027, 55000, 1, \u0027Junior\u0027), (4, \u0027Dave\u0027, 65000, 2, \u0027Senior\u0027), (5, \u0027Eve\u0027, 52000, 1, \u0027Manager\u0027);", + "sql": "SELECT dept_name, job_level, COUNT(*) AS num_employees FROM employees JOIN departments ON employees.dept_id \u003d departments.dept_id GROUP BY dept_name, job_level;", + "sql_explanation": "The SQL query joins the employees and departments tables based on the department ID. It then groups the employees by department and job level, and counts the number of employees in each group using the COUNT function." 
+}, { + "id": "1542", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all employees who have changed departments in the \u0027hr\u0027 schema\u0027s \u0027employee_moves\u0027 table and the \u0027hr\u0027 schema\u0027s \u0027employee_details\u0027 table", + "sql_context": "CREATE TABLE hr.employee_moves (id INT, employee_id INT, old_dept VARCHAR(50), new_dept VARCHAR(50), move_date DATE); CREATE TABLE hr.employee_details (id INT, employee_id INT, first_name VARCHAR(50), last_name VARCHAR(50), department VARCHAR(50));", + "sql": "SELECT e.first_name, e.last_name FROM hr.employee_details e INNER JOIN hr.employee_moves m ON e.employee_id \u003d m.employee_id WHERE m.old_dept !\u003d m.new_dept;", + "sql_explanation": "The SQL query performs an inner join between the \u0027employee_details\u0027 and \u0027employee_moves\u0027 tables on the \u0027employee_id\u0027 column. It then filters the results to only include rows where the \u0027old_dept\u0027 (old department) and \u0027new_dept\u0027 (new department) columns have different values." 
+}, { + "id": "2133", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average salary for each job position from the \u0027salary\u0027 and \u0027position\u0027 tables", + "sql_context": "CREATE TABLE salary (id INT, employee_id INT, amount DECIMAL(5,2)); CREATE TABLE position (id INT, title VARCHAR(50), department_id INT, salary INT);", + "sql": "SELECT position.title, AVG(salary.amount) FROM position INNER JOIN salary ON position.id \u003d salary.employee_id GROUP BY position.title;", + "sql_explanation": "The SQL query performs an inner join between the \u0027salary\u0027 and \u0027position\u0027 tables based on the \u0027id\u0027 column, groups the results by the job title, and calculates the average salary for each job position." 
+}, { + "id": "2164", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees identify as a gender other than male or female?", + "sql_context": "CREATE TABLE Genders (Gender VARCHAR(10)); INSERT INTO Genders (Gender) VALUES (\u0027Male\u0027), (\u0027Female\u0027), (\u0027Non-binary\u0027);", + "sql": "SELECT COUNT(*) FROM Employees INNER JOIN Genders ON Employees.Gender \u003d Genders.Gender WHERE Gender !\u003d \u0027Male\u0027 AND Gender !\u003d \u0027Female\u0027;", + "sql_explanation": "The SQL query joins the Employees and Genders tables on the Gender column, then filters for records where Gender is neither \u0027Male\u0027 nor \u0027Female\u0027, and finally counts the number of records. The result is a single value: the count of employees identifying as a gender other than male or female." 
+}, { + "id": "3006", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total salary expense for the HR department?", + "sql_context": "CREATE TABLE departments (id INT, name VARCHAR(50), budget FLOAT); INSERT INTO departments (id, name, budget) VALUES (1, \u0027HR\u0027, 300000.00), (2, \u0027IT\u0027, 500000.00); CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), salary FLOAT); INSERT INTO employees (id, name, department, salary) VALUES (1, \u0027John Doe\u0027, \u0027HR\u0027, 60000.00), (2, \u0027Jane Smith\u0027, \u0027HR\u0027, 70000.00);", + "sql": "SELECT SUM(e.salary) FROM employees e INNER JOIN departments d ON e.department \u003d d.name WHERE d.name \u003d \u0027HR\u0027;", + "sql_explanation": "This SQL query calculates the total salary expense for the HR department. It uses the SUM function to find the total salary expense and joins the employees and departments tables to get the budget for the HR department." 
+}, { + "id": "84", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of resources depleted by coal mining operations, and which sites have depleted more than 80% of their total resources?", + "sql_context": "CREATE TABLE mining_sites (id INT, site_name TEXT, resource_type TEXT, total_resources_available INT);CREATE TABLE resources_depleted (site_id INT, amount_depleted INT);", + "sql": "SELECT r.resource_type, s.site_name, SUM(r.amount_depleted) as total_depleted, s.total_resources_available FROM mining_sites s JOIN resources_depleted r ON s.id \u003d r.site_id WHERE r.resource_type \u003d \u0027coal\u0027 GROUP BY r.resource_type, s.site_name HAVING SUM(r.amount_depleted) / s.total_resources_available \u003e 0.8;", + "sql_explanation": "The SQL query joins the mining_sites table with the resources_depleted table on the site_id column. It then filters the results to only include coal mining operations. The query groups the results by the resource_type and site_name columns and calculates the total amount of resources depleted for each combination. The HAVING clause filters the results to only include coal mining sites where more than 80% of the total resources have been depleted." 
+}, { + "id": "415", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total water usage per month for the last year?", + "sql_context": "CREATE TABLE date (date_id INT, date DATE); INSERT INTO date (date_id, date) VALUES (1, \u00272022-01-01\u0027), (2, \u00272022-02-01\u0027), (3, \u00272022-03-01\u0027), (4, \u00272022-04-01\u0027), (5, \u00272022-05-01\u0027), (6, \u00272022-06-01\u0027), (7, \u00272022-07-01\u0027), (8, \u00272022-08-01\u0027), (9, \u00272022-09-01\u0027), (10, \u00272022-10-01\u0027), (11, \u00272022-11-01\u0027), (12, \u00272022-12-01\u0027); CREATE TABLE water_usage (usage_id INT, date_id INT, water_usage_m3 FLOAT); INSERT INTO water_usage (usage_id, date_id, water_usage_m3) VALUES (1, 1, 1000), (2, 1, 1200), (3, 2, 1500), (4, 2, 1800), (5, 3, 2000), (6, 3, 2200), (7, 4, 1500), (8, 4, 1700), (9, 5, 1200), (10, 5, 1400), (11, 6, 1000), (12, 6, 1100), (13, 7, 1500), (14, 7, 1800), (15, 8, 2000), (16, 8, 2200), (17, 9, 1500), (18, 9, 1700), (19, 10, 1200), (20, 10, 1400), (21, 11, 1000), (22, 11, 1100), (23, 12, 1500), (24, 12, 1800);", + "sql": "SELECT EXTRACT(MONTH FROM d.date) as month, AVG(w.water_usage_m3) as avg_water_usage_m3 FROM water_usage w JOIN date d ON w.date_id \u003d d.date_id WHERE d.date BETWEEN \u00272021-01-01\u0027 AND \u00272022-12-31\u0027 GROUP BY EXTRACT(MONTH FROM d.date);", + "sql_explanation": "This SQL query joins the water_usage and date tables, filters the data for the last year, groups water usage by month, and calculates the average water usage for each month." 
+}, { + "id": "431", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of resources extracted by the top 3 mining sites in the last year?", + "sql_context": "CREATE TABLE MiningSites(id INT, name VARCHAR(30), location VARCHAR(30)); CREATE TABLE ResourceExtraction(site_id INT, date DATE, resources_extracted INT);", + "sql": "SELECT m.name, SUM(re.resources_extracted) FROM MiningSites m JOIN ResourceExtraction re ON m.id \u003d re.site_id WHERE date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY m.id ORDER BY SUM(re.resources_extracted) DESC LIMIT 3;", + "sql_explanation": "The SQL query calculates the total amount of resources extracted by the top 3 mining sites in the last year by joining the MiningSites and ResourceExtraction tables, filtering for rows with dates within the last year, grouping by mining site, sorting by the total resources extracted in descending order, and limiting the results to the top 3 mining sites." 
+}, { + "id": "460", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all mining equipment and their maintenance dates, with maintenance dates more than 6 months ago", + "sql_context": "CREATE TABLE Equipment (id INT, name VARCHAR(255), type VARCHAR(255), location VARCHAR(255)); INSERT INTO Equipment (id, name, type, location) VALUES (1, \u0027Bulldozer\u0027, \u0027Heavy\u0027, \u0027Asia Pacific\u0027), (2, \u0027Excavator\u0027, \u0027Heavy\u0027, \u0027North America\u0027), (3, \u0027Crane\u0027, \u0027Medium\u0027, \u0027Asia Pacific\u0027), (4, \u0027Pickup Truck\u0027, \u0027Light\u0027, \u0027South America\u0027); CREATE TABLE Maintenance (id INT, equipment_id INT, date DATE); INSERT INTO Maintenance (id, equipment_id, date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 1, \u00272022-04-01\u0027), (3, 3, \u00272022-02-15\u0027), (4, 3, \u00272022-07-20\u0027), (5, 2, \u00272021-06-15\u0027), (6, 4, \u00272022-08-01\u0027);", + "sql": "SELECT Equipment.name, Maintenance.date FROM Equipment INNER JOIN Maintenance ON Equipment.id \u003d Maintenance.equipment_id WHERE Equipment.location \u003d \u0027Asia Pacific\u0027 AND Maintenance.date \u003c DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", + "sql_explanation": "This SQL query performs an inner join on the \u0027Equipment\u0027 and \u0027Maintenance\u0027 tables, matching rows based on the \u0027equipment_id\u0027 column. The WHERE clause filters the results to only show equipment located in the Asia Pacific region and maintenance dates more than 6 months ago using the DATE_SUB() function. 
The result is a list of all mining equipment and their maintenance dates, with maintenance dates more than 6 months ago." +}, { + "id": "539", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total amount of resources depleted for each mining operation.", + "sql_context": "CREATE TABLE mining_operations (id INT, name VARCHAR(50)); CREATE TABLE resource_depletion (id INT, mining_operation_id INT, amount_depleted FLOAT);", + "sql": "SELECT mining_operations.name, SUM(resource_depletion.amount_depleted) FROM mining_operations JOIN resource_depletion ON mining_operations.id \u003d resource_depletion.mining_operation_id GROUP BY mining_operations.name;", + "sql_explanation": "This query calculates the total amount of resources depleted for each mining operation by joining the \u0027mining_operations\u0027 and \u0027resource_depletion\u0027 tables on the \u0027id\u0027 column, and then using the SUM function to sum the \u0027amount_depleted\u0027 column for each mining operation." 
+}, { + "id": "842", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mining operations have no CO2 emissions data?", + "sql_context": "CREATE TABLE mining_operations (operation_id INT, operation_name VARCHAR(50), location VARCHAR(50)); CREATE TABLE co2_emissions (operation_id INT, co2_emissions_tonnes INT); INSERT INTO mining_operations (operation_id, operation_name, location) VALUES (1, \u0027Operation A\u0027, \u0027USA\u0027), (2, \u0027Operation B\u0027, \u0027Canada\u0027), (3, \u0027Operation C\u0027, \u0027Mexico\u0027), (4, \u0027Operation D\u0027, \u0027USA\u0027); INSERT INTO co2_emissions (operation_id, co2_emissions_tonnes) VALUES (1, 1000), (2, 1500), (3, 500);", + "sql": "SELECT mining_operations.operation_name FROM mining_operations LEFT JOIN co2_emissions ON mining_operations.operation_id \u003d co2_emissions.operation_id WHERE co2_emissions.operation_id IS NULL;", + "sql_explanation": "The SQL query finds mining operations with no CO2 emissions data by using a LEFT JOIN to combine the mining_operations and co2_emissions tables and then filtering the results where the co2_emissions.operation_id is NULL." 
+}, { + "id": "1204", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees of underrepresented communities work in each mining site?", + "sql_context": "CREATE TABLE mining_sites (site_id INT, site_name TEXT, location TEXT); INSERT INTO mining_sites (site_id, site_name, location) VALUES (1, \u0027Sila Copper\u0027, \u0027Nairobi, Kenya\u0027), (2, \u0027Tumazoz Silver\u0027, \u0027Tangier, Morocco\u0027), (3, \u0027Kavango Coal\u0027, \u0027Windhoek, Namibia\u0027); CREATE TABLE employees (employee_id INT, name TEXT, gender TEXT, community TEXT, job_title TEXT, mining_site_id INT); INSERT INTO employees (employee_id, name, gender, community, job_title, mining_site_id) VALUES (1, \u0027Aisha Mohamed\u0027, \u0027Female\u0027, \u0027Historically marginalized\u0027, \u0027Miner\u0027, 1), (2, \u0027Ali Omar\u0027, \u0027Male\u0027, \u0027Indigenous\u0027, \u0027Engineer\u0027, 1), (3, \u0027Fatima Ahmed\u0027, \u0027Female\u0027, \u0027Refugee\u0027, \u0027Manager\u0027, 2), (4, \u0027Ahmed Hassan\u0027, \u0027Male\u0027, \u0027LGBTQIA+\u0027, \u0027Miner\u0027, 2), (5, \u0027Zainab Nassir\u0027, \u0027Female\u0027, \u0027Person with disability\u0027, \u0027Engineer\u0027, 3);", + "sql": "SELECT site_name, community, COUNT(*) AS employee_count FROM employees JOIN mining_sites ON employees.mining_site_id \u003d mining_sites.site_id GROUP BY site_name, community;", + "sql_explanation": "The SQL query joins the employees and mining_sites tables on the mining_site_id. It then groups the records by site_name and community and calculates the count of records for each group." 
+}, { + "id": "1205", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of employees, average tenure, and mines with more than 10 years of operation for mines in the African continent.", + "sql_context": "CREATE TABLE production_data (id INT PRIMARY KEY, mine_id INT, year INT, monthly_production INT);CREATE TABLE reclamation_data (id INT PRIMARY KEY, mine_id INT, year INT, reclamation_cost INT);CREATE TABLE mine_employees (id INT PRIMARY KEY, mine_id INT, employee_id INT, employment_start_date DATE, employment_end_date DATE);CREATE TABLE employee_demographics (id INT PRIMARY KEY, employee_id INT, gender VARCHAR(255), ethnicity VARCHAR(255));CREATE VIEW employee_stats AS SELECT mine_id, COUNT(employee_id) as employee_count, AVG(DATEDIFF(employment_end_date, employment_start_date))/365 as avg_tenure FROM mine_employees GROUP BY mine_id;CREATE VIEW operation_duration AS SELECT mine_id, COUNT(DISTINCT year) as operation_years FROM production_data GROUP BY mine_id;", + "sql": "SELECT e.mine_id, e.employee_count, e.avg_tenure, o.operation_years FROM employee_stats e JOIN operation_duration o ON e.mine_id \u003d o.mine_id WHERE o.operation_years \u003e 10;", + "sql_explanation": "This query calculates the employee count, average tenure, and operation years for each mine in the African continent, filtered for mines with more than 10 years of operation." 
+}, { + "id": "12", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 companies by R\u0026D expenditures in the oncology department that have the highest sales growth between 2017 and 2018, excluding companies from North America.", + "sql_context": "CREATE TABLE companies (id INT, name VARCHAR(255), department VARCHAR(255), expenditures FLOAT, sales FLOAT, company_location VARCHAR(255)); INSERT INTO companies (id, name, department, expenditures, sales, company_location) VALUES (1, \u0027Global Pharma\u0027, \u0027Oncology\u0027, 15000000, 50000000, \u0027Europe\u0027), (2, \u0027BioTech Asia\u0027, \u0027Oncology\u0027, 12000000, 40000000, \u0027Asia\u0027), (3, \u0027Pharma Oceania\u0027, \u0027Cardiology\u0027, 9000000, 30000000, \u0027Oceania\u0027), (4, \u0027American Health\u0027, \u0027Oncology\u0027, 10000000, 60000000, \u0027North America\u0027), (5, \u0027South American Bio\u0027, \u0027Oncology\u0027, 11000000, 45000000, \u0027South America\u0027);", + "sql": "SELECT a.name, a.expenditures, a.sales, ((a.sales / b.sales - 1) * 100) AS sales_growth FROM companies a INNER JOIN companies b ON a.name \u003d b.name AND a.department \u003d b.department AND a.company_location \u003d b.company_location WHERE a.department \u003d \u0027Oncology\u0027 AND a.company_location NOT IN (\u0027North America\u0027) AND b.department \u003d \u0027Oncology\u0027 AND b.company_location NOT IN (\u0027North America\u0027) GROUP BY a.name, a.expenditures, a.sales ORDER BY a.expenditures DESC, sales_growth DESC LIMIT 3;", + "sql_explanation": "The SQL query lists the top 3 companies by 
R\u0026D expenditures in the oncology department that have the highest sales growth between 2017 and 2018, excluding companies from North America, by using a self-join to compare the sales of each company in the oncology department between 2017 and 2018. The outer query then selects the top 3 companies by R\u0026D expenditures and sales growth by grouping the results by name, expenditures, and sales, and ordering them by expenditures in descending order and sales growth in descending order. The sales growth is calculated as the percentage change in sales between 2017 and 2018." +}, { + "id": "180", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of drugs that have been approved out of all drugs in clinical trials.", + "sql_context": "CREATE TABLE clinical_trials (id INT PRIMARY KEY, trial_name VARCHAR(50), drug_name VARCHAR(50), trial_status VARCHAR(20), start_date DATE); CREATE TABLE drug_approval (id INT PRIMARY KEY, drug_name VARCHAR(50), approval_date DATE);", + "sql": "SELECT COUNT(DISTINCT ca.drug_name) as total_drugs, COUNT(DISTINCT da.drug_name) as approved_drugs, (COUNT(DISTINCT da.drug_name) * 100.0 / COUNT(DISTINCT ca.drug_name)) as approval_percentage FROM clinical_trials ca LEFT JOIN drug_approval da ON ca.drug_name \u003d da.drug_name;", + "sql_explanation": "Joins clinical_trials and drug_approval tables, calculates the total number of drugs and the number of approved drugs, and determines the percentage of approved drugs." 
+}, { + "id": "270", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales for each drug in the Canadian market that has an approved clinical trial?", + "sql_context": "CREATE TABLE drug_sales (drug_name TEXT, year INTEGER, sales INTEGER, market TEXT); INSERT INTO drug_sales (drug_name, year, sales, market) VALUES (\u0027DrugA\u0027, 2018, 1000000, \u0027Canada\u0027); INSERT INTO drug_sales (drug_name, year, sales, market) VALUES (\u0027DrugB\u0027, 2018, 2000000, \u0027Canada\u0027); INSERT INTO drug_sales (drug_name, year, sales, market) VALUES (\u0027DrugA\u0027, 2019, 1200000, \u0027Canada\u0027); INSERT INTO drug_sales (drug_name, year, sales, market) VALUES (\u0027DrugB\u0027, 2019, 2500000, \u0027Canada\u0027); CREATE TABLE clinical_trials (drug_name TEXT, trial_status TEXT, market TEXT); INSERT INTO clinical_trials (drug_name, trial_status, market) VALUES (\u0027DrugA\u0027, \u0027Approved\u0027, \u0027Canada\u0027); INSERT INTO clinical_trials (drug_name, trial_status, market) VALUES (\u0027DrugB\u0027, \u0027Rejected\u0027, \u0027Canada\u0027);", + "sql": "SELECT drug_sales.drug_name, SUM(drug_sales.sales) FROM drug_sales JOIN clinical_trials ON drug_sales.drug_name \u003d clinical_trials.drug_name WHERE clinical_trials.market \u003d \u0027Canada\u0027 AND clinical_trials.trial_status \u003d \u0027Approved\u0027 GROUP BY drug_sales.drug_name;", + "sql_explanation": "This query calculates the total sales for each drug in the Canadian market that has an approved clinical trial by joining the drug sales and clinical trials data on the drug_name column, 
filtering for rows where the market is \u0027Canada\u0027 and the trial_status is \u0027Approved\u0027, and grouping the sales data by drug_name, then summing up the sales values for each group." +}, { + "id": "772", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify drugs approved in the EU, but not yet available in the UK market.", + "sql_context": "CREATE TABLE drugs (drug_id INT, drug_name VARCHAR(255), manufacturer VARCHAR(255), approval_status VARCHAR(255), market_availability VARCHAR(255)); INSERT INTO drugs (drug_id, drug_name, manufacturer, approval_status, market_availability) VALUES (1, \u0027DrugX\u0027, \u0027ManufacturerA\u0027, \u0027Approved\u0027, \u0027Available in EU\u0027), (2, \u0027DrugY\u0027, \u0027ManufacturerB\u0027, \u0027Approved\u0027, \u0027Not available in UK\u0027); CREATE TABLE sales (sale_id INT, drug_id INT, sale_amount DECIMAL(10,2), sale_tax DECIMAL(10,2), country VARCHAR(255)); INSERT INTO sales (sale_id, drug_id, sale_amount, sale_tax, country) VALUES (1, 1, 0.00, 0.00, \u0027UK\u0027);", + "sql": "SELECT d.drug_name FROM drugs d JOIN sales s ON d.drug_id \u003d s.drug_id WHERE d.approval_status \u003d \u0027Approved\u0027 AND d.market_availability \u003d \u0027Available in EU\u0027 AND s.country !\u003d \u0027UK\u0027 GROUP BY d.drug_name;", + "sql_explanation": "The SQL query joins the drugs and sales tables based on drug_id. It filters for approved drugs available in the EU market and not available in the UK market, and returns the drug_name for each drug that meets the filter criteria." 
+}, { + "id": "967", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which manufacturers have more than 2 approved drugs that have sales revenue of more than 10 million in 2020?", + "sql_context": "CREATE TABLE sales (id INT PRIMARY KEY, drug_id INT, country VARCHAR(255), year INT, revenue DECIMAL(10,2)); CREATE TABLE drugs (id INT PRIMARY KEY, name VARCHAR(255), manufacturer VARCHAR(255), approval_date DATE);", + "sql": "SELECT manufacturer, COUNT(id) as total_drugs FROM drugs d JOIN sales s ON d.id \u003d s.drug_id WHERE s.year \u003d 2020 AND s.revenue \u003e 10000000 GROUP BY manufacturer HAVING total_drugs \u003e 2;", + "sql_explanation": "This SQL query retrieves the manufacturer and the count of approved drugs for manufacturers with more than 2 approved drugs that have sales revenue of more than 10 million in 2020. It joins the sales and drugs tables on the drug_id and id columns respectively. The result is then grouped by manufacturer and applies a filter with the HAVING clause to only include manufacturers with more than 2 drugs that have sales revenue of more than 10 million in 2020." 
+}, { + "id": "1271", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of Phase I, II, and III clinical trials for each drug in the neurology category?", + "sql_context": "CREATE TABLE drugs (id INT PRIMARY KEY, name VARCHAR(255), manufacturer VARCHAR(255), category VARCHAR(255)); CREATE TABLE clinical_trials (id INT PRIMARY KEY, name VARCHAR(255), drug_id INT, phase VARCHAR(10), start_date DATE, end_date DATE, FOREIGN KEY (drug_id) REFERENCES drugs(id)); INSERT INTO clinical_trials (id, name, drug_id, phase, start_date, end_date) VALUES (4, \u0027TrialB\u0027, 2, \u0027Phase I\u0027, \u00272020-01-01\u0027, \u00272020-12-31\u0027), (5, \u0027TrialC\u0027, 2, \u0027Phase II\u0027, \u00272020-01-01\u0027, \u00272020-12-31\u0027);", + "sql": "SELECT drugs.name, phase, COUNT(*) FROM drugs INNER JOIN clinical_trials ON drugs.id \u003d clinical_trials.drug_id WHERE category \u003d \u0027Neurology\u0027 GROUP BY drugs.name, phase;", + "sql_explanation": "This SQL query selects the number of Phase I, II, and III clinical trials for each drug in the neurology category by joining the drugs and clinical_trials tables on the drug_id foreign key, using the COUNT and GROUP BY functions, and filtering for clinical trials records that have a drug_id that exists in the drugs table and where the category is Neurology." 
+}, { + "id": "956", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many water treatment facilities are there in each country, and what is their distribution by continent?", + "sql_context": "CREATE TABLE facilities (id INT, facility_name VARCHAR(50), country VARCHAR(50), total_employees INT); INSERT INTO facilities (id, facility_name, country, total_employees) VALUES (1, \u0027Water Treatment Plant 1\u0027, \u0027Brazil\u0027, 25); INSERT INTO facilities (id, facility_name, country, total_employees) VALUES (2, \u0027Water Treatment Plant 2\u0027, \u0027India\u0027, 30);", + "sql": "SELECT country, COUNT(*) as facility_count, CONTINENT(location) as continent FROM facilities JOIN countries ON facilities.country \u003d countries.country_name GROUP BY country, continent;", + "sql_explanation": "The SQL query calculates the number of water treatment facilities in each country and categorizes them by continent using the CONTINENT() function." 
+}, { + "id": "1214", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water consumption per day in each country?", + "sql_context": "CREATE TABLE countries (country_name VARCHAR(50), country_abbr VARCHAR(5), population INT); INSERT INTO countries (country_name, country_abbr, population) VALUES (\u0027Brazil\u0027, \u0027BR\u0027, 210147125), (\u0027India\u0027, \u0027IN\u0027, 1352617328), (\u0027Indonesia\u0027, \u0027ID\u0027, 273523615); CREATE TABLE water_consumption (country_abbr VARCHAR(5), consumption_gallons INT, consumption_date DATE); INSERT INTO water_consumption (country_abbr, consumption_gallons, consumption_date) VALUES (\u0027BR\u0027, 98345200, \u00272022-01-01\u0027), (\u0027IN\u0027, 87345200, \u00272022-01-02\u0027), (\u0027ID\u0027, 76345200, \u00272022-01-03\u0027);", + "sql": "SELECT c.country_name, MAX(w.consumption_gallons) as max_consumption FROM water_consumption w JOIN countries c ON w.country_abbr \u003d c.country_abbr GROUP BY c.country_name;", + "sql_explanation": "This query joins the countries and water_consumption tables on the country_abbr column. It then calculates the maximum water consumption per day for each country by grouping the results by the country_name column and selecting the maximum value of the consumption_gallons column." 
+}, { + "id": "1318", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average water waste per household in each city in 2021, along with the total water consumption in each city?", + "sql_context": "CREATE TABLE wastewater_treatment (household_id INT, city VARCHAR(30), year INT, waste_amount FLOAT); CREATE TABLE water_consumption (city VARCHAR(30), year INT, consumption FLOAT);", + "sql": "SELECT w.city, AVG(w.waste_amount), SUM(c.consumption) FROM wastewater_treatment w INNER JOIN water_consumption c ON w.city\u003dc.city WHERE w.year\u003d2021 GROUP BY w.city;", + "sql_explanation": "This query calculates the average water waste per household and the total water consumption in each city in 2021 by joining the wastewater_treatment and water_consumption tables on the city column, where the year in wastewater_treatment is 2021, and grouping the results by city." 
+}, { + "id": "2350", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wastewater treatment plants are there in the Asian region?", + "sql_context": "CREATE TABLE regions (id INT, name VARCHAR(50), PRIMARY KEY(id)); INSERT INTO regions (id, name) VALUES (1, \u0027North America\u0027), (2, \u0027South America\u0027), (3, \u0027Asia\u0027), (4, \u0027Europe\u0027), (5, \u0027Africa\u0027); CREATE TABLE wastewater_treatment_plants (id INT, region INT, PRIMARY KEY(id), FOREIGN KEY (region) REFERENCES regions(id)); INSERT INTO wastewater_treatment_plants (id, region) VALUES (1, 1), (2, 1), (3, 3), (4, 3), (5, 3);", + "sql": "SELECT COUNT(*) as num_plants FROM wastewater_treatment_plants wwtp JOIN regions r ON wwtp.region \u003d r.id WHERE r.name \u003d \u0027Asia\u0027;", + "sql_explanation": "This query joins the wastewater_treatment_plants and regions tables on their common column (region). It then filters for rows where the region is \u0027Asia\u0027 and counts the number of wastewater treatment plants in that region." 
+}, { + "id": "333", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of unique visitors who attended both literary events and culinary workshops in the same month.", + "sql_context": "CREATE TABLE literary_events (id INT, visitor_id INT, visit_date DATE); CREATE TABLE culinary_workshops (id INT, visitor_id INT, visit_date DATE);", + "sql": "SELECT COUNT(DISTINCT visitor_id) AS unique_visitors FROM literary_events INNER JOIN culinary_workshops ON literary_events.visitor_id \u003d culinary_workshops.visitor_id AND MONTH(literary_events.visit_date) \u003d MONTH(culinary_workshops.visit_date);", + "sql_explanation": "The SQL query uses an INNER JOIN to find visitors who attended both literary events and culinary workshops in the same month. It then counts the number of unique visitors using the COUNT DISTINCT clause." 
+}, { + "id": "442", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many events received funding from the \"National Endowment for the Arts\" in the last 5 years?", + "sql_context": "CREATE TABLE events (event_id INT, event_name VARCHAR(50), event_date DATE); CREATE TABLE funding_sources (funding_id INT, event_id INT, source_name VARCHAR(50), funding_date DATE); INSERT INTO events (event_id, event_name, event_date) VALUES (1, \u0027Art in the Park\u0027, \u00272022-06-01\u0027), (2, \u0027Music Under the Stars\u0027, \u00272022-07-01\u0027); INSERT INTO funding_sources (funding_id, event_id, source_name, funding_date) VALUES (1, 1, \u0027National Endowment for the Arts\u0027, \u00272021-04-01\u0027), (2, 2, \u0027Local Arts Foundation\u0027, \u00272022-02-01\u0027);", + "sql": "SELECT COUNT(DISTINCT e.event_id) AS event_count FROM events e INNER JOIN funding_sources fs ON e.event_id \u003d fs.event_id WHERE fs.source_name \u003d \u0027National Endowment for the Arts\u0027 AND e.event_date \u003e\u003d DATEADD(year, -5, GETDATE());", + "sql_explanation": "This query counts the number of unique events that received funding from the \"National Endowment for the Arts\" in the last 5 years. It uses an inner join to combine the events and funding_sources tables, filters for the \"National Endowment for the Arts\" and events in the last 5 years, and then counts the distinct event IDs." 
+}, { + "id": "631", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many visitors attended \u0027Comedy Night\u0027 events at \u0027Comedy Club\u0027 in 2021?", + "sql_context": "CREATE TABLE if not exists venue (id INT, name VARCHAR(50)); CREATE TABLE if not exists event_calendar (id INT, venue_id INT, event_date DATE, event_name VARCHAR(50)); INSERT INTO venue (id, name) VALUES (1, \u0027Comedy Club\u0027); INSERT INTO event_calendar (id, venue_id, event_date, event_name) VALUES (1, 1, \u00272021-01-01\u0027, \u0027Comedy Night\u0027), (2, 1, \u00272021-06-12\u0027, \u0027Improv Show\u0027), (3, 1, \u00272022-09-28\u0027, \u0027Comedy Night\u0027);", + "sql": "SELECT COUNT(DISTINCT ec.id) FROM event_calendar ec JOIN venue v ON ec.venue_id \u003d v.id WHERE v.name \u003d \u0027Comedy Club\u0027 AND ec.event_name \u003d \u0027Comedy Night\u0027 AND ec.event_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "This query calculates the total number of unique visitors who attended \u0027Comedy Night\u0027 events at \u0027Comedy Club\u0027 in 2021 by joining the event_calendar and venue tables, filtering the rows based on venue, event name, and date, and counting the distinct id of visitors." 
+}, { + "id": "854", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by dance programs in urban areas?", + "sql_context": "CREATE SCHEMA if not exists arts_culture; CREATE TABLE if not exists arts_culture.programs(program_id INT, program_name VARCHAR(50), location VARCHAR(20), category VARCHAR(20)); CREATE TABLE if not exists arts_culture.funding(funding_id INT, program_id INT, amount INT);", + "sql": "SELECT SUM(funding.amount) FROM arts_culture.funding JOIN arts_culture.programs ON funding.program_id \u003d programs.program_id WHERE programs.location \u003d \u0027urban\u0027 AND programs.category \u003d \u0027dance\u0027;", + "sql_explanation": "This query calculates the total funding received by dance programs in urban areas. It does so by joining the \u0027funding\u0027 and \u0027programs\u0027 tables on the \u0027program_id\u0027 column. The WHERE clause filters the records for urban locations and dance programs, and the SUM function calculates the total amount of funding." 
+}, { + "id": "876", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many total volunteers contributed to community art projects in 2020?", + "sql_context": "CREATE TABLE volunteers (id INT PRIMARY KEY, name VARCHAR(50), hours_contributed INT, contribution_year INT); CREATE TABLE projects (id INT PRIMARY KEY, project_name VARCHAR(50), project_type VARCHAR(50));", + "sql": "SELECT SUM(hours_contributed) AS total_volunteer_hours FROM volunteers INNER JOIN projects ON volunteers.id \u003d projects.id WHERE project_type \u003d \u0027Community Art\u0027 AND contribution_year \u003d 2020;", + "sql_explanation": "Sum the total hours contributed by volunteers to community art projects in 2020 by joining the volunteers table and the projects table and filtering by the project type and contribution year." 
+}, { + "id": "880", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by recurring donors for dance performances in San Francisco?", + "sql_context": "CREATE TABLE donors (donor_id INT, name VARCHAR(50), is_recurring BOOLEAN, city VARCHAR(50)); CREATE TABLE donations (donation_id INT, donor_id INT, amount DECIMAL(10,2), event_type VARCHAR(50)); INSERT INTO donors (donor_id, name, is_recurring, city) VALUES (1, \u0027Jane Smith\u0027, TRUE, \u0027San Francisco\u0027); INSERT INTO donations (donation_id, donor_id, amount, event_type) VALUES (1, 1, 100.00, \u0027Dance\u0027);", + "sql": "SELECT SUM(d.amount) AS total_donated FROM donations d JOIN donors don ON d.donor_id \u003d don.donor_id WHERE don.city \u003d \u0027San Francisco\u0027 AND don.is_recurring \u003d TRUE AND d.event_type \u003d \u0027Dance\u0027;", + "sql_explanation": "This query calculates the total amount donated by recurring donors for dance performances in San Francisco by joining the donations and donors tables, filtering rows where the city is \u0027San Francisco\u0027, the donor is recurring, and the event type is \u0027Dance\u0027, and then calculating the sum of donation amounts." 
+}, { + "id": "1406", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many visitors from Canada supported our organization through donations in 2022?", + "sql_context": "CREATE TABLE donors (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO donors (donor_id, donation_amount, donation_date) VALUES (1, 100, \u00272022-01-05\u0027), (2, 250, \u00272022-03-20\u0027), (3, 50, \u00272021-12-31\u0027), (4, 75, \u00272022-11-28\u0027); CREATE TABLE countries (country_id INT, country_name VARCHAR(50)); INSERT INTO countries (country_id, country_name) VALUES (1, \u0027Canada\u0027), (2, \u0027United States\u0027), (3, \u0027Mexico\u0027);", + "sql": "SELECT COUNT(*) FROM donors JOIN countries ON donors.donation_date \u003e\u003d \u00272022-01-01\u0027 AND donors.donation_date \u003c \u00272023-01-01\u0027 AND countries.country_name \u003d \u0027Canada\u0027;", + "sql_explanation": "The donors and countries tables share no common column, so the ON clause of the join contains only filter conditions: the donation_date must fall within 2022 and the country_name must be \u0027Canada\u0027. Every 2022 donation row is therefore paired with the \u0027Canada\u0027 row, and COUNT(*) returns the number of donations made in 2022. Note that the schema does not record which country a donor is from, so the result is not actually restricted to Canadian donors."
+}, { + "id": "1614", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average age of attendees at jazz events in NY?", + "sql_context": "CREATE TABLE events (id INT, city VARCHAR(50), genre VARCHAR(50)); INSERT INTO events (id, city, genre) VALUES (1, \u0027NY\u0027, \u0027jazz\u0027), (2, \u0027LA\u0027, \u0027rock\u0027); CREATE TABLE attendee_demographics (event_id INT, age INT); INSERT INTO attendee_demographics (event_id, age) VALUES (1, 35), (1, 45), (1, 55), (2, 20), (2, 22);", + "sql": "SELECT AVG(age) FROM attendee_demographics JOIN events ON attendee_demographics.event_id \u003d events.id WHERE events.genre \u003d \u0027jazz\u0027 AND events.city \u003d \u0027NY\u0027;", + "sql_explanation": "The SQL query calculates the average age of attendees at jazz events in NY by joining attendee_demographics and events tables, filtering by genre \u0027jazz\u0027 and city \u0027NY\u0027, and then applying the AVG function to the age column." 
+}, { + "id": "1668", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cultural events in the Western region had the highest attendance?", + "sql_context": "CREATE TABLE events (id INT, region VARCHAR(50), event_name VARCHAR(50)); INSERT INTO events (id, region, event_name) VALUES (1, \u0027Midwest\u0027, \u0027Art Festival\u0027), (2, \u0027Northeast\u0027, \u0027Music Gala\u0027), (3, \u0027West Coast\u0027, \u0027Theater Showcase\u0027), (4, \u0027Southeast\u0027, \u0027Dance Recital\u0027); CREATE TABLE attendance (event_id INT, attendees INT); INSERT INTO attendance (event_id, attendees) VALUES (1, 500), (1, 550), (2, 300), (3, 800), (3, 850), (4, 400);", + "sql": "SELECT events.event_name, MAX(attendance.attendees) FROM events JOIN attendance ON events.id \u003d attendance.event_id WHERE events.region \u003d \u0027West Coast\u0027;", + "sql_explanation": "The SQL query identifies the cultural events with the highest attendance in the Western region by joining events and attendance tables, filtering by region \u0027West Coast\u0027, and then applying the MAX function to the attendees column." 
+}, { + "id": "1775", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of attendees for music events?", + "sql_context": "CREATE TABLE events (event_id INT, event_type VARCHAR(50)); INSERT INTO events (event_id, event_type) VALUES (1, \u0027Dance\u0027), (2, \u0027Theater\u0027), (3, \u0027Music\u0027); CREATE TABLE attendees (attendee_id INT, event_id INT); INSERT INTO attendees (attendee_id, event_id) VALUES (1, 1), (2, 1), (3, 3), (4, 3), (5, 3);", + "sql": "SELECT COUNT(DISTINCT attendees.attendee_id) FROM attendees JOIN events ON attendees.event_id \u003d events.event_id WHERE events.event_type \u003d \u0027Music\u0027;", + "sql_explanation": "Join attendees and events tables on event_id, filter records for music events, and count distinct attendee_id." 
+}, { + "id": "2736", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all funding sources for \u0027Theater\u0027 programs in 2022.", + "sql_context": "CREATE TABLE if not exists program (id INT, name VARCHAR(50), category VARCHAR(50)); CREATE TABLE if not exists funding (id INT, program_id INT, year INT, amount DECIMAL(10, 2), source VARCHAR(50)); INSERT INTO program (id, name, category) VALUES (1, \u0027Theater 101\u0027, \u0027Theater\u0027), (2, \u0027Broadway Hits\u0027, \u0027Theater\u0027), (3, \u0027Shakespeare Plays\u0027, \u0027Theater\u0027); INSERT INTO funding (id, program_id, year, amount, source) VALUES (1, 1, 2020, 15000, \u0027City Grant\u0027), (2, 1, 2021, 17500, \u0027Private Donor\u0027), (3, 2, 2020, 12000, \u0027Corporate Sponsor\u0027), (4, 2, 2021, 14000, \u0027Government Grant\u0027), (5, 3, 2020, 16000, \u0027Private Donor\u0027), (6, 3, 2021, 18500, \u0027City Grant\u0027), (7, 1, 2022, 20000, \u0027Private Donor\u0027);", + "sql": "SELECT source FROM funding f JOIN program p ON f.program_id \u003d p.id WHERE p.name LIKE \u0027%Theater%\u0027 AND f.year \u003d 2022;", + "sql_explanation": "This query lists all funding sources for \u0027Theater\u0027 programs in 2022 by joining the funding and program tables, filtering the rows based on program name and year, and selecting the source column." 
+}, { + "id": "3093", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount for recurring donors?", + "sql_context": "CREATE TABLE Donors (donor_id INT, name VARCHAR(255), country VARCHAR(255), recurring BOOLEAN); CREATE TABLE Donations (donation_id INT, donor_id INT, event_id INT, amount DECIMAL(10, 2));", + "sql": "SELECT AVG(amount) FROM Donations D JOIN Donors DD ON D.donor_id \u003d DD.donor_id WHERE DD.recurring \u003d TRUE;", + "sql_explanation": "This query calculates the average donation amount for recurring donors by joining the Donations and Donors tables and filtering for recurring donors." +}, { + "id": "206", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest number of defense contracts?", + "sql_context": "CREATE TABLE Contract_Country (id INT, country VARCHAR(50), contract_count INT); INSERT INTO Contract_Country (id, country, contract_count) VALUES (1, \u0027USA\u0027, 50), (2, \u0027Canada\u0027, 30); CREATE TABLE Contract_Country_Mapping (contract_id INT, country_id INT); INSERT INTO Contract_Country_Mapping (contract_id, country_id) VALUES (1, 1), (2, 1), (3, 2);", + "sql": "SELECT Contract_Country.country, SUM(Contract_Country_Mapping.contract_id) AS 
contract_count FROM Contract_Country JOIN Contract_Country_Mapping ON Contract_Country.id \u003d Contract_Country_Mapping.country_id GROUP BY Contract_Country.country ORDER BY contract_count DESC;", + "sql_explanation": "We select the country and the sum of the contract_id from the Contract_Country_Mapping table, join it with the Contract_Country table based on the country_id, group the results by country, and order the results by the contract_count in descending order." +}, { + "id": "2267", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display veteran unemployment rates by gender", + "sql_context": "CREATE TABLE veteran_population (state TEXT, gender TEXT, veterans INT, total_population INT); INSERT INTO veteran_population (state, gender, veterans, total_population) VALUES (\u0027California\u0027, \u0027Male\u0027, 1200000, 25000000), (\u0027California\u0027, \u0027Female\u0027, 800000, 25000000), (\u0027Texas\u0027, \u0027Male\u0027, 900000, 30000000), (\u0027Texas\u0027, \u0027Female\u0027, 600000, 30000000); CREATE TABLE unemployment (state TEXT, gender TEXT, rate FLOAT); INSERT INTO unemployment (state, gender, rate) VALUES (\u0027California\u0027, \u0027Male\u0027, 4.8), (\u0027California\u0027, \u0027Female\u0027, 3.2), (\u0027Texas\u0027, \u0027Male\u0027, 3.0), (\u0027Texas\u0027, \u0027Female\u0027, 2.0);", + "sql": "SELECT v.state, v.gender, u.rate FROM veteran_population v INNER JOIN unemployment u ON v.state \u003d u.state AND v.gender \u003d u.gender;", + "sql_explanation": "* Performs an inner join between veteran_population and unemployment tables on state and 
gender columns, returning the state and gender from veteran_population and the rate from unemployment." +}, { + "id": "30", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of cybersecurity incidents reported worldwide each year for the last 3 years and their respective impact levels?", + "sql_context": "CREATE TABLE CybersecurityImpact (id INT, impact_level TEXT); INSERT INTO CybersecurityImpact (id, impact_level) VALUES (1, \u0027High\u0027), (2, \u0027Medium\u0027), (3, \u0027Low\u0027); CREATE TABLE CybersecurityIncidentsByYear (id INT, year INT, impact_id INT); INSERT INTO CybersecurityIncidentsByYear (id, year, impact_id) VALUES (1, 2021, 1), (2, 2020, 2), (3, 2019, 3);", + "sql": "SELECT YEAR(CybersecurityIncidentsByYear.year) as year, COUNT(CybersecurityIncidentsByYear.id) as total_incidents, AVG(CybersecurityImpact.impact_level) as avg_impact FROM CybersecurityIncidentsByYear INNER JOIN CybersecurityImpact ON CybersecurityIncidentsByYear.impact_id \u003d CybersecurityImpact.id GROUP BY YEAR(CybersecurityIncidentsByYear.year) ORDER BY YEAR(CybersecurityIncidentsByYear.year) DESC LIMIT 3;", + "sql_explanation": "This query calculates the total number of cybersecurity incidents reported worldwide each year for the last 3 years and their respective impact levels by joining the CybersecurityIncidentsByYear table with the CybersecurityImpact table based on the impact_id. It then groups by year, counts the number of incidents, and calculates the average impact level."
+}, { + "id": "490", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify national security operations that used military technology in the same year", + "sql_context": "CREATE TABLE NationalSecurity (Id INT PRIMARY KEY, Country VARCHAR(50), Operation VARCHAR(50), Year INT);", + "sql": "SELECT NationalSecurity.Country, NationalSecurity.Operation FROM NationalSecurity INNER JOIN MilitaryTechnology ON NationalSecurity.Country \u003d MilitaryTechnology.Country AND NationalSecurity.Year \u003d MilitaryTechnology.Year;", + "sql_explanation": "The SQL query joins NationalSecurity and MilitaryTechnology on Country and Year. This identifies national security operations that used military technology in the same year." 
+}, { + "id": "552", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all intelligence operations and their related military technologies?", + "sql_context": "CREATE TABLE intelligence_operations (id INT, operation_name VARCHAR(50), country VARCHAR(50)); CREATE TABLE military_technologies (id INT, technology_name VARCHAR(50), operation_id INT); INSERT INTO intelligence_operations (id, operation_name, country) VALUES (1, \u0027Operation Desert Storm\u0027, \u0027USA\u0027), (2, \u0027Operation Enduring Freedom\u0027, \u0027USA\u0027), (3, \u0027Operation Slipper\u0027, \u0027Australia\u0027); INSERT INTO military_technologies (id, technology_name, operation_id) VALUES (1, \u0027M1 Abrams Tank\u0027, 1), (2, \u0027Predator Drone\u0027, 2), (3, \u0027Joint Strike Fighter\u0027, 2), (4, \u0027Collins Class Submarine\u0027, 3);", + "sql": "SELECT intelligence_operations.operation_name, military_technologies.technology_name FROM intelligence_operations INNER JOIN military_technologies ON intelligence_operations.id \u003d military_technologies.operation_id;", + "sql_explanation": "This SQL query lists all intelligence operations and their related military technologies by using an INNER JOIN on the intelligence_operations and military_technologies tables, joining them using the operation_id." 
+}, { + "id": "832", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total spending on military technologies for each country in the last decade?", + "sql_context": "CREATE TABLE country (id INT, name VARCHAR(255)); INSERT INTO country (id, name) VALUES (1, \u0027USA\u0027), (2, \u0027China\u0027), (3, \u0027Russia\u0027); CREATE TABLE military_tech (id INT, name VARCHAR(255), country_id INT, cost FLOAT); INSERT INTO military_tech (id, name, country_id, cost) VALUES (1, \u0027J-20\u0027, 2, 120000000), (2, \u0027Sukhoi Su-57\u0027, 3, 80000000);", + "sql": "SELECT c.name, SUM(mt.cost) as total_spending FROM military_tech mt JOIN country c ON mt.country_id \u003d c.id WHERE mt.timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 10 YEAR) GROUP BY c.name;", + "sql_explanation": "This SQL query joins the \u0027military_tech\u0027 table with the \u0027country\u0027 table using the country_id foreign key. It filters records within the last decade and groups them by country name, summing the cost for each country." 
+}, { + "id": "991", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of cybersecurity incidents per country in the last quarter?", + "sql_context": "CREATE TABLE country (id INT, name VARCHAR(255)); INSERT INTO country (id, name) VALUES (1, \u0027USA\u0027), (2, \u0027Canada\u0027), (3, \u0027Mexico\u0027); CREATE TABLE incident (id INT, country_id INT, timestamp TIMESTAMP); INSERT INTO incident (id, country_id) VALUES (1, 1), (2, 1), (3, 2), (4, 3), (5, 1);", + "sql": "SELECT c.name, COUNT(i.id) as num_incidents FROM incident i JOIN country c ON i.country_id \u003d c.id WHERE i.timestamp \u003e\u003d DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 MONTH) GROUP BY c.name;", + "sql_explanation": "This SQL query joins the \u0027incident\u0027 table with the \u0027country\u0027 table using the country_id foreign key. It filters incidents within the last quarter and groups them by country name, counting the number of incidents for each country." 
+}, { + "id": "1595", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of weapons manufactured by Russian companies?", + "sql_context": "CREATE TABLE manufacturer (id INT PRIMARY KEY, name VARCHAR(50), country VARCHAR(50)); INSERT INTO manufacturer (id, name, country) VALUES (1, \u0027MiG\u0027, \u0027Russia\u0027); INSERT INTO manufacturer (id, name, country) VALUES (2, \u0027Sukhoi\u0027, \u0027Russia\u0027);", + "sql": "SELECT m.country, AVG(w.weight) as avg_weight FROM weapon w JOIN manufacturer m ON w.manufacturer \u003d m.name WHERE m.country \u003d \u0027Russia\u0027 GROUP BY m.country;", + "sql_explanation": "This query joins the weapon and manufacturer tables, filters for Russian manufacturers, groups the results by country, and returns the average weight of weapons for Russia." 
+}, { + "id": "303", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List members who did both yoga and zumba workouts and their total workouts.", + "sql_context": "CREATE TABLE membership_data (member_id INT, join_date DATE); CREATE TABLE workout_data (workout_id INT, member_id INT, workout_type VARCHAR(20), workout_date DATE);", + "sql": "SELECT m.member_id, m.join_date, COUNT(w.workout_id) as total_workouts FROM membership_data m JOIN workout_data w ON m.member_id \u003d w.member_id WHERE w.workout_type IN (\u0027yoga\u0027, \u0027zumba\u0027) GROUP BY m.member_id HAVING COUNT(DISTINCT w.workout_type) \u003d 2;", + "sql_explanation": "The SQL query performs a JOIN on the membership_data and workout_data tables using member_id. It filters the data for members who did both yoga and zumba workouts using the IN and HAVING COUNT DISTINCT functions. The COUNT function is used to calculate the total number of workouts for each member who did both yoga and zumba workouts." 
+}, { + "id": "693", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have a gym membership and performed a Pilates workout in the last month?", + "sql_context": "CREATE TABLE Members (MemberID INT, GymMembership BOOLEAN); INSERT INTO Members (MemberID, GymMembership) VALUES (1, TRUE); INSERT INTO Members (MemberID, GymMembership) VALUES (2, FALSE); CREATE TABLE Workouts (WorkoutID INT, WorkoutDate DATE, WorkoutType VARCHAR(50), MemberID INT); INSERT INTO Workouts (WorkoutID, WorkoutDate, WorkoutType, MemberID) VALUES (1, \u00272022-02-01\u0027, \u0027Pilates\u0027, 1); INSERT INTO Workouts (WorkoutID, WorkoutDate, WorkoutType, MemberID) VALUES (2, \u00272022-02-10\u0027, \u0027Yoga\u0027, 2);", + "sql": "SELECT COUNT(*) FROM Members INNER JOIN Workouts ON Members.MemberID \u003d Workouts.MemberID WHERE Members.GymMembership \u003d TRUE AND Workouts.WorkoutDate \u003e\u003d \u00272022-02-01\u0027 AND Workouts.WorkoutType \u003d \u0027Pilates\u0027;", + "sql_explanation": "This query counts the number of members who have a gym membership and performed a Pilates workout in the last month. It filters the Workouts table based on the WorkoutDate and WorkoutType columns and then joins the Members table to filter on GymMembership. Lastly, the COUNT function counts the number of records that satisfy the filter condition." 
+}, { + "id": "711", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have an active membership in each state?", + "sql_context": "CREATE TABLE gym_locations (id INT, location_name VARCHAR(50), state VARCHAR(50), city VARCHAR(50), members INT);", + "sql": "SELECT state, COUNT(DISTINCT member_name) AS active_members FROM gym_locations JOIN gym_memberships ON gym_locations.location_name \u003d gym_memberships.location WHERE end_date \u003e CURDATE() GROUP BY state;", + "sql_explanation": "This query joins the gym_locations and gym_memberships tables on the location_name field. It then filters the results to only include members with an end_date greater than the current date, indicating an active membership. Finally, it groups the results by state and counts the number of unique active members." 
+}, { + "id": "719", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of members who have a gold membership and have used a weight machine in the past month?", + "sql_context": "CREATE TABLE Members (MemberID INT, Age INT, MembershipType VARCHAR(20)); INSERT INTO Members (MemberID, Age, MembershipType) VALUES (1, 25, \u0027Gold\u0027), (2, 30, \u0027Silver\u0027), (3, 35, \u0027Gold\u0027); CREATE TABLE Workout (MemberID INT, Equipment VARCHAR(20), Duration INT); INSERT INTO Workout (MemberID, Equipment, Duration) VALUES (1, \u0027Treadmill\u0027, 60), (2, \u0027Bike\u0027, 45), (3, \u0027Weight Machine\u0027, 30), (1, \u0027Weight Machine\u0027, 45, \u00272022-01-01\u0027), (2, \u0027Treadmill\u0027, 45, \u00272022-01-02\u0027), (3, \u0027Swimming Pool\u0027, 60, \u00272022-01-03\u0027);", + "sql": "SELECT AVG(Members.Age) FROM Members INNER JOIN Workout ON Members.MemberID \u003d Workout.MemberID WHERE Members.MembershipType \u003d \u0027Gold\u0027 AND Workout.Equipment \u003d \u0027Weight Machine\u0027 AND Workout.Duration \u003e 0;", + "sql_explanation": "This query calculates the average age of members with a gold membership who have used a weight machine in the past month. It does this by performing an inner join on the Members and Workout tables using the MemberID as the common key. It then filters the results to only include records where the MembershipType is \u0027Gold\u0027 and the Equipment is \u0027Weight Machine\u0027 and the Duration is greater than 0 (indicating that the equipment was used). 
Finally, it calculates the average age of the members that meet these criteria." +}, { + "id": "905", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of strength training workouts for members aged 25-35?", + "sql_context": "CREATE TABLE Members (MemberID INT, Name VARCHAR(50), Age INT); INSERT INTO Members (MemberID, Name, Age) VALUES (1, \u0027John Doe\u0027, 30); INSERT INTO Members (MemberID, Name, Age) VALUES (2, \u0027Jane Doe\u0027, 27); CREATE TABLE Workouts (WorkoutID INT, WorkoutDate DATE, WorkoutType VARCHAR(50), MemberID INT, Duration INT); INSERT INTO Workouts (WorkoutID, WorkoutDate, WorkoutType, MemberID, Duration) VALUES (1, \u00272022-02-01\u0027, \u0027Strength Training\u0027, 1, 60); INSERT INTO Workouts (WorkoutID, WorkoutDate, WorkoutType, MemberID, Duration) VALUES (2, \u00272022-02-10\u0027, \u0027Yoga\u0027, 2, 90);", + "sql": "SELECT SUM(Workouts.Duration) FROM Members INNER JOIN Workouts ON Members.MemberID \u003d Workouts.MemberID WHERE Members.Age BETWEEN 25 AND 35 AND Workouts.WorkoutType \u003d \u0027Strength Training\u0027;", + "sql_explanation": "This query calculates the total duration of strength training workouts for members aged 25-35. It filters the Workouts table based on the WorkoutType and MemberID columns and then joins the Members table to filter on age. Lastly, the SUM function calculates the sum of the Duration column for the filtered records." 
+}, { + "id": "993", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have a \u0027Basic\u0027 membership and do not own a smartwatch?", + "sql_context": "CREATE TABLE Members (MemberID INT, Age INT, MembershipType VARCHAR(10)); INSERT INTO Members (MemberID, Age, MembershipType) VALUES (1, 35, \u0027Premium\u0027), (2, 28, \u0027Basic\u0027), (3, 42, \u0027Premium\u0027); CREATE TABLE SmartwatchOwners (MemberID INT); INSERT INTO SmartwatchOwners (MemberID) VALUES (1), (3);", + "sql": "SELECT COUNT(*) FROM Members LEFT JOIN SmartwatchOwners ON Members.MemberID \u003d SmartwatchOwners.MemberID WHERE Members.MembershipType \u003d \u0027Basic\u0027 AND SmartwatchOwners.MemberID IS NULL;", + "sql_explanation": "This query counts the number of members who have a \u0027Basic\u0027 membership and do not own a smartwatch. It does this by performing a left join between the Members and SmartwatchOwners tables on the MemberID column, filtering for \u0027Basic\u0027 membership types and null values in the SmartwatchOwners table, and then counting the total number of records." 
+}, { + "id": "1025", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List members who did not do any workouts in the first quarter of 2020.", + "sql_context": "CREATE TABLE membership_data (member_id INT, join_date DATE); CREATE TABLE workout_data (workout_id INT, member_id INT, workout_date DATE);", + "sql": "SELECT m.member_id, m.join_date FROM membership_data m LEFT JOIN workout_data w ON m.member_id \u003d w.member_id WHERE QUARTER(w.workout_date) IS NULL AND YEAR(w.workout_date) \u003d 2020;", + "sql_explanation": "The SQL query performs a LEFT JOIN on the membership_data and workout_data tables using member_id. It filters the data for members who did not do any workouts in the first quarter of 2020 using the QUARTER and YEAR functions and the IS NULL condition." 
+}, { + "id": "1110", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 5 workout types by total duration in minutes for users aged 30-40.", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, type VARCHAR(20), duration INT, date DATE);", + "sql": "SELECT type, SUM(duration) as total_duration FROM workouts w JOIN users u ON w.user_id \u003d u.id WHERE u.age BETWEEN 30 AND 40 GROUP BY type ORDER BY total_duration DESC LIMIT 5;", + "sql_explanation": "Join workouts and users tables, filter for users aged 30-40, group by workout type, calculate total duration in minutes using SUM(), and return top 5 types by total duration with ORDER BY and LIMIT." 
+}, { + "id": "1155", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total duration of Yoga classes attended by members in their 30s?", + "sql_context": "CREATE TABLE Members (MemberID INT, Age INT, Gender VARCHAR(10), MembershipType VARCHAR(20)); INSERT INTO Members (MemberID, Age, Gender, MembershipType) VALUES (1, 35, \u0027Female\u0027, \u0027Premium\u0027), (2, 45, \u0027Male\u0027, \u0027Basic\u0027), (3, 28, \u0027Female\u0027, \u0027Premium\u0027); CREATE TABLE ClassAttendance (MemberID INT, Class VARCHAR(20), Duration INT, Date DATE); INSERT INTO ClassAttendance (MemberID, Class, Duration, Date) VALUES (1, \u0027Cycling\u0027, 60, \u00272022-01-01\u0027), (2, \u0027Yoga\u0027, 45, \u00272022-01-02\u0027), (3, \u0027Cycling\u0027, 60, \u00272022-01-03\u0027), (4, \u0027Yoga\u0027, 45, \u00272022-01-04\u0027), (5, \u0027Pilates\u0027, 30, \u00272022-01-05\u0027);", + "sql": "SELECT SUM(Duration) FROM Members JOIN ClassAttendance ON Members.MemberID \u003d ClassAttendance.MemberID WHERE Members.Age BETWEEN 30 AND 39 AND ClassAttendance.Class \u003d \u0027Yoga\u0027;", + "sql_explanation": "We are joining the Members table with the ClassAttendance table based on the MemberID. We then filter the records to only those where the Age is between 30 and 39 and the Class is \u0027Yoga\u0027 and calculate the total duration of the classes using the SUM function." 
+}, { + "id": "1178", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many members have a Premium membership in the Running club?", + "sql_context": "CREATE TABLE Members (MemberID INT, Age INT, Gender VARCHAR(10), MembershipType VARCHAR(20)); INSERT INTO Members (MemberID, Age, Gender, MembershipType) VALUES (1, 35, \u0027Female\u0027, \u0027Premium\u0027), (2, 45, \u0027Male\u0027, \u0027Basic\u0027), (3, 28, \u0027Female\u0027, \u0027Premium\u0027), (4, 32, \u0027Male\u0027, \u0027Premium\u0027), (5, 48, \u0027Female\u0027, \u0027Basic\u0027); CREATE TABLE ClubMembership (MemberID INT, Club VARCHAR(20)); INSERT INTO ClubMembership (MemberID, Club) VALUES (1, \u0027Cycling\u0027), (2, \u0027Yoga\u0027), (3, \u0027Running\u0027), (4, \u0027Pilates\u0027), (5, \u0027Cycling\u0027);", + "sql": "SELECT COUNT(*) FROM Members JOIN ClubMembership ON Members.MemberID \u003d ClubMembership.MemberID WHERE Members.MembershipType \u003d \u0027Premium\u0027 AND ClubMembership.Club \u003d \u0027Running\u0027;", + "sql_explanation": "We are joining the Members table with the ClubMembership table based on the MemberID. We then filter the records to only those where the MembershipType is \u0027Premium\u0027 and the Club is \u0027Running\u0027 and calculate the count of the records using the COUNT function." 
+}, { + "id": "1384", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average heart rate of members aged 25-30, during their morning workouts?", + "sql_context": "CREATE TABLE members (id INT, age INT, gender VARCHAR(10)); INSERT INTO members (id, age, gender) VALUES (1, 27, \u0027Female\u0027); CREATE TABLE workouts (id INT, member_id INT, date DATE, heart_rate INT); INSERT INTO workouts (id, member_id, date, heart_rate) VALUES (1, 1, \u00272021-08-01\u0027, 120);", + "sql": "SELECT AVG(heart_rate) FROM members JOIN workouts ON members.id \u003d workouts.member_id WHERE members.age BETWEEN 25 AND 30 AND HOUR(workouts.date) BETWEEN 6 AND 11;", + "sql_explanation": "The SQL query joins the members and workouts tables based on member IDs. It filters rows where the member\u0027s age is between 25 and 30 and the workout took place between 6 am and 11 am. The query then calculates the average heart rate during these workouts." 
+}, { + "id": "1706", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the unique last names of all personal trainers who have trained more than 3 members in the last week.", + "sql_context": "CREATE TABLE Trainers (TrainerID int, LastName varchar(20)); INSERT INTO Trainers (TrainerID, LastName) VALUES (1, \u0027Smith\u0027); CREATE TABLE Training (TrainerID int, MemberID int, TrainingDate date); INSERT INTO Training (TrainerID, MemberID, TrainingDate) VALUES (1, 1, CURDATE() - INTERVAL 5 DAY);", + "sql": "SELECT DISTINCT LastName FROM Trainers t JOIN Training tn ON t.TrainerID \u003d tn.TrainerID GROUP BY tn.TrainerID HAVING COUNT(DISTINCT tn.MemberID) \u003e 3;", + "sql_explanation": "We perform an inner join between Trainers and Training tables on TrainerID. We filter for training sessions in the last week (assumed to be filtered by date in actual implementation) and count the unique member count for each trainer, listing unique last names with more than 3 members trained." 
+}, { + "id": "1803", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of members who have attended a \u0027Cycling\u0027 class?", + "sql_context": "CREATE TABLE Members (MemberID INT, Age INT, Gender VARCHAR(10), MembershipType VARCHAR(20)); INSERT INTO Members (MemberID, Age, Gender, MembershipType) VALUES (1, 35, \u0027Female\u0027, \u0027Elite\u0027), (2, 45, \u0027Male\u0027, \u0027Basic\u0027), (3, 28, \u0027Female\u0027, \u0027Premium\u0027), (4, 32, \u0027Male\u0027, \u0027Elite\u0027), (5, 48, \u0027Female\u0027, \u0027Basic\u0027), (6, 38, \u0027Male\u0027, \u0027Elite\u0027), (7, 25, \u0027Female\u0027, \u0027Basic\u0027), (8, 42, \u0027Male\u0027, \u0027Premium\u0027), (9, 50, \u0027Female\u0027, \u0027Elite\u0027), (10, 22, \u0027Male\u0027, \u0027Basic\u0027); CREATE TABLE ClassAttendance (MemberID INT, Class VARCHAR(20), Date DATE); INSERT INTO ClassAttendance (MemberID, Class, Date) VALUES (1, \u0027Cycling\u0027, \u00272022-03-01\u0027), (2, \u0027Yoga\u0027, \u00272022-03-02\u0027), (3, \u0027Cycling\u0027, \u00272022-03-03\u0027), (4, \u0027Yoga\u0027, \u00272022-03-04\u0027), (5, \u0027Pilates\u0027, \u00272022-03-05\u0027), (6, \u0027Cycling\u0027, \u00272022-03-06\u0027), (7, \u0027Yoga\u0027, \u00272022-03-07\u0027), (8, \u0027Cycling\u0027, \u00272022-03-08\u0027), (9, \u0027Yoga\u0027, \u00272022-03-09\u0027), (10, \u0027Cycling\u0027, \u00272022-03-10\u0027);", + "sql": "SELECT AVG(Members.Age) FROM Members JOIN ClassAttendance ON Members.MemberID \u003d ClassAttendance.MemberID WHERE ClassAttendance.Class \u003d \u0027Cycling\u0027;", + "sql_explanation": 
"We are joining the Members table with the ClassAttendance table based on the MemberID. We then filter the records to only those where the Class is \u0027Cycling\u0027 and calculate the average age of members using the AVG function." +}, { + "id": "1817", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total weight lifted by users in the \u0027Elite\u0027 workout group who lifted more than 150 pounds in a single session?", + "sql_context": "CREATE TABLE workout_groups (id INT, user_id INT, group_label VARCHAR(10)); INSERT INTO workout_groups (id, user_id, group_label) VALUES (1, 21, \u0027Beginner\u0027); INSERT INTO workout_groups (id, user_id, group_label) VALUES (2, 22, \u0027Intermediate\u0027); INSERT INTO workout_groups (id, user_id, group_label) VALUES (3, 23, \u0027Elite\u0027); CREATE TABLE weights (id INT, workout_group_id INT, weight FLOAT); INSERT INTO weights (id, workout_group_id, weight) VALUES (1, 21, 120.5); INSERT INTO weights (id, workout_group_id, weight) VALUES (2, 22, 135.3); INSERT INTO weights (id, workout_group_id, weight) VALUES (3, 23, 200.7);", + "sql": "SELECT SUM(weight) FROM weights JOIN workout_groups ON weights.workout_group_id \u003d workout_groups.id WHERE group_label \u003d \u0027Elite\u0027 AND weight \u003e 150;", + "sql_explanation": "Sum the weight lifted by users in the \u0027Elite\u0027 workout group who lifted more than 150 pounds in a single session." 
+}, { + "id": "1895", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average duration of yoga workouts for members under 30 years old?", + "sql_context": "CREATE TABLE Workouts (WorkoutID INT, MemberID INT, WorkoutDate DATE, Duration INT, WorkoutType VARCHAR(20)); INSERT INTO Workouts (WorkoutID, MemberID, WorkoutDate, Duration, WorkoutType) VALUES (1, 1, \u00272023-01-01\u0027, 60, \u0027Yoga\u0027), (2, 2, \u00272023-01-02\u0027, 90, \u0027Cycling\u0027), (3, 3, \u00272023-01-03\u0027, 75, \u0027Yoga\u0027);", + "sql": "SELECT AVG(Duration) FROM Workouts INNER JOIN Members ON Workouts.MemberID \u003d Members.MemberID WHERE Members.Age \u003c 30 AND WorkoutType \u003d \u0027Yoga\u0027;", + "sql_explanation": "This query calculates the average duration of yoga workouts for members under 30 years old by joining the Workouts and Members tables on MemberID and filtering it to only include members under 30 years old and yoga workouts. It then calculates the average value of the Duration column using the AVG function." 
+}, { + "id": "2086", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum distance covered by users in the \u0027Intermediate\u0027 workout group?", + "sql_context": "CREATE TABLE run_data (id INT, user_id INT, distance FLOAT); INSERT INTO run_data (id, user_id, distance) VALUES (1, 17, 4.5); INSERT INTO run_data (id, user_id, distance) VALUES (2, 18, 6.2); CREATE TABLE workout_groups (id INT, user_id INT, group_label VARCHAR(10)); INSERT INTO workout_groups (id, user_id, group_label) VALUES (1, 17, \u0027Intermediate\u0027); INSERT INTO workout_groups (id, user_id, group_label) VALUES (2, 18, \u0027Advanced\u0027);", + "sql": "SELECT MIN(distance) FROM run_data JOIN workout_groups ON run_data.user_id \u003d workout_groups.user_id WHERE group_label \u003d \u0027Intermediate\u0027;", + "sql_explanation": "Find the minimum distance covered in the workout group labeled \u0027Intermediate\u0027." 
+}, { + "id": "2496", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert a new record of a user who joined on Jan 1, 2022 and last logged in on Jan 5, 2022 into the \"Members\" table", + "sql_context": "CREATE TABLE Members (Id INT PRIMARY KEY, FirstName VARCHAR(50), LastName VARCHAR(50), JoinDate DATETIME, LastLogin DATETIME);", + "sql": "INSERT INTO Members (Id, FirstName, LastName, JoinDate, LastLogin) VALUES (10, \u0027John\u0027, \u0027Doe\u0027, \u00272022-01-01\u0027, \u00272022-01-05\u0027);", + "sql_explanation": "This query inserts a new record of a user who joined on Jan 1, 2022 and last logged in on Jan 5, 2022 into the \"Members\" table. It uses the INSERT INTO statement followed by the table name and VALUES keyword. The VALUES keyword is followed by a pair of parentheses containing the values to be inserted, in this case, the Id, FirstName, LastName, JoinDate, and LastLogin columns." 
+}, { + "id": "2910", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average heart rate of users aged 25-34 during their workouts?", + "sql_context": "CREATE TABLE Users (id INT, age INT, gender VARCHAR(10)); INSERT INTO Users (id, age, gender) VALUES (1, 27, \u0027Female\u0027), (2, 31, \u0027Male\u0027); CREATE TABLE Workouts (id INT, userId INT, heartRate INT, duration INT); INSERT INTO Workouts (id, userId, heartRate, duration) VALUES (1, 1, 145, 30), (2, 1, 150, 45), (3, 2, 160, 60), (4, 2, 155, 40);", + "sql": "SELECT AVG(heartRate) FROM Workouts JOIN Users ON Workouts.userId \u003d Users.id WHERE Users.age BETWEEN 25 AND 34;", + "sql_explanation": "The SQL query joins the Users and Workouts tables on the user_id field and filters for users aged between 25 and 34. It then calculates the average heart rate for these users using the AVG function." 
+}, { + "id": "3010", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the latest join date of members who have ever attended a Zumba class?", + "sql_context": "CREATE TABLE Members (MemberID int, JoinDate date); INSERT INTO Members (MemberID, JoinDate) VALUES (1, \u00272021-01-01\u0027), (2, \u00272021-02-01\u0027); CREATE TABLE Classes (ClassID int, MemberID int, ClassType varchar(10), ClassDate date); INSERT INTO Classes (ClassID, MemberID, ClassType, ClassDate) VALUES (1, 1, \u0027Zumba\u0027, \u00272021-01-05\u0027), (2, 2, \u0027Zumba\u0027, \u00272021-02-15\u0027);", + "sql": "SELECT MAX(m.JoinDate) FROM Members m JOIN Classes c ON m.MemberID \u003d c.MemberID WHERE c.ClassType \u003d \u0027Zumba\u0027;", + "sql_explanation": "We find the maximum JoinDate of members who have attended a Zumba class." 
+}, { + "id": "3330", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the maximum duration of any workout for users aged 40-50.", + "sql_context": "CREATE TABLE workouts (id INT, user_id INT, duration INT, date DATE);", + "sql": "SELECT MAX(duration) FROM workouts w JOIN users u ON w.user_id \u003d u.id WHERE u.age BETWEEN 40 AND 50;", + "sql_explanation": "Join workouts and users tables, filter for users aged 40-50, and find the maximum workout duration using MAX()." +}, { + "id": "4196", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Count the number of users who joined in the second quarter of 2021.", + "sql_context": "CREATE TABLE Users (user_id INT, join_date DATE); INSERT INTO Users (user_id, join_date) VALUES (1, \u00272021-04-01\u0027), (2, \u00272021-05-15\u0027), (3, \u00272021-06-30\u0027), (4, \u00272021-07-01\u0027);", + "sql": "SELECT COUNT(*) FROM Users WHERE join_date BETWEEN \u00272021-04-01\u0027 AND \u00272021-06-30\u0027;", + "sql_explanation": "The SQL query counts the number of users who joined between the start and end dates of the second quarter of 2021 by filtering the Users table based on \u0027join_date\u0027 within the specified date range." 
+}, { + "id": "251", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of research grants awarded to each department in the past year.", + "sql_context": "CREATE TABLE Departments(DepartmentID INT, Department VARCHAR(255)); INSERT INTO Departments VALUES (1, \u0027Computer Science\u0027); CREATE TABLE ResearchGrants(GranteeID INT, DepartmentID INT, GrantAmount DECIMAL(10, 2), GrantDate DATE); INSERT INTO ResearchGrants VALUES (1, 1, 50000.00, \u00272021-01-01\u0027);", + "sql": "SELECT Departments.Department, COUNT(ResearchGrants.GranteeID) FROM Departments INNER JOIN ResearchGrants ON Departments.DepartmentID \u003d ResearchGrants.DepartmentID WHERE ResearchGrants.GrantDate \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY Departments.Department;", + "sql_explanation": "Join the Departments and ResearchGrants tables on DepartmentID. Then, filter the records based on the grant date within the past year and calculate the number of research grants awarded to each department. Finally, group the result by department." 
+}, { + "id": "408", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the research interests of professors who have advised the most graduate students in the past two years, and the number of students they have advised?", + "sql_context": "CREATE TABLE professor_advising (id INT, professor TEXT, num_students INT, year INT); INSERT INTO professor_advising (id, professor, num_students, year) VALUES (13, \u0027Alice\u0027, 7, 2021); INSERT INTO professor_advising (id, professor, num_students, year) VALUES (14, \u0027Bob\u0027, 6, 2020);", + "sql": "SELECT professor, research_interest, num_students FROM professors p JOIN professor_advising pa ON p.name \u003d pa.professor WHERE year BETWEEN 2020 AND 2021 GROUP BY professor, research_interest, num_students ORDER BY num_students DESC;", + "sql_explanation": "This query performs a join between the professors and professor_advising tables on the professor column, selecting the professor, research_interest, and num_students columns. The results are then filtered for rows where the year is between 2020 and 2021, grouped by the professor, research_interest, and num_students columns, and ordered by the num_students column in descending order." 
+}, { + "id": "485", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What percentage of research grants are awarded to underrepresented minority faculty members?", + "sql_context": "CREATE TABLE faculty (id INT, name VARCHAR(100), department VARCHAR(50), gender VARCHAR(50), race VARCHAR(50)); INSERT INTO faculty VALUES (1, \u0027Jane Smith\u0027, \u0027Computer Science\u0027, \u0027Female\u0027, \u0027African American\u0027); CREATE TABLE grants (id INT, faculty_id INT, amount DECIMAL(10,2)); INSERT INTO grants VALUES (1, 1, 50000);", + "sql": "SELECT 100.0 * SUM(CASE WHEN faculty.race IN (\u0027African American\u0027, \u0027Hispanic\u0027, \u0027Native American\u0027) THEN grants.amount ELSE 0 END) / SUM(grants.amount) AS percentage FROM grants JOIN faculty ON grants.faculty_id \u003d faculty.id;", + "sql_explanation": "The SQL query calculates the percentage of research grants awarded to underrepresented minority faculty members. It first calculates the sum of grants awarded to underrepresented minority faculty members using a CASE statement. Then, it calculates the total sum of all grants awarded. Finally, it divides the first sum by the second sum to get the percentage and multiplies it by 100.0 to convert it to a percentage." 
+}, { + "id": "626", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the total number of publications by female graduate students in the Computer Science department in the last 3 years.", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(50), gender VARCHAR(50), department VARCHAR(50), start_year INT); INSERT INTO students (id, name, gender, department, start_year) VALUES (1, \u0027Charlie\u0027, \u0027Male\u0027, \u0027Mathematics\u0027, 2018); INSERT INTO students (id, name, gender, department, start_year) VALUES (2, \u0027Dana\u0027, \u0027Female\u0027, \u0027Computer Science\u0027, 2019); CREATE TABLE publications (id INT, student_id INT, year INT, title VARCHAR(100)); INSERT INTO publications (id, student_id, year, title) VALUES (1, 1, 2020, \u0027Theory of Algebra\u0027); INSERT INTO publications (id, student_id, year, title) VALUES (2, 2, 2021, \u0027Machine Learning Algorithms\u0027);", + "sql": "SELECT COUNT(p.id) FROM publications p JOIN students s ON p.student_id \u003d s.id WHERE s.department \u003d \u0027Computer Science\u0027 AND s.gender \u003d \u0027Female\u0027 AND p.year BETWEEN YEAR(CURRENT_DATE) - 3 AND YEAR(CURRENT_DATE);", + "sql_explanation": "Join the publications and students tables, filter for female Computer Science graduate students and publications in the past 3 years, and calculate the total number of publications." 
+}, { + "id": "964", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of graduate students in each department who received research grant funding", + "sql_context": "CREATE TABLE Department (id INT, name VARCHAR(255)); INSERT INTO Department (id, name) VALUES (1, \u0027Computer Science\u0027), (2, \u0027Physics\u0027), (3, \u0027English\u0027); CREATE TABLE Student (id INT, department_id INT, research_grant_funding INT); INSERT INTO Student (id, department_id, research_grant_funding) VALUES (1, 1, 50000), (2, 2, NULL), (3, 1, 60000), (4, 3, 40000);", + "sql": "SELECT d.name as department, COUNT(s.id) as num_students_funded FROM Department d JOIN Student s ON d.id \u003d s.department_id WHERE s.research_grant_funding IS NOT NULL GROUP BY d.name;", + "sql_explanation": "This SQL query joins the Department and Student tables on the department_id foreign key and filters students who received research grant funding by checking for NULL values. It then groups the results by department and calculates the number of students in each department who received research grant funding." 
+}, { + "id": "1740", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of research grants awarded to each department for a specific year?", + "sql_context": "CREATE TABLE department (id INT, name TEXT); CREATE TABLE research_grants (id INT, department_id INT, amount INT, year INT);", + "sql": "SELECT d.name, r.year, SUM(r.amount) FROM department d JOIN research_grants r ON d.id \u003d r.department_id WHERE r.year \u003d 2020 GROUP BY d.name, r.year;", + "sql_explanation": "The SQL query joins the department and research_grants tables on their respective ID columns, then filters the results to only include grants awarded in the year 2020 using the WHERE clause, sums the amount column for each department and year using the SUM function and groups the results by department name and year using GROUP BY." 
+}, { + "id": "2040", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of research grants awarded to graduate students in the \u0027Computer Science\u0027 department.", + "sql_context": "CREATE TABLE Students (StudentID int, Department varchar(50)); INSERT INTO Students (StudentID, Department) VALUES (1, \u0027Computer Science\u0027); INSERT INTO Students (StudentID, Department) VALUES (2, \u0027Electrical Engineering\u0027); CREATE TABLE Grants (GrantID int, StudentID int); INSERT INTO Grants (GrantID, StudentID) VALUES (1, 1); INSERT INTO Grants (GrantID, StudentID) VALUES (2, 2);", + "sql": "SELECT COUNT(*) FROM Students INNER JOIN Grants ON Students.StudentID \u003d Grants.StudentID WHERE Students.Department \u003d \u0027Computer Science\u0027;", + "sql_explanation": "The SQL query performs an inner join between the Students and Grants tables, based on the StudentID. It then filters the results to only include rows where the Department is \u0027Computer Science\u0027. Finally, it uses the COUNT() function to count the number of rows in the result set." 
+}, { + "id": "2260", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of research grants awarded per faculty member in the \"Mathematics\" department?", + "sql_context": "CREATE TABLE faculties (faculty_id INT, name VARCHAR(50), department VARCHAR(20)); INSERT INTO faculties (faculty_id, name, department) VALUES (1, \u0027Jose Hernandez\u0027, \u0027Mathematics\u0027), (2, \u0027Sofia Rodriguez\u0027, \u0027Mathematics\u0027), (3, \u0027Ali Al-Sayed\u0027, \u0027Physics\u0027); CREATE TABLE research_grants (grant_id INT, title VARCHAR(50), amount DECIMAL(10,2), principal_investigator VARCHAR(50), faculty_id INT, start_date DATE, end_date DATE); INSERT INTO research_grants (grant_id, title, amount, principal_investigator, faculty_id, start_date, end_date) VALUES (1, \u0027Project C\u0027, 50000, \u0027Jose Hernandez\u0027, 1, \u00272022-01-01\u0027, \u00272024-12-31\u0027), (2, \u0027Project D\u0027, 100000, \u0027Sofia Rodriguez\u0027, 2, \u00272021-07-01\u0027, \u00272023-06-30\u0027);", + "sql": "SELECT AVG(rg.amount) FROM research_grants rg JOIN faculties f ON rg.faculty_id \u003d f.faculty_id WHERE f.department \u003d \u0027Mathematics\u0027;", + "sql_explanation": "This query calculates the average amount of research grants awarded per faculty member in the \"Mathematics\" department. It joins the research_grants and faculties tables on the faculty_id column, and filters the data based on the department. It then uses the AVG function to calculate the average amount of grants." 
+}, { + "id": "2444", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the graduate students in the College of Medicine who have not published any papers.", + "sql_context": "CREATE TABLE students (id INT, name VARCHAR(50), department VARCHAR(50)); CREATE TABLE papers (id INT, student_id INT, title VARCHAR(100)); INSERT INTO students VALUES (1, \u0027Grace\u0027, \u0027Medicine\u0027), (2, \u0027Hannah\u0027, \u0027Medicine\u0027), (3, \u0027Ivan\u0027, \u0027Medicine\u0027); INSERT INTO papers VALUES (1, 1, \u0027Paper 1\u0027), (2, 1, \u0027Paper 2\u0027), (3, 2, \u0027Paper 3\u0027);", + "sql": "SELECT students.id, students.name FROM students LEFT JOIN papers ON students.id \u003d papers.student_id WHERE papers.id IS NULL;", + "sql_explanation": "This query performs a left join of the students and papers tables on the students.id and papers.student_id columns. It then filters the records where the papers.id column is null, indicating that there is no match in the papers table. The DISTINCT keyword is not used in this case because it is not necessary to remove any duplicate records." 
+}, { + "id": "2547", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total funding for research grants in the College of Education where the grant amount is greater than $70,000.", + "sql_context": "CREATE TABLE grants (id INT, college VARCHAR(50), amount DECIMAL(10,2)); INSERT INTO grants (id, college, amount) VALUES (1, \u0027Education\u0027, 75000); INSERT INTO grants (id, college, amount) VALUES (2, \u0027Engineering\u0027, 40000); CREATE TABLE colleges (id INT, name VARCHAR(50)); INSERT INTO colleges (id, name) VALUES (1, \u0027Education\u0027); INSERT INTO colleges (id, name) VALUES (2, \u0027Engineering\u0027);", + "sql": "SELECT SUM(g.amount) FROM grants g JOIN colleges c ON g.college \u003d c.name WHERE c.name \u003d \u0027Education\u0027 AND g.amount \u003e 70000;", + "sql_explanation": "Join the grants and colleges tables, filter for the Education college and grants with an amount greater than $70,000, and calculate the total funding for these grants." 
+}, { + "id": "2572", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of research grants awarded by funding agency?", + "sql_context": "CREATE TABLE funding_agency (id INT, name TEXT); CREATE TABLE research_grants (id INT, funding_agency_id INT, amount INT);", + "sql": "SELECT f.name, SUM(r.amount) FROM funding_agency f JOIN research_grants r ON f.id \u003d r.funding_agency_id GROUP BY f.name;", + "sql_explanation": "The SQL query joins the funding_agency and research_grants tables on their respective ID columns, then sums the amount column for each funding agency using the SUM function and groups the results by funding agency name using GROUP BY." 
+}, { + "id": "2946", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary for each department?", + "sql_context": "CREATE TABLE departments (id INT, name TEXT, budget INT); INSERT INTO departments (id, name, budget) VALUES (1, \u0027Computer Science\u0027, 1000000), (2, \u0027Mathematics\u0027, 750000); CREATE TABLE faculty (id INT, name TEXT, department TEXT, salary INT); INSERT INTO faculty (id, name, department, salary) VALUES (1, \u0027John Doe\u0027, \u0027Computer Science\u0027, 80000), (2, \u0027Jane Smith\u0027, \u0027Mathematics\u0027, 70000);", + "sql": "SELECT d.name, AVG(f.salary) FROM departments d INNER JOIN faculty f ON d.name \u003d f.department GROUP BY d.name;", + "sql_explanation": "This query performs an inner join between the departments and faculty tables, joining on the name columns. It then groups the results by the name column from the departments table and calculates the average of the salary column from the faculty table for each group, providing the average salary for each department." 
+}, { + "id": "2949", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Delete all grant records for faculty members who do not have a research interest in \u0027Physics\u0027.", + "sql_context": "CREATE TABLE grants (id INT, title TEXT, amount FLOAT, faculty_name VARCHAR(50)); CREATE TABLE faculty (id INT, name VARCHAR(50), research_interest TEXT); INSERT INTO grants (id, title, amount, faculty_name) VALUES (1, \u0027Fundamentals of Organic Chemistry\u0027, 50000, \u0027Alice\u0027); INSERT INTO grants (id, title, amount, faculty_name) VALUES (2, \u0027Advanced Physical Chemistry\u0027, 75000, \u0027Bob\u0027); INSERT INTO faculty (id, name, research_interest) VALUES (1, \u0027Alice\u0027, \u0027Chemistry\u0027); INSERT INTO faculty (id, name, research_interest) VALUES (2, \u0027Bob\u0027, \u0027Physics\u0027);", + "sql": "DELETE g FROM grants g INNER JOIN faculty f ON g.faculty_name \u003d f.name WHERE f.research_interest !\u003d \u0027Physics\u0027;", + "sql_explanation": "This query performs an inner join between the grants and faculty tables on the faculty_name column. It then filters the resulting table to only include rows where the research_interest is not Physics. It deletes all rows from the grants table that are in the resulting table." 
+}, { + "id": "3150", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum amount of grant funding received by a single faculty member in the Physics department in a single year?", + "sql_context": "CREATE TABLE grants (id INT, faculty_id INT, year INT, amount DECIMAL(10,2)); INSERT INTO grants (id, faculty_id, year, amount) VALUES (1, 1, 2020, 25000); INSERT INTO grants (id, faculty_id, year, amount) VALUES (2, 2, 2019, 30000); CREATE TABLE faculty (id INT, name VARCHAR(50), department VARCHAR(50)); INSERT INTO faculty (id, name, department) VALUES (1, \u0027Eva\u0027, \u0027Physics\u0027); INSERT INTO faculty (id, name, department) VALUES (2, \u0027Frank\u0027, \u0027Chemistry\u0027);", + "sql": "SELECT MAX(g.amount) FROM grants g JOIN faculty f ON g.faculty_id \u003d f.id WHERE f.department \u003d \u0027Physics\u0027;", + "sql_explanation": "Join the grants and faculty tables, filter for Physics faculty members, and calculate the maximum grant amount awarded to a single faculty member in a single year." 
+}, { + "id": "3660", + "domain": "higher education", + "domain_description": "Graduate student records, research grant data, academic publishing statistics, and faculty diversity metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the research grant titles that do not have a corresponding publication?", + "sql_context": "CREATE TABLE grant (id INT, title VARCHAR(100)); CREATE TABLE publication (id INT, title VARCHAR(100), grant_id INT);", + "sql": "SELECT g.title FROM grant g LEFT JOIN publication p ON g.title \u003d p.title WHERE p.id IS NULL;", + "sql_explanation": "Perform a left join on the grant and publication tables on the title columns, then select the title column from the grant table where the id column in the publication table is NULL." +}, { + "id": "29", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of aid provided by each government, for community development projects in Southeast Asia, in the last 10 years, and the average duration of the projects?", + "sql_context": "CREATE TABLE community_development_projects (project_id INT, government_id INT, start_date DATE, end_date DATE, aid DECIMAL(10,2)); INSERT INTO community_development_projects VALUES (1, 1, \u00272011-01-01\u0027, \u00272013-12-31\u0027, 50000); INSERT INTO community_development_projects VALUES (2, 1, \u00272014-01-01\u0027, \u00272016-12-31\u0027, 
75000); INSERT INTO community_development_projects VALUES (3, 2, \u00272015-01-01\u0027, \u00272017-12-31\u0027, 100000); INSERT INTO community_development_projects VALUES (4, 2, \u00272018-01-01\u0027, \u00272020-12-31\u0027, 80000);", + "sql": "SELECT government.name as government, SUM(aid) as total_aid, AVG(DATEDIFF(end_date, start_date) / 365) as avg_project_duration FROM community_development_projects JOIN government ON community_development_projects.government_id \u003d government.government_id WHERE government.region \u003d \u0027Southeast Asia\u0027 AND community_development_projects.start_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 10 YEAR) GROUP BY government.name;", + "sql_explanation": "This query first joins the \u0027community_development_projects\u0027 and \u0027government\u0027 tables on the government_id field. It then filters the data for governments located in Southeast Asia and projects that started in the last 10 years. The query then groups the results by government, calculates the total aid, and calculates the average duration of the projects." 
+}, { + "id": "872", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many total items were delivered to \u0027region\u0027 South America in January 2022?", + "sql_context": "CREATE TABLE delivery (delivery_id INT, region VARCHAR(50), delivery_date DATE); INSERT INTO delivery (delivery_id, region, delivery_date) VALUES (1, \u0027South America\u0027, \u00272022-01-05\u0027), (2, \u0027North America\u0027, \u00272022-01-10\u0027), (3, \u0027South America\u0027, \u00272022-01-15\u0027); CREATE TABLE item (item_id INT, delivery_id INT); INSERT INTO item (item_id, delivery_id) VALUES (1, 1), (2, 1), (3, 2), (4, 3);", + "sql": "SELECT COUNT(i.item_id) FROM item i JOIN delivery d ON i.delivery_id \u003d d.delivery_id WHERE d.region \u003d \u0027South America\u0027 AND d.delivery_date \u003e\u003d \u00272022-01-01\u0027 AND d.delivery_date \u003c \u00272022-02-01\u0027;", + "sql_explanation": "This query calculates the total number of items delivered to South America in January 2022. It does so by joining the item and delivery tables on the delivery_id field. It then filters for deliveries to South America and within the month of January 2022 and finally calculates the total number of items using the COUNT function." 
+}, { + "id": "1050", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of days between deliveries to the same location?", + "sql_context": "CREATE TABLE delivery (delivery_id INT, delivery_date DATE, location VARCHAR(100)); INSERT INTO delivery (delivery_id, delivery_date, location) VALUES (1, \u00272022-01-01\u0027, \u0027Location A\u0027), (2, \u00272022-01-05\u0027, \u0027Location A\u0027), (3, \u00272022-01-10\u0027, \u0027Location B\u0027);", + "sql": "SELECT MIN(DATEDIFF(d2.delivery_date, d1.delivery_date)) FROM delivery d1 JOIN delivery d2 ON d1.location \u003d d2.location AND d1.delivery_date \u003c d2.delivery_date GROUP BY location;", + "sql_explanation": "This query calculates the minimum number of days between deliveries to the same location. It does so by joining the delivery table to itself on the location field and calculating the difference between delivery dates using the DATEDIFF function. It then groups by location and calculates the minimum difference using the MIN function." 
+}, { + "id": "1839", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the names of volunteers who have not been assigned to any tasks in the \u0027volunteers\u0027 and \u0027assignments\u0027 tables.", + "sql_context": "CREATE TABLE volunteers (id INT, name VARCHAR(50)); INSERT INTO volunteers (id, name) VALUES (1, \u0027Volunteer1\u0027), (2, \u0027Volunteer2\u0027), (3, \u0027Volunteer3\u0027); CREATE TABLE assignments (id INT, volunteer_id INT, task VARCHAR(50)); INSERT INTO assignments (id, volunteer_id, task) VALUES (1, 1, \u0027Task1\u0027), (2, 2, \u0027Task2\u0027);", + "sql": "SELECT volunteers.name FROM volunteers LEFT JOIN assignments ON volunteers.id \u003d assignments.volunteer_id WHERE assignments.volunteer_id IS NULL;", + "sql_explanation": "This query performs a left join between the \u0027volunteers\u0027 and \u0027assignments\u0027 tables on the \u0027id\u0027 and \u0027volunteer_id\u0027 columns, respectively, and returns the names of volunteers who have not been assigned to any tasks." 
+}, { + "id": "2510", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of food assistance provided to refugees?", + "sql_context": "CREATE TABLE refugees (id INT, camp_id INT, food_assistance_given BOOLEAN); INSERT INTO refugees (id, camp_id, food_assistance_given) VALUES (1, 1001, TRUE), (2, 1001, FALSE), (3, 1002, TRUE); CREATE TABLE food_assistance (id INT, camp_id INT, amount FLOAT); INSERT INTO food_assistance (id, camp_id, amount) VALUES (1001, 1001, 500), (1002, 1001, 700), (1003, 1002, 900);", + "sql": "SELECT SUM(amount) FROM food_assistance fa JOIN refugees r ON fa.camp_id \u003d r.camp_id WHERE r.food_assistance_given \u003d TRUE;", + "sql_explanation": "Calculates the sum of \u0027amount\u0027 in the \u0027food_assistance\u0027 table where \u0027food_assistance_given\u0027 is TRUE in the \u0027refugees\u0027 table." 
+}, { + "id": "2578", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per donor from \u0027country_of_origin\u0027 France?", + "sql_context": "CREATE TABLE donor (donor_id INT, name VARCHAR(100), country_of_origin VARCHAR(50)); INSERT INTO donor (donor_id, name, country_of_origin) VALUES (1, \u0027John Doe\u0027, \u0027France\u0027), (2, \u0027Jane Smith\u0027, \u0027USA\u0027); CREATE TABLE donation (donation_id INT, donor_id INT, amount DECIMAL(10,2)); INSERT INTO donation (donation_id, donor_id, amount) VALUES (1, 1, 50.00), (2, 1, 100.00), (3, 2, 75.00);", + "sql": "SELECT AVG(d.amount) FROM donation d JOIN donor don ON d.donor_id \u003d don.donor_id WHERE don.country_of_origin \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the average donation amount per donor from France. It does so by joining the donation and donor tables on the donor_id field. It then filters for donors from France and finally calculates the average donation amount for those donors using the AVG function." 
+}, { + "id": "2700", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the projects and their budget in South America?", + "sql_context": "CREATE TABLE if not exists countries (id INT PRIMARY KEY, name VARCHAR(50), continent VARCHAR(50)); INSERT INTO countries (id, name, continent) VALUES (1, \u0027Argentina\u0027, \u0027South America\u0027); INSERT INTO countries (id, name, continent) VALUES (2, \u0027Brazil\u0027, \u0027South America\u0027); CREATE TABLE if not exists projects (id INT PRIMARY KEY, name VARCHAR(50), country_id INT, budget DECIMAL(10,2)); INSERT INTO projects (id, name, country_id, budget) VALUES (1, \u0027Disaster Response\u0027, 1, 50000.00); INSERT INTO projects (id, name, country_id, budget) VALUES (2, \u0027Community Development\u0027, 1, 70000.00); INSERT INTO projects (id, name, country_id, budget) VALUES (3, \u0027Refugee Support\u0027, 2, 60000.00);", + "sql": "SELECT p.name, p.budget FROM projects p JOIN countries c ON p.country_id \u003d c.id WHERE c.continent \u003d \u0027South America\u0027;", + "sql_explanation": "This query selects the name and budget of projects from the \u0027projects\u0027 table that are located in countries from the \u0027countries\u0027 table, where the continent is \u0027South America\u0027. The JOIN clause merges the \u0027projects\u0027 and \u0027countries\u0027 tables on the \u0027country_id\u0027 and \u0027id\u0027 columns, respectively." 
+}, { + "id": "80", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste quantity generated and the total number of circular economy initiatives, for each location and material, for the fourth quarter of 2024?", + "sql_context": "CREATE TABLE WasteGeneration (Date date, Location text, Material text, Quantity integer);CREATE TABLE CircularEconomyInitiatives (Location text, Initiative text, StartDate date);", + "sql": "SELECT wg.Location, wg.Material, SUM(wg.Quantity) as TotalWasteQuantity, COUNT(DISTINCT cei.Initiative) as NumberOfInitiatives FROM WasteGeneration wg LEFT JOIN CircularEconomyInitiatives cei ON wg.Location \u003d cei.Location WHERE wg.Date \u003e\u003d \u00272024-10-01\u0027 AND wg.Date \u003c \u00272025-01-01\u0027 GROUP BY wg.Location, wg.Material;", + "sql_explanation": "This query calculates the total waste quantity generated and the total number of distinct circular economy initiatives for each location and material for the fourth quarter of 2024. It performs a left join between the WasteGeneration and CircularEconomyInitiatives tables on the Location column, and filters the results for records with a date between October 1st, 2024 and December 31st, 2024. Then, it groups the results by location and material and calculates the sum of Quantity and the count of distinct records for Initiative for each group." 
+}, { + "id": "131", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total waste quantity generated per location and material, and the total landfill capacity for each location, for the year 2020?", + "sql_context": "CREATE TABLE WasteGeneration (Date date, Location text, Material text, Quantity integer);CREATE TABLE LandfillCapacity (Location text, Capacity integer);", + "sql": "SELECT wg.Location, wg.Material, SUM(wg.Quantity) as TotalWasteQuantity, lc.Capacity as TotalLandfillCapacity FROM WasteGeneration wg JOIN LandfillCapacity lc ON wg.Location \u003d lc.Location WHERE wg.Date \u003e\u003d \u00272020-01-01\u0027 AND wg.Date \u003c \u00272021-01-01\u0027 GROUP BY wg.Location, wg.Material, lc.Capacity;", + "sql_explanation": "This query calculates the total waste quantity generated and the total landfill capacity for each location and material for the year 2020. It performs an inner join between the WasteGeneration and LandfillCapacity tables on the Location column, and filters the results for records with a date in 2020. Then, it groups the results by location, material, and capacity and calculates the sum of Quantity and the value of Capacity for each group." 
+}, { + "id": "220", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total weight of cannabis concentrate sold by dispensaries owned by historically underrepresented communities in the state of California in the month of January 2022?", + "sql_context": "CREATE TABLE Dispensaries (id INT, name VARCHAR(255), city VARCHAR(255), state VARCHAR(255), owner_group VARCHAR(255));CREATE TABLE Inventory (id INT, dispensary_id INT, weight DECIMAL(10, 2), product_type VARCHAR(255), month INT, year INT);INSERT INTO Dispensaries (id, name, city, state, owner_group) VALUES (1, \u0027NuWay\u0027, \u0027Los Angeles\u0027, \u0027CA\u0027, \u0027Historically Underrepresented\u0027);INSERT INTO Inventory (id, dispensary_id, weight, product_type, month, year) VALUES (1, 1, 75, \u0027concentrate\u0027, 1, 2022);", + "sql": "SELECT d.name, SUM(i.weight) as total_weight FROM Dispensaries d JOIN Inventory i ON d.id \u003d i.dispensary_id WHERE d.state \u003d \u0027CA\u0027 AND d.owner_group \u003d \u0027Historically Underrepresented\u0027 AND i.product_type \u003d \u0027concentrate\u0027 AND i.month \u003d 1 AND i.year \u003d 2022 GROUP BY d.name;", + "sql_explanation": "This query joins the Dispensaries and Inventory tables on the dispensary_id and dispensary.id foreign keys. It then filters for concentrate sales in California in January 2022 from dispensaries owned by historically underrepresented communities, and groups by dispensary name to calculate the total weight of cannabis concentrate sold." 
+}, { + "id": "315", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each producer in the Southeast, considering their quantity, price, and the corresponding dispensary\u0027s sales?", + "sql_context": "CREATE TABLE producers (id INT PRIMARY KEY, name TEXT, region TEXT, product TEXT, quantity INT, price FLOAT); INSERT INTO producers (id, name, region, product, quantity, price) VALUES (1, \u0027Southeast Growers\u0027, \u0027Southeast\u0027, \u0027Cannabis Flower\u0027, 1000, 15), (2, \u0027Southern Harvest\u0027, \u0027Southeast\u0027, \u0027Cannabis Pre-rolls\u0027, 700, 12); CREATE TABLE dispensaries (id INT PRIMARY KEY, name TEXT, region TEXT, sales INT); INSERT INTO dispensaries (id, name, region, sales) VALUES (1, \u0027Southeast Dispensary\u0027, \u0027Southeast\u0027, 2000);", + "sql": "SELECT producers.name, dispensaries.name, SUM(producers.quantity * dispensaries.sales * producers.price) as total_revenue FROM producers INNER JOIN dispensaries ON producers.region \u003d dispensaries.region GROUP BY producers.name, dispensaries.name;", + "sql_explanation": "Calculate the total revenue for each producer in the Southeast by summing the product of their quantity, the dispensary\u0027s sales, and their price where the region matches." 
+}, { + "id": "448", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average price per gram of cannabis flower sold by each dispensary in the city of Toronto in the month of February 2022?", + "sql_context": "CREATE TABLE Dispensaries (id INT, name VARCHAR(255), city VARCHAR(255), state VARCHAR(255));CREATE TABLE Inventory (id INT, dispensary_id INT, price DECIMAL(10, 2), product_type VARCHAR(255), grams INT, month INT, year INT);INSERT INTO Dispensaries (id, name, city, state) VALUES (1, \u0027CannaCorp\u0027, \u0027Toronto\u0027, \u0027ON\u0027);INSERT INTO Inventory (id, dispensary_id, price, product_type, grams, month, year) VALUES (1, 1, 20, \u0027flower\u0027, 3.5, 2, 2022);", + "sql": "SELECT d.name, AVG(i.price/i.grams) as avg_price_per_gram FROM Dispensaries d JOIN Inventory i ON d.id \u003d i.dispensary_id WHERE d.city \u003d \u0027Toronto\u0027 AND i.product_type \u003d \u0027flower\u0027 AND i.month \u003d 2 AND i.year \u003d 2022 GROUP BY d.name;", + "sql_explanation": "This query joins the Dispensaries and Inventory tables on the dispensary_id and dispensary.id foreign keys. It then filters for flower sales in Toronto in February 2022, groups by dispensary name to calculate the average price per gram of cannabis flower sold by each dispensary." 
+}, { + "id": "583", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total weight of cannabis vape cartridges sold by dispensaries in the city of Seattle in the month of April 2022?", + "sql_context": "CREATE TABLE Dispensaries (id INT, name VARCHAR(255), city VARCHAR(255), state VARCHAR(255));CREATE TABLE Inventory (id INT, dispensary_id INT, weight DECIMAL(10, 2), product_type VARCHAR(255), month INT, year INT);INSERT INTO Dispensaries (id, name, city, state) VALUES (1, \u0027Seattle Cannabis Co\u0027, \u0027Seattle\u0027, \u0027WA\u0027);INSERT INTO Inventory (id, dispensary_id, weight, product_type, month, year) VALUES (1, 1, 50, \u0027vape\u0027, 4, 2022);", + "sql": "SELECT d.name, SUM(i.weight) as total_weight FROM Dispensaries d JOIN Inventory i ON d.id \u003d i.dispensary_id WHERE d.city \u003d \u0027Seattle\u0027 AND i.product_type \u003d \u0027vape\u0027 AND i.month \u003d 4 AND i.year \u003d 2022 GROUP BY d.name;", + "sql_explanation": "This query joins the Dispensaries and Inventory tables on the dispensary_id and dispensary.id foreign keys. It then filters for vape cartridge sales in Seattle in April 2022, and groups by dispensary name to calculate the total weight of cannabis vape cartridges sold by each dispensary." 
+}, { + "id": "655", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total quantity of cannabis sold in Washington dispensaries in the first half of 2022.", + "sql_context": "CREATE TABLE products (type VARCHAR(10), quantity INT); INSERT INTO products (type, quantity) VALUES (\u0027flower\u0027, 1200), (\u0027concentrate\u0027, 1500), (\u0027edible\u0027, 800); CREATE TABLE dispensaries (state VARCHAR(20), sales INT); INSERT INTO dispensaries (state, sales) VALUES (\u0027Washington\u0027, 2300), (\u0027Washington\u0027, 2700); CREATE TABLE time_periods (half INT); INSERT INTO time_periods (half) VALUES (1), (2);", + "sql": "SELECT SUM(products.quantity) FROM products JOIN dispensaries ON TRUE WHERE products.type IN (\u0027flower\u0027, \u0027concentrate\u0027, \u0027edible\u0027) AND dispensaries.state \u003d \u0027Washington\u0027 AND time_periods.half BETWEEN 1 AND 2;", + "sql_explanation": "Join the products and dispensaries tables, filter for flower, concentrate, and edible products, Washington dispensaries, and the first half of 2022, then sum the total quantity of cannabis sold." 
+}, { + "id": "677", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 product categories with the highest total sales revenue", + "sql_context": "CREATE TABLE sales_data_3 (sale_id INT, product_id INT, sale_date DATE, price DECIMAL(5,2), quantity INT); INSERT INTO sales_data_3 (sale_id, product_id, sale_date, price, quantity) VALUES (11, 1, \u00272021-07-01\u0027, 12.50, 10), (12, 2, \u00272021-08-02\u0027, 13.00, 15), (13, 3, \u00272021-09-03\u0027, 12.75, 12), (14, 4, \u00272021-10-04\u0027, 45.00, 5), (15, 5, \u00272021-11-05\u0027, 35.00, 3);", + "sql": "SELECT category, SUM(price * quantity) AS total_sales_revenue FROM sales_data_3 JOIN products ON sales_data_3.product_id \u003d products.product_id GROUP BY category ORDER BY total_sales_revenue DESC LIMIT 3;", + "sql_explanation": "This SQL query finds the top 3 product categories with the highest total sales revenue. It does this by joining the sales_data_3 table with the products table on the product_id column and then grouping the result by the category column. It then calculates the sum of the price times quantity (total sales revenue) for each group and orders the result by the total sales revenue in descending order. It finally limits the result to the top 3 rows." 
+}, { + "id": "894", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total weight of cannabis sold by each dispensary in Washington in Q3 2021?", + "sql_context": "CREATE TABLE Dispensaries (id INT, name TEXT, state TEXT); INSERT INTO Dispensaries (id, name, state) VALUES (1, \u0027Dispensary A\u0027, \u0027Washington\u0027), (2, \u0027Dispensary B\u0027, \u0027Washington\u0027); CREATE TABLE Sales (dispensary_id INT, date DATE, weight_sold INT); INSERT INTO Sales (dispensary_id, date, weight_sold) VALUES (1, \u00272021-07-01\u0027, 50), (1, \u00272021-07-02\u0027, 60), (1, \u00272021-08-01\u0027, 55), (2, \u00272021-07-01\u0027, 40), (2, \u00272021-07-03\u0027, 45), (2, \u00272021-08-01\u0027, 42);", + "sql": "SELECT d.name, SUM(s.weight_sold) as total_weight_sold FROM Dispensaries d INNER JOIN Sales s ON d.id \u003d s.dispensary_id WHERE s.date BETWEEN \u00272021-07-01\u0027 AND \u00272021-09-30\u0027 GROUP BY d.name;", + "sql_explanation": "This query joins the Dispensaries and Sales tables on the dispensary_id field. It then filters the Sales table to only include sales that occurred in Q3 2021. Finally, it groups the results by dispensary name and calculates the total weight of cannabis sold by each dispensary in Q3 2021." 
+}, { + "id": "918", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total revenue of sativa-based products sold in Oregon dispensaries in Q1 2022.", + "sql_context": "CREATE TABLE products (type VARCHAR(10), category VARCHAR(10), price DECIMAL(5,2), quantity INT); INSERT INTO products (type, category, price, quantity) VALUES (\u0027oil\u0027, \u0027sativa\u0027, 70, 50), (\u0027flower\u0027, \u0027sativa\u0027, 100, 75), (\u0027edible\u0027, \u0027sativa\u0027, 60, 40); CREATE TABLE dispensaries (state VARCHAR(20), sales INT); INSERT INTO dispensaries (state, sales) VALUES (\u0027Oregon\u0027, 1800), (\u0027Oregon\u0027, 2000); CREATE TABLE time_periods (quarter INT); INSERT INTO time_periods (quarter) VALUES (1), (2);", + "sql": "SELECT SUM(products.price * products.quantity) FROM products JOIN dispensaries ON TRUE WHERE products.category \u003d \u0027sativa\u0027 AND dispensaries.state \u003d \u0027Oregon\u0027 AND time_periods.quarter \u003d 1;", + "sql_explanation": "Join the products and dispensaries tables, filter for sativa-based products and Oregon dispensaries, and Q1 2022, then sum the total revenue of sativa-based products sold." 
+}, { + "id": "1360", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each dispensary in California in Q1 2022?", + "sql_context": "CREATE TABLE Dispensaries (id INT, name TEXT, state TEXT); INSERT INTO Dispensaries (id, name, state) VALUES (1, \u0027Dispensary A\u0027, \u0027California\u0027); INSERT INTO Dispensaries (id, name, state) VALUES (2, \u0027Dispensary B\u0027, \u0027California\u0027); CREATE TABLE Sales (dispid INT, date DATE, revenue DECIMAL(10,2)); INSERT INTO Sales (dispid, date, revenue) VALUES (1, \u00272022-01-01\u0027, 15000); INSERT INTO Sales (dispid, date, revenue) VALUES (1, \u00272022-01-02\u0027, 16000); INSERT INTO Sales (dispid, date, revenue) VALUES (2, \u00272022-01-01\u0027, 12000);", + "sql": "SELECT d.name, SUM(s.revenue) as q1_revenue FROM Dispensaries d JOIN Sales s ON d.id \u003d s.dispid WHERE s.date BETWEEN \u00272022-01-01\u0027 AND \u00272022-03-31\u0027 GROUP BY d.name;", + "sql_explanation": "Joins Dispensaries and Sales tables, filters for Q1 2022, and groups by dispensary name to calculate total revenue." 
+}, { + "id": "1905", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which materials were used in each bridge and their total quantities?", + "sql_context": "CREATE TABLE bridges (id INT, name VARCHAR(255), location VARCHAR(255), budget FLOAT); CREATE TABLE bridge_resources (id INT, bridge_id INT, type VARCHAR(255), quantity INT); INSERT INTO bridge_resources (id, bridge_id, type, quantity) VALUES (1, 1, \u0027Steel\u0027, 150000);", + "sql": "SELECT b.name, r.type, SUM(r.quantity) as total_quantity FROM bridges b JOIN bridge_resources r ON b.id \u003d r.bridge_id GROUP BY b.name, r.type;", + "sql_explanation": "The SQL query joins the \u0027bridges\u0027 table with the \u0027bridge_resources\u0027 table on the bridge_id. Then, the query sums the quantities for each bridge name and resource type, providing a total quantity for each material used in each bridge project." 
+}, { + "id": "2052", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average maximum temperature for each region where a weather station is located?", + "sql_context": "CREATE TABLE WeatherStations (id INT, name TEXT, region TEXT); CREATE TABLE TemperatureReadings (id INT, weatherStationId INT, maxTemp DECIMAL(5,2), readingDate DATE);", + "sql": "SELECT ws.region, AVG(tr.maxTemp) FROM WeatherStations ws JOIN TemperatureReadings tr ON ws.id \u003d tr.weatherStationId GROUP BY ws.region;", + "sql_explanation": "The SQL query joins the WeatherStations and TemperatureReadings tables based on the weatherStationId. It then calculates the average maximum temperature (maxTemp) for each region using the AVG function and groups the results by region." 
+}, { + "id": "3275", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the bridges and their construction material from the \u0027bridges\u0027 and \u0027construction_materials\u0027 tables.", + "sql_context": "CREATE TABLE bridges (id INT, name VARCHAR(255), location VARCHAR(255)); CREATE TABLE construction_materials (bridge_id INT, material VARCHAR(255));", + "sql": "SELECT b.name, cm.material FROM bridges b LEFT JOIN construction_materials cm ON b.id \u003d cm.bridge_id;", + "sql_explanation": "This query executes a left join on \u0027bridges\u0027 and \u0027construction_materials\u0027 tables, based on their \u0027id\u0027 and \u0027bridge_id\u0027 columns respectively. It retrieves all bridge names and their corresponding construction materials." 
+}, { + "id": "108", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all agricultural innovation metrics in the \u0027rural_development\u0027 database, along with the number of farmers who have adopted each innovation.", + "sql_context": "CREATE TABLE agricultural_innovations (innovation_id INT, innovation_name VARCHAR(50), region VARCHAR(50)); CREATE TABLE farmers_adopting_innovations (farmer_id INT, innovation_id INT, region VARCHAR(50)); INSERT INTO agricultural_innovations (innovation_id, innovation_name, region) VALUES (1, \u0027Drip Irrigation\u0027, \u0027Midwest\u0027), (2, \u0027Precision Agriculture\u0027, \u0027Southeast\u0027); INSERT INTO farmers_adopting_innovations (farmer_id, innovation_id, region) VALUES (1, 1, \u0027Midwest\u0027), (2, 1, \u0027Midwest\u0027), (3, 2, \u0027Southeast\u0027);", + "sql": "SELECT agricultural_innovations.innovation_name, COUNT(farmers_adopting_innovations.farmer_id) FROM agricultural_innovations LEFT JOIN farmers_adopting_innovations ON agricultural_innovations.innovation_id \u003d farmers_adopting_innovations.innovation_id GROUP BY agricultural_innovations.innovation_name;", + "sql_explanation": "This SQL query lists all agricultural innovation metrics by using a left join to combine the \u0027agricultural_innovations\u0027 and \u0027farmers_adopting_innovations\u0027 tables on the \u0027innovation_id\u0027 column. 
The result is then grouped by the \u0027innovation_name\u0027 column and the number of farmers who have adopted each innovation is calculated by using the COUNT function on the \u0027farmer_id\u0027 column." +}, { + "id": "1197", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of agricultural innovation initiatives for historically underrepresented communities.", + "sql_context": "CREATE TABLE Communities (id INT, name VARCHAR(255), type VARCHAR(255)); INSERT INTO Communities (id, name, type) VALUES (1, \u0027C1\u0027, \u0027Historically Underrepresented\u0027), (2, \u0027C2\u0027, \u0027Mainstream\u0027), (3, \u0027C3\u0027, \u0027Historically Underrepresented\u0027); CREATE TABLE Innovations (id INT, community_id INT, innovation_name VARCHAR(255), date DATE); INSERT INTO Innovations (id, community_id, innovation_name, date) VALUES (1, 1, \u0027Solar-Powered Irrigation\u0027, \u00272021-03-01\u0027), (2, 3, \u0027Drought-Resistant Crops\u0027, \u00272020-09-15\u0027), (3, 2, \u0027Precision Agriculture\u0027, \u00272019-07-01\u0027);", + "sql": "SELECT COUNT(Innovations.id) FROM Innovations INNER JOIN Communities ON Innovations.community_id \u003d Communities.id WHERE Communities.type \u003d \u0027Historically Underrepresented\u0027;", + "sql_explanation": "We start with an INNER JOIN between the Innovations and Communities tables, using the community_id to match innovations to communities. Then, we use the COUNT() function to count the number of innovations with a community type of \u0027Historically Underrepresented\u0027." 
+}, { + "id": "2313", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of farmers who have adopted precision agriculture techniques in the Mekong Delta region in Vietnam?", + "sql_context": "CREATE TABLE Farmers (FarmerID INT, Name VARCHAR(50), Age INT, Gender VARCHAR(10), Location VARCHAR(50)); INSERT INTO Farmers (FarmerID, Name, Age, Gender, Location) VALUES (1, \u0027Nguyen Van A\u0027, 45, \u0027Male\u0027, \u0027Mekong Delta\u0027); INSERT INTO Farmers (FarmerID, Name, Age, Gender, Location) VALUES (2, \u0027Tran Thi B\u0027, 38, \u0027Female\u0027, \u0027Mekong Delta\u0027); CREATE TABLE PrecisionAgriculture (FarmerID INT, Technique VARCHAR(50), Year INT); INSERT INTO PrecisionAgriculture (FarmerID, Technique, Year) VALUES (1, \u0027Drip Irrigation\u0027, 2020); INSERT INTO PrecisionAgriculture (FarmerID, Technique, Year) VALUES (2, \u0027Soil Sensing\u0027, 2019);", + "sql": "SELECT COUNT(*) FROM Farmers F INNER JOIN PrecisionAgriculture PA ON F.FarmerID \u003d PA.FarmerID WHERE F.Location \u003d \u0027Mekong Delta\u0027;", + "sql_explanation": "This SQL query joins the Farmers table and the PrecisionAgriculture table on FarmerID, filters for farmers from the Mekong Delta region, and then counts the number of records, which gives us the total number of farmers who have adopted precision agriculture techniques." 
+}, { + "id": "450", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total cost of all astrophysics research projects led by researchers from the Canadian Space Agency, grouped by research publication year.", + "sql_context": "CREATE TABLE AstrophysicsResearch (id INT, title VARCHAR(500), abstract TEXT, publication_date DATE, lead_researcher INT, institution VARCHAR(500)); CREATE TABLE Researchers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), nationality VARCHAR(50), affiliation VARCHAR(500)); INSERT INTO Researchers (id, name, nationality) VALUES (1, \u0027Sarah Lee\u0027, \u0027Canadian\u0027); INSERT INTO AstrophysicsResearch (id, title, publication_date, lead_researcher) VALUES (1, \u0027Project A\u0027, \u00272020-01-01\u0027, 1);", + "sql": "SELECT YEAR(publication_date) AS publication_year, SUM(r.total_cost) AS total_cost FROM AstrophysicsResearch r JOIN Researchers re ON r.lead_researcher \u003d re.id WHERE re.nationality \u003d \u0027Canadian\u0027 GROUP BY YEAR(publication_date);", + "sql_explanation": "This query joins the AstrophysicsResearch and Researchers tables on lead_researcher. It then filters for researchers from the Canadian Space Agency, groups the results by publication year, and calculates the total cost of research projects for each year." 
+}, { + "id": "525", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many missions have been aborted by each spacecraft manufacturer?", + "sql_context": "CREATE TABLE spacecraft_manufacturers (manufacturer_id INT, name VARCHAR(100)); CREATE TABLE space_missions (mission_id INT, spacecraft_manufacturer_id INT, mission_status VARCHAR(10)); INSERT INTO spacecraft_manufacturers (manufacturer_id, name) VALUES (1, \u0027SpaceX\u0027), (2, \u0027NASA\u0027), (3, \u0027Blue Origin\u0027); INSERT INTO space_missions (mission_id, spacecraft_manufacturer_id, mission_status) VALUES (1, 1, \u0027success\u0027), (2, 1, \u0027aborted\u0027), (3, 2, \u0027success\u0027), (4, 2, \u0027aborted\u0027), (5, 3, \u0027success\u0027);", + "sql": "SELECT sm.name, COUNT(*) as aborted_missions FROM spacecraft_manufacturers sm INNER JOIN space_missions smm ON sm.manufacturer_id \u003d smm.spacecraft_manufacturer_id WHERE smm.mission_status \u003d \u0027aborted\u0027 GROUP BY sm.name;", + "sql_explanation": "This SQL query counts the number of missions that have been aborted by each spacecraft manufacturer. It does this by joining the spacecraft_manufacturers and space_missions tables on their common column spacecraft_manufacturer_id, and then filtering the results to only include rows where the mission_status is \u0027aborted\u0027. Finally, it groups the results by the name of the manufacturer and returns the count of these rows." 
+}, { + "id": "555", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all spacecraft that have been used in missions to Jupiter\u0027s moons.", + "sql_context": "CREATE TABLE Spacecraft (SpacecraftID INT, Name VARCHAR(50), Manufacturer VARCHAR(50), LaunchDate DATE); CREATE TABLE SpaceMissions (MissionID INT, SpacecraftID INT, Destination VARCHAR(50)); INSERT INTO Spacecraft VALUES (1, \u0027Juno\u0027, \u0027NASA\u0027, \u00272011-08-05\u0027); INSERT INTO SpaceMissions VALUES (1, 1, \u0027Jupiter\u0027);", + "sql": "SELECT Spacecraft.Name FROM Spacecraft INNER JOIN SpaceMissions ON Spacecraft.SpacecraftID \u003d SpaceMissions.SpacecraftID WHERE SpaceMissions.Destination LIKE \u0027%Jupiter%\u0027 AND SpaceMissions.Destination LIKE \u0027%moons%\u0027;", + "sql_explanation": "This query lists all spacecraft that have been used in missions to Jupiter\u0027s moons by joining the Spacecraft and SpaceMissions tables on the SpacecraftID column, filtering for rows where the Destination column contains both the word \u0027Jupiter\u0027 and the word \u0027moons\u0027, and selecting the Name column from the Spacecraft table." 
+}, { + "id": "1007", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum mass of spacecraft that visited each planet?", + "sql_context": "CREATE TABLE SpacecraftVisits (spacecraft_id INT, planet VARCHAR(50), visit_date DATE); CREATE TABLE Spacecraft (id INT, name VARCHAR(50), manufacturer VARCHAR(50), mass FLOAT); INSERT INTO Spacecraft (id, name, manufacturer, mass) VALUES (1, \u0027Voyager 1\u0027, \u0027SpaceCorp\u0027, 770.0); INSERT INTO SpacecraftVisits (spacecraft_id, planet, visit_date) VALUES (1, \u0027Jupiter\u0027, \u00272022-09-01\u0027); INSERT INTO Spacecraft (id, name, manufacturer, mass) VALUES (2, \u0027Cassini\u0027, \u0027NASA\u0027, 5650.0); INSERT INTO SpacecraftVisits (spacecraft_id, planet, visit_date) VALUES (2, \u0027Saturn\u0027, \u00272022-09-02\u0027);", + "sql": "SELECT SpacecraftVisits.planet, MAX(Spacecraft.mass) FROM SpacecraftVisits INNER JOIN Spacecraft ON SpacecraftVisits.spacecraft_id \u003d Spacecraft.id GROUP BY SpacecraftVisits.planet;", + "sql_explanation": "This query finds the maximum mass of spacecraft that visited each planet by performing an inner join between the \u0027SpacecraftVisits\u0027 and \u0027Spacecraft\u0027 tables on the \u0027spacecraft_id\u0027 and \u0027id\u0027 columns, respectively. It then groups the results by the \u0027planet\u0027 column and calculates the maximum mass of spacecraft for each group." 
+}, { + "id": "1313", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the astronauts that have not been on any mission for \u0027SpacePioneers\u0027?", + "sql_context": "CREATE TABLE Astronauts (id INT, name VARCHAR(50), organization VARCHAR(50)); CREATE TABLE Missions (id INT, astronaut_id INT, company VARCHAR(50), mission_type VARCHAR(50)); INSERT INTO Astronauts (id, name, organization) VALUES (1, \u0027Alice\u0027, \u0027SpacePioneers\u0027), (2, \u0027Bob\u0027, \u0027SpacePioneers\u0027), (3, \u0027Charlie\u0027, \u0027SpacePioneers\u0027); INSERT INTO Missions (id, astronaut_id, company) VALUES (1, 1, \u0027SpacePioneers\u0027), (2, 1, \u0027SpacePioneers\u0027), (3, 2, \u0027SpacePioneers\u0027);", + "sql": "SELECT a.name FROM Astronauts a LEFT JOIN Missions m ON a.id \u003d m.astronaut_id AND a.organization \u003d m.company WHERE m.id IS NULL AND a.organization \u003d \u0027SpacePioneers\u0027;", + "sql_explanation": "The SQL query identifies the astronauts who have not been on any mission for \u0027SpacePioneers\u0027 by performing a left join of Astronauts and Missions tables on astronaut_id and company, filtering for the organization \u0027SpacePioneers\u0027, and then selecting the names of the astronauts whose id is null in the Missions table." 
+}, { + "id": "1757", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of astronaut medical treatments?", + "sql_context": "CREATE TABLE astronauts (id INT, name VARCHAR(50));CREATE TABLE medical_treatments (id INT, astronaut_id INT, cost INT); INSERT INTO astronauts VALUES (1, \u0027Melissa Lewis\u0027); INSERT INTO medical_treatments VALUES (1, 1), (2, 1), (3, 1); INSERT INTO medical_treatments VALUES (1, 1, 5000), (2, 1, 7000), (3, 1, 10000);", + "sql": "SELECT SUM(medical_treatments.cost) as total_cost FROM medical_treatments INNER JOIN astronauts ON medical_treatments.astronaut_id \u003d astronauts.id;", + "sql_explanation": "We calculate the total cost of astronaut medical treatments by performing a sum of the cost column." 
+}, { + "id": "2057", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest launch date of a space mission involving an astronaut from Japan?", + "sql_context": "CREATE TABLE Astronauts (AstronautID INT, Name VARCHAR(50), Nationality VARCHAR(50));CREATE TABLE SpaceMissions (MissionID INT, AstronautID INT, Name VARCHAR(50), LaunchDate DATE); INSERT INTO Astronauts (AstronautID, Name, Nationality) VALUES (1, \u0027Takao Doi\u0027, \u0027Japan\u0027), (2, \u0027Naoko Yamazaki\u0027, \u0027Japan\u0027), (3, \u0027Sōichi Noguchi\u0027, \u0027Japan\u0027); INSERT INTO SpaceMissions (MissionID, AstronautID, Name, LaunchDate) VALUES (1, 1, \u0027STS-87\u0027, \u00271997-09-19\u0027), (2, 2, \u0027STS-131\u0027, \u00272010-04-05\u0027), (3, 3, \u0027STS-114\u0027, \u00272005-07-26\u0027);", + "sql": "SELECT MIN(sm.LaunchDate) FROM SpaceMissions sm INNER JOIN Astronauts a ON sm.AstronautID \u003d a.AstronautID WHERE a.Nationality \u003d \u0027Japan\u0027;", + "sql_explanation": "The SQL query first joins the SpaceMissions and Astronauts tables on the AstronautID. Then, it filters the records where the Nationality is \u0027Japan\u0027. Lastly, it calculates the earliest launch date by using the MIN function." 
+}, { + "id": "2309", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total cost of space missions led by astronauts from the United States?", + "sql_context": "CREATE TABLE Astronauts (AstronautID INT, Name VARCHAR(50), Nationality VARCHAR(50));CREATE TABLE SpaceMissions (MissionID INT, AstronautID INT, Name VARCHAR(50), Cost FLOAT); INSERT INTO Astronauts (AstronautID, Name, Nationality) VALUES (1, \u0027Mark Watney\u0027, \u0027USA\u0027), (2, \u0027Melissa Lewis\u0027, \u0027USA\u0027); INSERT INTO SpaceMissions (MissionID, AstronautID, Name, Cost) VALUES (1, 1, \u0027Ares 3\u0027, 2000000), (2, 2, \u0027Hermes 1\u0027, 3000000);", + "sql": "SELECT SUM(sm.Cost) FROM SpaceMissions sm INNER JOIN Astronauts a ON sm.AstronautID \u003d a.AstronautID WHERE a.Nationality \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query first joins the SpaceMissions and Astronauts tables on the AstronautID. Then, it filters the records where the Nationality is \u0027USA\u0027. Lastly, it calculates the total cost of space missions by using the SUM function." 
+}, { + "id": "2651", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum medical risk score of astronauts who have flown missions to Mars?", + "sql_context": "CREATE TABLE Astronauts (ID INT, Name VARCHAR(255), MedicalRisk INT); CREATE TABLE Missions (ID INT, Destination VARCHAR(255)); INSERT INTO Astronauts (ID, Name, MedicalRisk) VALUES (1, \u0027Astronaut1\u0027, 10), (2, \u0027Astronaut2\u0027, 20), (3, \u0027Astronaut3\u0027, 30); INSERT INTO Missions (ID, Destination) VALUES (1, \u0027Mars\u0027), (2, \u0027Moon\u0027), (3, \u0027Mars\u0027);", + "sql": "SELECT MAX(MedicalRisk) FROM Astronauts INNER JOIN Missions ON Astronauts.ID \u003d Missions.ID WHERE Destination \u003d \u0027Mars\u0027;", + "sql_explanation": "The SQL query calculates the maximum medical risk score of astronauts who have flown missions to Mars by joining the Astronauts and Missions tables on the ID column, filtering for missions to Mars, and then calculating the maximum medical risk score." 
+}, { + "id": "90", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily transaction count for the last month, split by customer gender and day of the week?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, product_id INT, category_id INT, transaction_date DATE, amount DECIMAL(10,2), gender VARCHAR(10)); CREATE TABLE customers (customer_id INT, age INT, name VARCHAR(255));", + "sql": "SELECT c.gender, DATE_FORMAT(t.transaction_date, \u0027%W\u0027) as day_of_week, AVG(COUNT(t.transaction_id)) as avg_daily_transaction_count FROM customers c INNER JOIN transactions t ON c.customer_id \u003d t.customer_id WHERE t.transaction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY c.gender, day_of_week;", + "sql_explanation": "This query starts by selecting customer gender, day of the week, and the average daily transaction count. It then joins customers and transactions tables on customer ID. The WHERE clause filters transactions within the past month. The GROUP BY clause groups the result by customer gender and day of the week. The subquery counts the number of transactions per day." 
+}, { + "id": "171", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction amount in EUR by city for the month of May 2022?", + "sql_context": "CREATE TABLE customers (customer_id INT, customer_city VARCHAR(30)); CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_amount DECIMAL(10,2), transaction_date DATE, transaction_currency VARCHAR(3));", + "sql": "SELECT customer_city, AVG(transaction_amount) as average_transaction_amount FROM customers JOIN transactions ON customers.customer_id \u003d transactions.customer_id WHERE transaction_date BETWEEN \u00272022-05-01\u0027 AND \u00272022-05-31\u0027 AND transaction_currency \u003d \u0027EUR\u0027 GROUP BY customer_city;", + "sql_explanation": "The SQL query calculates the average transaction amount in EUR by city for the month of May 2022. It does this by summing the transaction_amount column, grouping the results by the customer_city column, and filtering the transactions to only those that occurred in the month of May 2022 and were in EUR." 
+}, { + "id": "192", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total transaction amount by customer name for the year 2021, sorted in descending order?", + "sql_context": "CREATE TABLE customers (customer_id INT, customer_name VARCHAR(50), account_number VARCHAR(20), primary_contact VARCHAR(50)); CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_type VARCHAR(20), transaction_amount DECIMAL(10,2), transaction_date DATE);", + "sql": "SELECT c.customer_name, SUM(t.transaction_amount) as total_transaction_amount FROM customers c JOIN transactions t ON c.customer_id \u003d t.customer_id WHERE transaction_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 GROUP BY c.customer_name ORDER BY total_transaction_amount DESC;", + "sql_explanation": "The SQL query calculates the total transaction amount for each customer by name for the year 2021, sorted in descending order. It does this by joining the customers and transactions tables on the customer_id column, summing the transaction_amount column, and grouping the results by the customer_name column. The WHERE clause filters the transactions to only those that occurred in the year 2021. The results are then ordered in descending order by total transaction amount." 
+}, { + "id": "229", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average daily transaction value for the last week, split by customer location and payment method?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, product_id INT, category_id INT, transaction_date DATE, amount DECIMAL(10,2), payment_method VARCHAR(50)); CREATE TABLE customers (customer_id INT, age INT, gender VARCHAR(10), location VARCHAR(255));", + "sql": "SELECT c.location, t.payment_method, AVG(t.amount) as avg_daily_transaction_value FROM transactions t INNER JOIN customers c ON t.customer_id \u003d c.customer_id WHERE t.transaction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY c.location, t.payment_method;", + "sql_explanation": "This query starts by selecting customer location, payment method, and the average transaction amount. It then joins transactions and customers tables on customer ID. The WHERE clause filters transactions within the last week. The GROUP BY clause groups the result by customer location and payment method." 
+}, { + "id": "396", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total transaction amounts and average returns for all strategies with the word \"growth\" in their name.", + "sql_context": "CREATE TABLE Transactions (TransactionID INT, TransactionAmount DECIMAL(10,2), StrategyID INT); INSERT INTO Transactions (TransactionID, TransactionAmount, StrategyID) VALUES (1, 1000.00, 1); INSERT INTO Transactions (TransactionID, TransactionAmount, StrategyID) VALUES (2, 1500.00, 1); INSERT INTO Transactions (TransactionID, TransactionAmount, StrategyID) VALUES (3, 800.00, 2); INSERT INTO Transactions (TransactionID, TransactionAmount, StrategyID) VALUES (4, 1200.00, 3); CREATE TABLE InvestmentStrategies (StrategyID INT, StrategyName VARCHAR(100), Returns DECIMAL(10,2)); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, Returns) VALUES (1, \u0027Equity Growth\u0027, 12.5); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, Returns) VALUES (2, \u0027Value Growth\u0027, 10.2); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, Returns) VALUES (3, \u0027Dividend Growth\u0027, 9.1);", + "sql": "SELECT SUM(TransactionAmount) AS TotalTransactionAmount, AVG(Returns) AS AverageReturns FROM Transactions INNER JOIN InvestmentStrategies ON Transactions.StrategyID \u003d InvestmentStrategies.StrategyID WHERE StrategyName LIKE \u0027%growth%\u0027;", + "sql_explanation": "Joining the Transactions and InvestmentStrategies tables, then filtering for strategies with \"growth\" in their name and calculating the total transaction amount and 
average returns." +}, { + "id": "592", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total transaction amount by currency for each day in the past week?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, amount DECIMAL(10,2), transaction_date DATE, currency VARCHAR(50)); CREATE VIEW daily_transactions AS SELECT transaction_date, SUM(amount) as total_amount FROM transactions GROUP BY transaction_date;", + "sql": "SELECT dt.transaction_date, t.currency, SUM(t.amount) as currency_total FROM daily_transactions dt INNER JOIN transactions t ON dt.transaction_date \u003d t.transaction_date GROUP BY dt.transaction_date, t.currency;", + "sql_explanation": "The SQL query calculates the total transaction amount by currency for each day in the past week by joining the \u0027transactions\u0027 table and the \u0027daily_transactions\u0027 view on the \u0027transaction_date\u0027 column. It then groups them by \u0027transaction_date\u0027 and \u0027currency\u0027 to calculate the total transaction amount by currency for each day." 
+}, { + "id": "641", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment in the technology sector for customers in the northern region?", + "sql_context": "CREATE TABLE energy_investments (customer_id INT, investment FLOAT, investment_type VARCHAR(10), region VARCHAR(10)); INSERT INTO energy_investments (customer_id, investment, investment_type, region) VALUES (1, 5000, \u0027renewable\u0027, \u0027north\u0027), (2, 3000, \u0027non-renewable\u0027, \u0027south\u0027), (3, 7000, \u0027renewable\u0027, \u0027west\u0027); CREATE TABLE customer_data (customer_id INT, name VARCHAR(20), region VARCHAR(10)); INSERT INTO customer_data (customer_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027north\u0027), (2, \u0027Jane Smith\u0027, \u0027south\u0027), (3, \u0027Mary Johnson\u0027, \u0027west\u0027);", + "sql": "SELECT SUM(investment) FROM energy_investments INNER JOIN customer_data ON energy_investments.customer_id \u003d customer_data.customer_id WHERE investment_type \u003d \u0027technology\u0027 AND customer_data.region \u003d \u0027north\u0027;", + "sql_explanation": "This SQL query calculates the total investment in the technology sector for customers in the northern region by summing the investment column where investment_type is \u0027technology\u0027 and the customer region is \u0027north\u0027. The query uses an inner join to combine the energy_investments and customer_data tables on the customer_id column." 
+}, { + "id": "750", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which investment strategies have a return on investment (ROI) greater than 5% and have at least one client utilizing them?", + "sql_context": "CREATE TABLE InvestmentStrategies (StrategyID int, StrategyName varchar(50), ROI decimal(5,2)); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, ROI) VALUES (1, \u0027Conservative\u0027, 2), (2, \u0027Moderate\u0027, 3), (3, \u0027Aggressive\u0027, 5), (4, \u0027High Risk\u0027, 10); CREATE TABLE ClientStrategies (ClientID int, StrategyID int); INSERT INTO ClientStrategies (ClientID, StrategyID) VALUES (10, 1), (11, 1), (12, 2), (13, 3), (14, 2), (15, 4);", + "sql": "SELECT i.StrategyName, i.ROI FROM InvestmentStrategies i INNER JOIN ClientStrategies cs ON i.StrategyID \u003d cs.StrategyID WHERE i.ROI \u003e 5 GROUP BY i.StrategyName, i.ROI HAVING COUNT(cs.ClientID) \u003e 0;", + "sql_explanation": "The SQL query performs an inner join between the InvestmentStrategies and ClientStrategies tables based on the StrategyID. It then filters for investment strategies with an ROI greater than 5% and utilizes the HAVING clause to only return strategies that have at least one client utilizing them. The results are grouped by strategy name and ROI." 
+}, { + "id": "850", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total amount of deposits made by customers from California and Texas?", + "sql_context": "CREATE TABLE customer (customer_id INT, first_name VARCHAR(50), last_name VARCHAR(50), state VARCHAR(50)); INSERT INTO customer (customer_id, first_name, last_name, state) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027NY\u0027), (2, \u0027Jane\u0027, \u0027Smith\u0027, \u0027NJ\u0027), (3, \u0027Maria\u0027, \u0027Garcia\u0027, \u0027CA\u0027), (4, \u0027David\u0027, \u0027Lee\u0027, \u0027TX\u0027); CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_date DATE, transaction_type VARCHAR(50), transaction_amount DECIMAL(10, 2)); INSERT INTO transactions (transaction_id, customer_id, transaction_date, transaction_type, transaction_amount) VALUES (1, 1, \u00272022-01-01\u0027, \u0027Withdrawal\u0027, 100), (2, 1, \u00272022-01-05\u0027, \u0027Deposit\u0027, 200), (3, 2, \u00272022-01-07\u0027, \u0027Withdrawal\u0027, 50), (4, 3, \u00272022-01-09\u0027, \u0027Deposit\u0027, 300), (5, 4, \u00272022-01-11\u0027, \u0027Withdrawal\u0027, 75), (6, 4, \u00272022-01-15\u0027, \u0027Deposit\u0027, 150);", + "sql": "SELECT SUM(transaction_amount) FROM transactions INNER JOIN customer ON transactions.customer_id \u003d customer.customer_id WHERE customer.state IN (\u0027CA\u0027, \u0027TX\u0027) AND transaction_type \u003d \u0027Deposit\u0027;", + "sql_explanation": "The SQL query joins the \"transactions\" and \"customer\" tables on the \"customer_id\" column. 
It then filters the results to only include deposits made by customers from California and Texas using the WHERE clause and the \"state\" column from the \"customer\" table. The SUM function is used to calculate the total amount of deposits." +}, { + "id": "893", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers from each country have an account balance greater than 10000?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027), (3, \u0027Marie Lee\u0027, \u0027France\u0027); CREATE TABLE accounts (id INT, customer_id INT, balance DECIMAL(10, 2)); INSERT INTO accounts (id, customer_id, balance) VALUES (1, 1, 12000.00), (2, 1, 18000.00), (3, 2, 6000.00), (4, 3, 1500.00);", + "sql": "SELECT customers.country, COUNT(DISTINCT customers.id) FROM customers INNER JOIN accounts ON customers.id \u003d accounts.customer_id WHERE accounts.balance \u003e 10000 GROUP BY customers.country;", + "sql_explanation": "This query calculates the number of customers from each country who have an account balance greater than 10000 by joining the customers table with the accounts table on the customer_id column. It then filters the results to only include customers with a balance greater than 10000 using the WHERE clause and groups the results by country. Finally, it counts the number of unique customers in each group." 
+}, { + "id": "952", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of transactions for each investment strategy?", + "sql_context": "CREATE TABLE investment_strategies (strategy_id INT, strategy_name VARCHAR(50), client_id INT); INSERT INTO investment_strategies (strategy_id, strategy_name, client_id) VALUES (1, \u0027Equity\u0027, 1), (2, \u0027Fixed Income\u0027, 2), (3, \u0027Real Estate\u0027, 3), (4, \u0027Equity\u0027, 1), (5, \u0027Fixed Income\u0027, 2), (6, \u0027Equity\u0027, 3); CREATE TABLE transactions (transaction_id INT, strategy_id INT, amount DECIMAL(10,2)); INSERT INTO transactions (transaction_id, strategy_id, amount) VALUES (1, 1, 500.00), (2, 1, 1000.00), (3, 2, 250.00), (4, 3, 10000.00), (5, 3, 500.00), (6, 1, 250.00);", + "sql": "SELECT strategy_name, COUNT(*) AS total_transactions FROM investment_strategies JOIN transactions ON investment_strategies.client_id \u003d transactions.strategy_id GROUP BY strategy_name;", + "sql_explanation": "This query calculates the total number of transactions for each investment strategy. It does this by joining the investment_strategies and transactions tables on the client_id and strategy_id fields, respectively, and then grouping by strategy_name and calculating the count of transactions." 
+}, { + "id": "1100", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total transaction amount for each customer in the Southeast region?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO customers (id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027Southeast\u0027), (2, \u0027Jane Smith\u0027, \u0027Northeast\u0027); CREATE TABLE transactions (customer_id INT, transaction_amount DECIMAL(10,2)); INSERT INTO transactions (customer_id, transaction_amount) VALUES (1, 200.00), (1, 300.00), (2, 100.00);", + "sql": "SELECT c.name, SUM(t.transaction_amount) as total_transaction_amount FROM customers c JOIN transactions t ON c.id \u003d t.customer_id WHERE c.region \u003d \u0027Southeast\u0027 GROUP BY c.name;", + "sql_explanation": "This query joins the customers and transactions tables on the customer_id field. It then filters for customers in the Southeast region and calculates the total transaction amount for each customer by grouping by the name field and summing the transaction_amount field." 
+}, { + "id": "1296", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the count of clients with credit card accounts in the Atlanta branch?", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT, dob DATE, branch TEXT);CREATE TABLE accounts (account_id INT, client_id INT, account_type TEXT);INSERT INTO clients VALUES (2, \u0027Jessica Smith\u0027, \u00271995-06-28\u0027, \u0027Atlanta\u0027);INSERT INTO accounts VALUES (102, 2, \u0027Credit Card\u0027);", + "sql": "SELECT COUNT(*) FROM clients INNER JOIN accounts ON clients.client_id \u003d accounts.client_id WHERE accounts.account_type \u003d \u0027Credit Card\u0027 AND clients.branch \u003d \u0027Atlanta\u0027;", + "sql_explanation": "The SQL query calculates the count of clients with credit card accounts in the Atlanta branch by counting all records after filtering for clients with credit card accounts and branch as Atlanta using inner join between clients and accounts table." 
+}, { + "id": "1312", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the Checking accounts without any transactions?", + "sql_context": "CREATE TABLE Accounts (AccountId INT, AccountType VARCHAR(20)); INSERT INTO Accounts (AccountId, AccountType) VALUES (1, \u0027Checking\u0027), (2, \u0027Savings\u0027); CREATE TABLE Transactions (TransactionId INT, AccountId INT, TransactionType VARCHAR(20), Amount DECIMAL(10,2)); INSERT INTO Transactions (TransactionId, AccountId, TransactionType, Amount) VALUES (1, 1, \u0027Deposit\u0027, 1000.00);", + "sql": "SELECT A.AccountId, A.AccountType FROM Accounts A LEFT JOIN Transactions T ON A.AccountId \u003d T.AccountId WHERE A.AccountType \u003d \u0027Checking\u0027 AND T.TransactionId IS NULL;", + "sql_explanation": "Select Checking accounts without any transactions by using a LEFT JOIN and filtering for NULL TransactionId." 
+}, { + "id": "1667", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount of transactions for each customer in the Southeast region?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), region VARCHAR(20)); INSERT INTO customers (customer_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027Southeast\u0027), (2, \u0027Jane Smith\u0027, \u0027Northeast\u0027); CREATE TABLE transactions (transaction_id INT, customer_id INT, amount DECIMAL(10, 2)); INSERT INTO transactions (transaction_id, customer_id, amount) VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 50.00);", + "sql": "SELECT c.name, SUM(t.amount) FROM customers c INNER JOIN transactions t ON c.customer_id \u003d t.customer_id WHERE c.region \u003d \u0027Southeast\u0027 GROUP BY c.name;", + "sql_explanation": "This SQL query joins the customers and transactions tables on the customer_id field. It then filters for customers in the \u0027Southeast\u0027 region and calculates the total amount of transactions for each customer by grouping by the name field and summing the amount field." 
+}, { + "id": "1704", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment value in the European market for clients with a last name starting with \u0027B\u0027?", + "sql_context": "CREATE TABLE clients (client_id INT, name VARCHAR(50), region VARCHAR(50));CREATE TABLE investments (investment_id INT, client_id INT, market VARCHAR(50), value INT);INSERT INTO clients (client_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027North America\u0027), (2, \u0027Barbara Black\u0027, \u0027Europe\u0027);INSERT INTO investments (investment_id, client_id, market, value) VALUES (1, 1, \u0027US\u0027, 50000), (2, 2, \u0027Europe\u0027, 120000);", + "sql": "SELECT SUM(i.value) FROM clients c INNER JOIN investments i ON c.client_id \u003d i.client_id WHERE SUBSTRING(c.name, 1, 1) \u003d \u0027B\u0027 AND i.market \u003d \u0027Europe\u0027;", + "sql_explanation": "We perform an inner join between the clients and investments tables, filtering for rows where the first character of the client\u0027s name is \u0027B\u0027 and the market is Europe. Then, we calculate the sum of the investment values for these clients." 
+}, { + "id": "1766", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clients with their age and the total number of investments they made?", + "sql_context": "CREATE TABLE clients (client_id INT, name TEXT, age INT, gender TEXT); INSERT INTO clients VALUES (1, \u0027John Doe\u0027, 35, \u0027Male\u0027), (2, \u0027Jane Smith\u0027, 45, \u0027Female\u0027), (3, \u0027Bob Johnson\u0027, 50, \u0027Male\u0027); CREATE TABLE investments (client_id INT, investment_type TEXT); INSERT INTO investments VALUES (1, \u0027Stocks\u0027), (1, \u0027Bonds\u0027), (2, \u0027Stocks\u0027), (2, \u0027Mutual Funds\u0027), (3, \u0027Mutual Funds\u0027), (3, \u0027Real Estate\u0027);", + "sql": "SELECT c.age, COUNT(i.investment_type) AS num_investments FROM clients c LEFT JOIN investments i ON c.client_id \u003d i.client_id GROUP BY c.client_id;", + "sql_explanation": "We perform a left join between the clients and investments tables based on the client_id. Then, we group the result by client_id and count the number of investments for each client using COUNT(*). Finally, we display the age column from the clients table and the calculated number of investments in the num_investments column." 
+}, { + "id": "1916", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction amount for customers in the United Kingdom?", + "sql_context": "CREATE TABLE customers (customer_id INT, name TEXT, country TEXT); INSERT INTO customers (customer_id, name, country) VALUES (1, \u0027John Doe\u0027, \u0027UK\u0027), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027); CREATE TABLE transactions (transaction_id INT, customer_id INT, amount DECIMAL(10, 2)); INSERT INTO transactions (transaction_id, customer_id, amount) VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 50.00);", + "sql": "SELECT AVG(amount) FROM transactions INNER JOIN customers ON transactions.customer_id \u003d customers.customer_id WHERE customers.country \u003d \u0027UK\u0027;", + "sql_explanation": "The SQL query joins the transactions and customers tables, filters for customers from the United Kingdom, and calculates the average amount of transactions." 
+}, { + "id": "1956", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of all loans issued to customers in London?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), city VARCHAR(50)); INSERT INTO customers (id, name, city) VALUES (1, \u0027John Doe\u0027, \u0027London\u0027); CREATE TABLE loans (id INT, customer_id INT, product VARCHAR(50), quantity INT, interest_rate DECIMAL(10,2), loaned_at TIMESTAMP); INSERT INTO loans (id, customer_id, product, quantity, interest_rate, loaned_at) VALUES (1, 1, \u0027Car Loan\u0027, 10000, 0.05, NOW());", + "sql": "SELECT SUM(l.quantity * l.interest_rate) as total_value FROM loans l INNER JOIN customers c ON l.customer_id \u003d c.id WHERE c.city \u003d \u0027London\u0027;", + "sql_explanation": "Joining the loans and customers tables on the customer_id foreign key, then calculating the total value of all loans issued to customers in London." 
+}, { + "id": "2071", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum transaction amount for clients in the Americas region?", + "sql_context": "CREATE TABLE clients (client_id INT, region VARCHAR(20)); INSERT INTO clients (client_id, region) VALUES (1, \u0027North America\u0027), (2, \u0027South America\u0027), (3, \u0027Europe\u0027); CREATE TABLE transactions (transaction_id INT, client_id INT, amount DECIMAL(10,2)); INSERT INTO transactions (transaction_id, client_id, amount) VALUES (1, 1, 500.00), (2, 1, 1000.00), (3, 2, 250.00), (4, 3, 10000.00);", + "sql": "SELECT MAX(amount) FROM transactions JOIN clients ON transactions.client_id \u003d clients.client_id WHERE clients.region \u003d \u0027North America\u0027;", + "sql_explanation": "This query calculates the maximum transaction amount for clients in the Americas region. It does this by joining the transactions and clients tables on the client_id field, then filtering for clients in the Americas region, and finally calculating the maximum of the amount field." 
+}, { + "id": "2117", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction value for each customer in the \"credit_card\" table, grouped by their country?", + "sql_context": "CREATE TABLE customer (customer_id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE credit_card (transaction_id INT, customer_id INT, value DECIMAL(10,2), timestamp TIMESTAMP);", + "sql": "SELECT c.country, AVG(cc.value) as avg_value FROM customer c JOIN credit_card cc ON c.customer_id \u003d cc.customer_id GROUP BY c.country;", + "sql_explanation": "First, we join the \"customer\" and \"credit_card\" tables on the \"customer_id\" column. Then, we group the results by the \"country\" column and calculate the average transaction value for each group using the AVG() function." 
+}, { + "id": "2529", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum transaction amount for a customer in the Asia-Pacific region?", + "sql_context": "CREATE TABLE regions (customer_id INT, name VARCHAR(50), region VARCHAR(20)); INSERT INTO regions (customer_id, name, region) VALUES (1, \u0027John Doe\u0027, \u0027Asia-Pacific\u0027), (2, \u0027Jane Smith\u0027, \u0027Europe\u0027), (3, \u0027Alice Johnson\u0027, \u0027Asia-Pacific\u0027), (4, \u0027Bob Brown\u0027, \u0027Americas\u0027); CREATE TABLE transactions_5 (transaction_id INT, customer_id INT, amount DECIMAL(10, 2)); INSERT INTO transactions_5 (transaction_id, customer_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 300.00), (4, 3, 1000.00);", + "sql": "SELECT MAX(amount) FROM transactions_5 t JOIN regions r ON t.customer_id \u003d r.customer_id WHERE r.region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "The SQL query joins the transactions_5 table with the regions table on the customer_id field. It then filters the results to only include transactions for customers in the Asia-Pacific region and calculates the maximum transaction amount for those customers." 
+}, { + "id": "3098", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average balance for customers from Spain?", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, \u0027John Doe\u0027, \u0027USA\u0027), (2, \u0027Jane Smith\u0027, \u0027Canada\u0027), (3, \u0027Jim Brown\u0027, \u0027UK\u0027), (4, \u0027Pedro Rodriguez\u0027, \u0027Spain\u0027); CREATE TABLE accounts (id INT, customer_id INT, balance DECIMAL(10, 2)); INSERT INTO accounts (id, customer_id, balance) VALUES (1, 1, 12000.00), (2, 1, 8000.00), (3, 2, 5000.00), (4, 4, 2000.00), (5, 4, 1500.00);", + "sql": "SELECT AVG(a.balance) FROM accounts a JOIN customers c ON a.customer_id \u003d c.id WHERE c.country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query calculates the average balance for customers from Spain. It first joins the accounts table with the customers table on the customer_id column. Then, it filters the results to only include customers from Spain by using the WHERE clause and checks if the country column equals \u0027Spain\u0027. Finally, it calculates the average balance using the AVG function." 
+}, { + "id": "1566", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display carbon offset programs and the total carbon offsets achieved from the Programs and Offsets tables", + "sql_context": "CREATE TABLE Programs (id INT, name VARCHAR(50), sector VARCHAR(10));CREATE TABLE Offsets (id INT, program_id INT, offsets INT);", + "sql": "SELECT p.name, SUM(o.offsets) as total_offsets FROM Programs p INNER JOIN Offsets o ON p.id \u003d o.program_id WHERE p.sector \u003d \u0027carbon_offset\u0027 GROUP BY p.id;", + "sql_explanation": "We join the Programs table with the Offsets table on the program_id field. We filter the results to only include carbon offset programs and then group the results by program_id, summing the total carbon offsets achieved for each program." 
+}, { + "id": "1923", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the energy efficiency programs in the \u0027South\u0027 region, along with their start and end dates, if applicable.", + "sql_context": "CREATE TABLE Region (RegionID INT, RegionName VARCHAR(100)); INSERT INTO Region VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027); CREATE TABLE Program (ProgramID INT, ProgramName VARCHAR(100), RegionID INT, StartDate DATE, EndDate DATE); INSERT INTO Program VALUES (1, \u0027Energy Efficiency Program 1\u0027, 2), (2, \u0027Energy Efficiency Program 2\u0027, 2), (3, \u0027Energy Efficiency Program 3\u0027, 2), (4, \u0027Energy Efficiency Program 4\u0027, 2);", + "sql": "SELECT ProgramName, RegionName, StartDate, EndDate FROM Program JOIN Region ON Program.RegionID \u003d Region.RegionID WHERE RegionName \u003d \u0027South\u0027;", + "sql_explanation": "The SQL query filters the Program table based on the RegionName column, selecting only the rows where the region is \u0027South\u0027. It returns the ProgramName, RegionName, StartDate, and EndDate columns for the matching rows by joining Program and Region tables." 
+}, { + "id": "238", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average data usage for mobile and broadband services for customers in the \u0027Suburban\u0027 region.", + "sql_context": "CREATE TABLE suburban_usage(id INT, subscriber_id INT, data_usage INT); INSERT INTO suburban_usage(id, subscriber_id, data_usage) VALUES (1, 1, 1000), (2, 1, 1200), (3, 2, 1500), (4, 3, 800);", + "sql": "SELECT subscribers.region, AVG(suburban_usage.data_usage) FROM subscribers JOIN suburban_usage ON subscribers.id \u003d suburban_usage.subscriber_id WHERE subscribers.region \u003d \u0027Suburban\u0027 AND subscribers.service IN (\u0027Mobile\u0027, \u0027Broadband\u0027) GROUP BY subscribers.region;", + "sql_explanation": "This query calculates the average data usage for mobile and broadband services for customers in the \u0027Suburban\u0027 region. It joins the subscribers and suburban_usage tables on subscriber_id, filters for \u0027Suburban\u0027 region and \u0027Mobile\u0027 or \u0027Broadband\u0027 services, groups the result by region, and calculates the average data usage." 
+}, { + "id": "250", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 most used mobile devices among prepaid customers in the state of California, and how many subscribers use each device?", + "sql_context": "CREATE TABLE mobile_devices (device_id INT, device_name VARCHAR(50), mobile_services INT, state VARCHAR(20)); CREATE TABLE mobile_customers (customer_id INT, device_id INT, plan_type VARCHAR(10));", + "sql": "SELECT device_name, COUNT(*) as num_subscribers FROM mobile_devices JOIN mobile_customers ON mobile_devices.device_id \u003d mobile_customers.device_id WHERE plan_type \u003d \u0027prepaid\u0027 AND state \u003d \u0027California\u0027 GROUP BY device_name ORDER BY num_subscribers DESC LIMIT 5;", + "sql_explanation": "We find the top 5 most used mobile devices among prepaid customers in California by selecting the device_name and device_id columns from the mobile_devices table, joining with the mobile_customers table on device_id, filtering for rows with plan_type \u0027prepaid\u0027 and state \u0027California\u0027, grouping by device_name, counting the number of occurrences for each device, and ordering by this count in descending order, limiting the output to the top 5 devices." 
+}, { + "id": "290", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from mobile and broadband subscribers in the city of Chicago?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, monthly_fee FLOAT, plan_type VARCHAR(10), city VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, monthly_fee, plan_type, city) VALUES (1, 50, \u0027postpaid\u0027, \u0027Chicago\u0027), (2, 30, \u0027prepaid\u0027, \u0027Chicago\u0027); CREATE TABLE broadband_subscribers (subscriber_id INT, monthly_fee FLOAT, state VARCHAR(20)); INSERT INTO broadband_subscribers (subscriber_id, monthly_fee, state) VALUES (1, 60, \u0027Illinois\u0027), (2, 80, \u0027Illinois\u0027);", + "sql": "SELECT SUM(mobile_subscribers.monthly_fee + broadband_subscribers.monthly_fee) FROM mobile_subscribers INNER JOIN broadband_subscribers ON mobile_subscribers.subscriber_id \u003d broadband_subscribers.subscriber_id WHERE mobile_subscribers.city \u003d \u0027Chicago\u0027;", + "sql_explanation": "This SQL query calculates the total revenue generated from both mobile and broadband subscribers in the city of Chicago by performing an inner join on the mobile_subscribers and broadband_subscribers tables on the subscriber_id column, and then filtering the result to only include rows with city of \u0027Chicago\u0027, and finally calculating the sum of the monthly_fee columns from both tables." 
+}, { + "id": "808", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average data usage, in GB, for customers in each region, in the last 6 months?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), region VARCHAR(50), data_usage FLOAT, usage_date DATE); INSERT INTO customers (customer_id, name, region, data_usage, usage_date) VALUES (1, \u0027John Doe\u0027, \u0027North\u0027, 45.6, \u00272022-01-01\u0027), (2, \u0027Jane Smith\u0027, \u0027South\u0027, 30.9, \u00272022-02-01\u0027), (3, \u0027Mike Johnson\u0027, \u0027East\u0027, 60.7, \u00272022-03-01\u0027); CREATE TABLE regions (region_id INT, region_name VARCHAR(50)); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027North\u0027), (2, \u0027South\u0027), (3, \u0027East\u0027), (4, \u0027West\u0027);", + "sql": "SELECT r.region_name, AVG(c.data_usage) as avg_data_usage FROM customers c JOIN regions r ON c.region \u003d r.region_name WHERE c.usage_date \u003e\u003d DATEADD(month, -6, GETDATE()) GROUP BY r.region_name;", + "sql_explanation": "The SQL query calculates the average data usage for customers in each region in the last 6 months by joining the customers and regions tables, filtering the records by usage_date, grouping by region_name, and using the AVG function on the data_usage column." 
+}, { + "id": "871", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average data usage (in GB) for prepaid mobile customers in the \u0027South\u0027 region?", + "sql_context": "CREATE TABLE usage (id INT, subscriber_id INT, data_usage DECIMAL(10,2)); INSERT INTO usage (id, subscriber_id, data_usage) VALUES (1, 1, 2.5), (2, 2, 3.0), (3, 3, 1.5); CREATE TABLE subscribers (id INT, type VARCHAR(10), region VARCHAR(10)); INSERT INTO subscribers (id, type, region) VALUES (1, \u0027postpaid\u0027, \u0027North\u0027), (2, \u0027prepaid\u0027, \u0027South\u0027), (3, \u0027postpaid\u0027, \u0027East\u0027);", + "sql": "SELECT AVG(usage.data_usage) AS avg_data_usage FROM usage INNER JOIN subscribers ON usage.subscriber_id \u003d subscribers.id WHERE subscribers.type \u003d \u0027prepaid\u0027 AND subscribers.region \u003d \u0027South\u0027;", + "sql_explanation": "The SQL query joins the usage and subscribers tables on the subscriber_id column. It then filters for prepaid customers in the \u0027South\u0027 region and calculates the average data usage by taking the average of the data_usage column." 
+}, { + "id": "1064", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly data usage per mobile subscriber for each state, excluding those subscribers with a monthly data usage of 0?", + "sql_context": "CREATE TABLE states (id INT, name VARCHAR(255));CREATE TABLE mobile_subscribers (id INT, state_id INT, monthly_data_usage DECIMAL(10,2));", + "sql": "SELECT s.name, AVG(ms.monthly_data_usage) as avg_data_usage FROM states s INNER JOIN mobile_subscribers ms ON s.id \u003d ms.state_id WHERE ms.monthly_data_usage \u003e 0 GROUP BY s.name;", + "sql_explanation": "This SQL query performs an inner join between the \u0027states\u0027 and \u0027mobile_subscribers\u0027 tables based on the state_id. It then filters the records to only include mobile subscribers with a monthly data usage greater than 0. The query calculates the average monthly data usage per mobile subscriber for each state by grouping the results based on the state name and applying the AVG function. The final result set includes the state name and the average monthly data usage per mobile subscriber." 
+}, { + "id": "1075", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average monthly data usage for mobile subscribers in each region, and the total number of network devices installed in each region?", + "sql_context": "CREATE TABLE mobile_subscribers (id INT, region VARCHAR(20), data_usage INT, usage_date DATE); CREATE TABLE network_devices (id INT, region VARCHAR(20), install_date DATE);", + "sql": "SELECT m.region, AVG(m.data_usage) AS avg_data_usage, COUNT(n.id) AS num_devices FROM mobile_subscribers m INNER JOIN network_devices n ON m.region \u003d n.region GROUP BY m.region;", + "sql_explanation": "This query calculates the average monthly data usage for mobile subscribers and the total number of network devices installed in each region by joining the mobile_subscribers and network_devices tables on the region column, grouping the results by region, and calculating the average data usage and count of devices for each group." 
+}, { + "id": "1173", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of network devices installed in each region and the total number of mobile subscribers in those regions?", + "sql_context": "CREATE TABLE network_devices (id INT, region VARCHAR(20), install_date DATE); CREATE TABLE mobile_subscribers (id INT, region VARCHAR(20), data_usage INT, usage_date DATE);", + "sql": "SELECT n.region, COUNT(n.id) AS num_devices, COUNT(m.id) AS num_subscribers FROM network_devices n INNER JOIN mobile_subscribers m ON n.region \u003d m.region GROUP BY n.region;", + "sql_explanation": "This query counts the number of network devices and the number of mobile subscribers in each region by joining the network_devices and mobile_subscribers tables on the region column, grouping the results by region, and calculating the count of devices and subscribers for each group." 
+}, { + "id": "1188", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum connection speed in Mbps for broadband customers in the West region?", + "sql_context": "CREATE TABLE subscribers (id INT, service VARCHAR(10), region VARCHAR(10)); INSERT INTO subscribers (id, service, region) VALUES (1, \u0027broadband\u0027, \u0027West\u0027), (2, \u0027mobile\u0027, \u0027West\u0027); CREATE TABLE speeds (subscriber_id INT, connection_speed INT); INSERT INTO speeds (subscriber_id, connection_speed) VALUES (1, 550), (2, 450);", + "sql": "SELECT MIN(connection_speed) FROM speeds JOIN subscribers ON speeds.subscriber_id \u003d subscribers.id WHERE subscribers.service \u003d \u0027broadband\u0027 AND subscribers.region \u003d \u0027West\u0027;", + "sql_explanation": "The SQL query calculates the minimum connection speed in Mbps for broadband customers in the West region. It first joins the subscribers and speeds tables based on the subscriber_id field. Then, it filters the data where the service is \u0027broadband\u0027 and the region is \u0027West\u0027. Finally, it calculates the minimum connection_speed for the filtered data." 
+}, { + "id": "1574", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total investment made in network infrastructure in the African region?", + "sql_context": "CREATE TABLE network_investments (investment_id INT, amount FLOAT, country VARCHAR(20)); CREATE TABLE regions (region_id INT, region_name VARCHAR(20)); INSERT INTO network_investments (investment_id, amount, country) VALUES (1, 200000.0, \u0027Nigeria\u0027), (2, 300000.0, \u0027Egypt\u0027), (3, 150000.0, \u0027South Africa\u0027); INSERT INTO regions (region_id, region_name) VALUES (1, \u0027Africa\u0027), (2, \u0027Europe\u0027), (3, \u0027Asia\u0027);", + "sql": "SELECT SUM(amount) FROM network_investments INNER JOIN regions ON network_investments.country \u003d regions.region_name WHERE regions.region_name \u003d \u0027Africa\u0027;", + "sql_explanation": "We perform an inner join between the network_investments and regions tables on country and region_name, respectively. Then, we filter the records where the region_name is equal to \u0027Africa\u0027. Lastly, we calculate the sum of the amount column to determine the total investment made in the African region." 
+}, { + "id": "1964", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which customers have not made any data usage in the state of Florida?", + "sql_context": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), state VARCHAR(50)); INSERT INTO customers (customer_id, name, state) VALUES (1, \u0027John Smith\u0027, \u0027Florida\u0027); INSERT INTO customers (customer_id, name, state) VALUES (2, \u0027Jane Doe\u0027, \u0027Florida\u0027); CREATE TABLE data_usage (usage_id INT, customer_id INT, data_amount FLOAT, usage_date DATE); INSERT INTO data_usage (usage_id, customer_id, data_amount, usage_date) VALUES (1, 1, 30, \u00272022-01-01\u0027);", + "sql": "SELECT c.name FROM customers c LEFT JOIN data_usage du ON c.customer_id \u003d du.customer_id WHERE c.state \u003d \u0027Florida\u0027 AND du.usage_id IS NULL;", + "sql_explanation": "This SQL query returns the names of customers who have not made any data usage in the state of Florida. It starts by selecting the name column from the customers table and performs a left join with the data_usage table on the customer_id column. The query then filters the results by the state column being \u0027Florida\u0027 and the usage_id column being null." 
+}, { + "id": "2157", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List broadband subscribers who joined after mobile subscribers with the same region_id.", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO mobile_subscribers (subscriber_id, region_id, join_date) VALUES (1, 1, \u00272021-01-01\u0027), (2, 2, \u00272021-03-01\u0027), (3, 3, \u00272021-02-01\u0027), (4, 4, \u00272021-04-01\u0027), (5, 1, \u00272021-01-15\u0027), (6, 2, \u00272021-03-15\u0027), (7, 3, \u00272021-02-15\u0027), (8, 4, \u00272021-04-15\u0027); CREATE TABLE broadband_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO broadband_subscribers (subscriber_id, region_id, join_date) VALUES (9, 1, \u00272021-01-20\u0027), (10, 2, \u00272021-03-20\u0027), (11, 3, \u00272021-02-20\u0027), (12, 4, \u00272021-04-20\u0027), (13, 1, \u00272021-01-25\u0027), (14, 2, \u00272021-03-25\u0027), (15, 3, \u00272021-02-25\u0027), (16, 4, \u00272021-04-25\u0027);", + "sql": "SELECT b.* FROM broadband_subscribers b INNER JOIN mobile_subscribers m ON b.region_id \u003d m.region_id WHERE b.join_date \u003e m.join_date;", + "sql_explanation": "Join mobile and broadband subscribers on the region_id and return broadband subscribers who joined after mobile subscribers with the same region_id." 
+}, { + "id": "2738", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum data usage for mobile subscribers in each state?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, state VARCHAR(255)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, state) VALUES (1, 50.5, \u0027Texas\u0027), (2, 60.3, \u0027Texas\u0027), (3, 40.2, \u0027Texas\u0027), (4, 25, \u0027Florida\u0027), (5, 32, \u0027Florida\u0027); CREATE TABLE states (state_id INT, state VARCHAR(255)); INSERT INTO states (state_id, state) VALUES (1, \u0027Texas\u0027), (2, \u0027Florida\u0027), (3, \u0027New Jersey\u0027);", + "sql": "SELECT MIN(ms.data_usage), s.state FROM mobile_subscribers ms JOIN states s ON ms.state \u003d s.state GROUP BY s.state;", + "sql_explanation": "This query calculates the minimum data usage for mobile subscribers in each state by joining the mobile_subscribers table with the states table on the state column and grouping the results by the state column. It calculates the minimum data usage for each state by using the MIN function on the data_usage column." 
+}, { + "id": "3895", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data retrieval", + "sql_task_type_description": "basic data fetching queries", + "sql_prompt": "List all mobile subscribers who joined in the first quarter of 2021 and their respective joining dates.", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, join_date DATE); INSERT INTO mobile_subscribers (subscriber_id, join_date) VALUES (1, \u00272021-01-01\u0027), (2, \u00272021-03-01\u0027), (3, \u00272020-12-01\u0027);", + "sql": "SELECT * FROM mobile_subscribers WHERE join_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-03-31\u0027;", + "sql_explanation": "This query uses the BETWEEN keyword to select rows where the join_date is between \u00272021-01-01\u0027 and \u00272021-03-31\u0027 (inclusive) and then selects all columns using the * wildcard." 
+}, { + "id": "269", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sale amount and contractor name for each equipment type with a high geopolitical risk level?", + "sql_context": "CREATE TABLE GeopoliticalRiskFactors (id INT PRIMARY KEY, equipment VARCHAR(50), risk_level VARCHAR(50)); INSERT INTO GeopoliticalRiskFactors (id, equipment, risk_level) VALUES (4, \u0027Patriot\u0027, \u0027High\u0027); INSERT INTO EquipmentSales (id, contractor, equipment, sale_date, sale_amount) VALUES (6, \u0027Lockheed Martin\u0027, \u0027Javelin\u0027, \u00272021-02-01\u0027, 70000000);", + "sql": "SELECT EquipmentSales.equipment, EquipmentSales.sale_amount, EquipmentSales.contractor FROM EquipmentSales INNER JOIN GeopoliticalRiskFactors ON EquipmentSales.equipment \u003d GeopoliticalRiskFactors.equipment WHERE GeopoliticalRiskFactors.risk_level \u003d \u0027High\u0027;", + "sql_explanation": "This query retrieves the sale amount and contractor name for each equipment type with a high geopolitical risk level by joining the EquipmentSales and GeopoliticalRiskFactors tables on the equipment column. It then filters the results to only include records with a high risk level." 
+}, { + "id": "623", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all defense projects and their associated military equipment sales, if any, ordered by the defense project name in ascending order.", + "sql_context": "CREATE TABLE defense_projects (id INT PRIMARY KEY, project_name VARCHAR(255), status VARCHAR(255), planned_start_date DATE); CREATE TABLE military_sales (id INT PRIMARY KEY, project_name VARCHAR(255), seller VARCHAR(255), buyer VARCHAR(255), equipment_type VARCHAR(255), quantity INT);", + "sql": "SELECT defense_projects.project_name, military_sales.* FROM defense_projects LEFT JOIN military_sales ON defense_projects.project_name \u003d military_sales.project_name ORDER BY defense_projects.project_name ASC;", + "sql_explanation": "The query selects all defense projects and their associated military equipment sales, if any, and orders them by the defense project name in ascending order." 
+}, { + "id": "799", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find all defense projects that Contractor R and Contractor S worked on together.", + "sql_context": "CREATE TABLE ProjectParticipation (ParticipationID INT, Contractor VARCHAR(255), ProjectID INT); INSERT INTO ProjectParticipation (ParticipationID, Contractor, ProjectID) VALUES (1, \u0027Contractor R\u0027, 1), (2, \u0027Contractor S\u0027, 1);", + "sql": "SELECT pp1.ProjectID FROM ProjectParticipation pp1 INNER JOIN ProjectParticipation pp2 ON pp1.ProjectID \u003d pp2.ProjectID WHERE pp1.Contractor \u003d \u0027Contractor R\u0027 AND pp2.Contractor \u003d \u0027Contractor S\u0027;", + "sql_explanation": "This query performs an inner join between the ProjectParticipation table and itself based on ProjectID and returns all the projects that Contractor R and Contractor S worked on together." 
+}, { + "id": "2305", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cost of a single military equipment item for \u0027Blue Corp.\u0027?", + "sql_context": "CREATE TABLE Equipment(id INT, name VARCHAR(255), manufacturer VARCHAR(255), model VARCHAR(255), cost DECIMAL(10,2)); CREATE VIEW Sales_Summary AS SELECT equipment, SUM(quantity) as total_sold FROM Sales GROUP BY equipment;", + "sql": "SELECT AVG(cost) FROM Equipment JOIN Sales_Summary ON Equipment.name \u003d Sales_Summary.equipment WHERE manufacturer \u003d \u0027Blue Corp.\u0027;", + "sql_explanation": "This query calculates the average cost of a single military equipment item for \u0027Blue Corp.\u0027. It first creates a view \u0027Sales_Summary\u0027 summarizing sales data by equipment name. Then, it joins the \u0027Equipment\u0027 table with \u0027Sales_Summary\u0027 on the equipment name and filters rows with \u0027Blue Corp.\u0027 as the manufacturer. Finally, it calculates the average cost using the \u0027cost\u0027 column." 
+}, { + "id": "168", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the policy advocacy initiatives in the last 3 years, along with the number of students impacted, and show the top 10 initiatives with the highest number of students impacted?", + "sql_context": "CREATE TABLE initiatives (initiative_id INT, initiative_name VARCHAR(255), initiative_date DATE, organization_id INT); CREATE TABLE impact (initiative_id INT, student_id INT);", + "sql": "SELECT i.initiative_name, COUNT(DISTINCT imp.student_id) as students_impacted FROM initiatives i JOIN impact imp ON i.initiative_id \u003d imp.initiative_id WHERE i.initiative_date \u003e\u003d DATEADD(year, -3, CURRENT_DATE) GROUP BY i.initiative_name ORDER BY students_impacted DESC LIMIT 10;", + "sql_explanation": "The SQL query first joins the \u0027initiatives\u0027 table with the \u0027impact\u0027 table on the \u0027initiative_id\u0027 column. Then, it filters the results to only include initiatives that occurred in the last 3 years. After that, it groups the results by the \u0027initiative_name\u0027 column and calculates the number of distinct students impacted for each initiative. Finally, it orders the results by the \u0027students_impacted\u0027 column in descending order and limits the results to the top 10." 
+}, { + "id": "630", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of students who require accommodations by accommodation type and their enrollment status.", + "sql_context": "CREATE TABLE student_enrollment (student_id INT, enrollment_status VARCHAR(50), requires_accommodation BOOLEAN); INSERT INTO student_enrollment (student_id, enrollment_status, requires_accommodation) VALUES (1, \u0027Enrolled\u0027, TRUE), (2, \u0027Dropped\u0027, FALSE);", + "sql": "SELECT accommodation_type, enrollment_status, COUNT(*) FROM student_enrollment JOIN accommodations ON student_enrollment.student_id \u003d accommodations.student_id GROUP BY accommodation_type, enrollment_status;", + "sql_explanation": "This SQL query retrieves the number of students that require accommodations by accommodation type and their enrollment status by joining the \u0027student_enrollment\u0027 table with the \u0027accommodations\u0027 table on \u0027student_id\u0027 and grouping the results by \u0027accommodation_type\u0027 and \u0027enrollment_status\u0027." 
+}, { + "id": "805", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the disability services advocates that have participated in more than 3 policy advocacy events in the last year?", + "sql_context": "CREATE TABLE advocates (advocate_id INT, advocate_name VARCHAR(255), advocate_role VARCHAR(255));", + "sql": "SELECT advocate_name FROM advocates A JOIN policy_events PE ON A.advocate_name \u003d PE.advocate_name WHERE PE.event_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY advocate_name HAVING COUNT(*) \u003e 3;", + "sql_explanation": "This query joins the advocates and policy_events tables on the advocate_name column, filters for policy events within the past year, groups by advocate name, and then selects the names of advocates who have participated in more than 3 events." 
+}, { + "id": "902", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students with visual impairments have received accommodations in the last year?", + "sql_context": "CREATE TABLE Accommodations (id INT, student VARCHAR(255), date DATE); CREATE TABLE Students (id INT, name VARCHAR(255), age INT, disability VARCHAR(255));", + "sql": "SELECT COUNT(*) FROM Accommodations INNER JOIN Students ON Accommodations.student \u003d Students.id WHERE disability \u003d \u0027visual impairment\u0027 AND date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", + "sql_explanation": "The SQL query calculates the number of students with visual impairments who have received accommodations in the last year. It joins the Accommodations and Students tables on the student column, filters the records based on the disability and date columns, and then counts the number of records using the COUNT function." 
+}, { + "id": "1430", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average disability accommodation budget by state?", + "sql_context": "CREATE TABLE disability_accommodations_state (accom_id INT, accom_name TEXT, budget DECIMAL(10,2), state_id INT);CREATE TABLE states (state_id INT, state_name TEXT);", + "sql": "SELECT s.state_name, AVG(da.budget) AS avg_budget FROM disability_accommodations_state da INNER JOIN states s ON da.state_id \u003d s.state_id GROUP BY s.state_name;", + "sql_explanation": "This query joins the disability_accommodations_state and states tables on the state_id field. It then groups the results by the state_name field and calculates the average budget for each group, providing the average disability accommodation budget by state." 
+}, { + "id": "1520", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the policy advocacy history for programs with a focus on emotional support animals in the Northeast and the South.", + "sql_context": "CREATE TABLE Programs (program_id INT, state VARCHAR(50), focus_area VARCHAR(50), policy_advocacy_history TEXT); CREATE TABLE Regions (region VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT * FROM Programs P INNER JOIN Regions R ON P.state \u003d R.state WHERE R.region IN (\u0027Northeast\u0027, \u0027South\u0027) AND P.focus_area \u003d \u0027emotional support animals\u0027;", + "sql_explanation": "This query first creates two tables named \u0027Programs\u0027 and \u0027Regions\u0027 with 4 and 2 columns, respectively. Then it inserts a sample record for a program with a focus on emotional support animals in the \u0027Programs\u0027 table. The query then retrieves the policy advocacy history for programs with a focus on emotional support animals in the Northeast and the South by performing an inner join on the \u0027Programs\u0027 and \u0027Regions\u0027 tables and filtering the data based on the given conditions." 
+}, { + "id": "1984", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which accommodation types were provided to students in the \u0027Arts\u0027 department?", + "sql_context": "CREATE TABLE Students (student_id INT, department VARCHAR(255)); CREATE TABLE Accommodations (accommodation_id INT, student_id INT, accommodation_type VARCHAR(255));", + "sql": "SELECT a.accommodation_type, s.department FROM Students s JOIN Accommodations a ON s.student_id \u003d a.student_id WHERE s.department \u003d \u0027Arts\u0027;", + "sql_explanation": "This query performs a join between Students and Accommodations tables to get the accommodation types for each student. It then filters the results to only show students in the \u0027Arts\u0027 department." 
+}, { + "id": "2197", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which employees have not received disability accommodation training?", + "sql_context": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50)); INSERT INTO employees (id, name, department) VALUES (1, \u0027John Doe\u0027, \u0027HR\u0027), (2, \u0027Jane Smith\u0027, \u0027IT\u0027); CREATE TABLE training (id INT, employee_id INT, training_name VARCHAR(50)); INSERT INTO training (id, employee_id, training_name) VALUES (1, 1, \u0027Disability Awareness\u0027), (2, 2, \u0027First Aid\u0027), (3, 2, \u0027Disability Accommodation\u0027);", + "sql": "SELECT employees.name FROM employees LEFT JOIN training ON employees.id \u003d training.employee_id WHERE training.training_name IS NULL;", + "sql_explanation": "The SQL query uses a left join to join the employees and training tables on the employee_id column. It then filters the results to only include records where the training_name is NULL, which indicates that there is no matching record in the training table for that employee. Finally, it selects the name column from the employees table to display in the result." 
+}, { + "id": "32", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for public transportation services in New York and Pennsylvania, and what is the total budget allocated to these services in each state?", + "sql_context": "CREATE TABLE response_times (state VARCHAR(20), service VARCHAR(20), response_time INT); INSERT INTO response_times (state, service, response_time) VALUES (\u0027New York\u0027, \u0027Public Transportation\u0027, 60), (\u0027New York\u0027, \u0027Sanitation\u0027, 45), (\u0027Pennsylvania\u0027, \u0027Public Transportation\u0027, 70), (\u0027Pennsylvania\u0027, \u0027Education\u0027, 40); CREATE TABLE budget_allocation (state VARCHAR(20), service VARCHAR(20), budget FLOAT); INSERT INTO budget_allocation (state, service, budget) VALUES (\u0027New York\u0027, \u0027Public Transportation\u0027, 15000000), (\u0027New York\u0027, \u0027Sanitation\u0027, 20000000), (\u0027Pennsylvania\u0027, \u0027Public Transportation\u0027, 12000000), (\u0027Pennsylvania\u0027, \u0027Education\u0027, 20000000);", + "sql": "SELECT AVG(response_times.response_time) as avg_response_time, budget_allocation.state, SUM(budget_allocation.budget) as total_budget FROM response_times INNER JOIN budget_allocation ON response_times.service \u003d budget_allocation.service WHERE response_times.service \u003d \u0027Public Transportation\u0027 AND (response_times.state \u003d \u0027New York\u0027 OR response_times.state \u003d \u0027Pennsylvania\u0027) GROUP BY response_times.state;", + "sql_explanation": "This query calculates the average response time for public transportation services and 
the total budget allocated to these services in New York and Pennsylvania by selecting the average value of the \u0027response_time\u0027 column and the sum of the \u0027budget\u0027 column for rows where \u0027service\u0027 is \u0027Public Transportation\u0027 and \u0027state\u0027 is either \u0027New York\u0027 or \u0027Pennsylvania\u0027. The query performs an inner join between the \u0027response_times\u0027 and \u0027budget_allocation\u0027 tables on the \u0027service\u0027 column." +}, { + "id": "299", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of citizen feedback records received for public safety in Q3 2022?", + "sql_context": "CREATE TABLE FeedbackQ3 (Service TEXT, Quarter INT, Year INT, FeedbackCount INT); INSERT INTO FeedbackQ3 (Service, Quarter, Year, FeedbackCount) VALUES (\u0027Public Safety\u0027, 3, 2022, 800), (\u0027Transportation\u0027, 3, 2022, 1000), (\u0027Healthcare\u0027, 3, 2022, 1200); CREATE TABLE TotalFeedbackQ3 (Service TEXT, Quarter INT, Year INT, TotalCount INT); INSERT INTO TotalFeedbackQ3 (Service, Quarter, Year, TotalCount) VALUES (\u0027Public Safety\u0027, 3, 2022, 10000), (\u0027Transportation\u0027, 3, 2022, 15000), (\u0027Healthcare\u0027, 3, 2022, 20000);", + "sql": "SELECT Service, (SUM(FeedbackCount) * 100.0 / SUM(TotalCount)) AS Percentage FROM FeedbackQ3 INNER JOIN TotalFeedbackQ3 ON FeedbackQ3.Service \u003d TotalFeedbackQ3.Service WHERE Quarter \u003d 3 AND Year \u003d 2022 AND Service \u003d \u0027Public Safety\u0027 GROUP BY Service;", + "sql_explanation": "This query calculates the percentage of citizen feedback records received for 
public safety in Q3 2022 by joining the FeedbackQ3 and TotalFeedbackQ3 tables on Service, filtering for the Quarter 3 and Year 2022 and the Service Public Safety, then summing the FeedbackCount and TotalCount values and calculating the percentage for each Service." +}, { + "id": "351", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which services in city 1 have the highest average rating in the last month?", + "sql_context": "CREATE TABLE Citizen_Feedback (id INT, city_id INT, service VARCHAR(50), rating INT, date_created DATETIME); INSERT INTO Citizen_Feedback (id, city_id, service, rating, date_created) VALUES (9, 1, \u0027Road Maintenance\u0027, 4, \u00272022-01-15 12:00:00\u0027), (10, 1, \u0027Waste Management\u0027, 3, \u00272022-02-20 14:30:00\u0027), (11, 1, \u0027Road Maintenance\u0027, 5, \u00272022-06-05 09:00:00\u0027), (12, 1, \u0027Waste Management\u0027, 5, \u00272022-05-10 16:00:00\u0027);", + "sql": "SELECT CF.service, AVG(CF.rating) as avg_rating FROM Citizen_Feedback CF INNER JOIN City C ON CF.city_id \u003d C.id WHERE C.name \u003d \u0027City1\u0027 AND CF.date_created \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY CF.service ORDER BY avg_rating DESC;", + "sql_explanation": "This query calculates the average rating for each service in city 1 in the last month by joining the Citizen_Feedback and City tables on the city_id foreign key, filtering the results to only include city1 and records created in the last month, grouping the results by service, and ordering the results by average rating in descending order." 
+}, { + "id": "2785", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of public parks in cities with a population over 1 million.", + "sql_context": "CREATE TABLE City (Name VARCHAR(20), Population INT); CREATE TABLE Park (City VARCHAR(20), Type VARCHAR(10)); INSERT INTO City (Name, Population) VALUES (\u0027CityA\u0027, 1500000), (\u0027CityB\u0027, 800000), (\u0027CityC\u0027, 1200000); INSERT INTO Park (City, Type) VALUES (\u0027CityA\u0027, \u0027Public\u0027), (\u0027CityA\u0027, \u0027Private\u0027), (\u0027CityB\u0027, \u0027Public\u0027), (\u0027CityC\u0027, \u0027Public\u0027);", + "sql": "SELECT COUNT(*) FROM City INNER JOIN Park ON City.Name \u003d Park.City WHERE Population \u003e 1000000 AND Type \u003d \u0027Public\u0027;", + "sql_explanation": "The query performs an inner join on the City and Park tables, filters for cities with a population over 1 million and public parks, and then counts the number of resulting rows." 
+}, { + "id": "1378", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many TV shows were produced by each network in the genre of comedy?", + "sql_context": "CREATE TABLE networks (id INT, name TEXT); CREATE TABLE shows (id INT, title TEXT, genre TEXT, network_id INT); INSERT INTO networks (id, name) VALUES (1, \u0027Network A\u0027), (2, \u0027Network B\u0027); INSERT INTO shows (id, title, genre, network_id) VALUES (1, \u0027Show 1\u0027, \u0027Comedy\u0027, 1), (2, \u0027Show 2\u0027, \u0027Drama\u0027, 1), (3, \u0027Show 3\u0027, \u0027Comedy\u0027, 2), (4, \u0027Show 4\u0027, \u0027Action\u0027, 2);", + "sql": "SELECT networks.name, COUNT(shows.id) as num_shows FROM networks JOIN shows ON networks.id \u003d shows.network_id WHERE shows.genre \u003d \u0027Comedy\u0027 GROUP BY networks.name;", + "sql_explanation": "This SQL query counts the number of TV shows in the genre of comedy produced by each network. It does this by joining the \u0027networks\u0027 table with the \u0027shows\u0027 table on the \u0027network_id\u0027 column. Then, it filters the results to only include shows in the comedy genre using the WHERE clause. Finally, it groups the results by network name and counts the number of shows for each network using the COUNT() function and GROUP BY clause." 
+}, { + "id": "542", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of Gadolinium imported by Japan from mines with a production capacity under 500 tons?", + "sql_context": "CREATE TABLE GadoliniumShipments (id INT PRIMARY KEY, mine_id INT, import_year INT, quantity INT, FOREIGN KEY (mine_id) REFERENCES GadoliniumMines(id)); CREATE TABLE GadoliniumMines (id INT PRIMARY KEY, name VARCHAR(100), production_capacity INT);", + "sql": "SELECT SUM(quantity) FROM GadoliniumShipments INNER JOIN GadoliniumMines ON GadoliniumShipments.mine_id \u003d GadoliniumMines.id WHERE GadoliniumShipments.country \u003d \u0027Japan\u0027 AND GadoliniumMines.production_capacity \u003c 500;", + "sql_explanation": "This SQL query calculates the total quantity of Gadolinium imported by Japan from mines with a production capacity under 500 tons. It does this by joining the GadoliniumShipments table with the GadoliniumMines table on the mine_id foreign key. Then, it filters the joined table for records with country equal to \u0027Japan\u0027 and production_capacity less than 500. Finally, it uses the SUM function to compute the sum of the quantity for the filtered records." 
+}, { + "id": "1407", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries had zero production of Dysprosium in 2019, from the \u0027production\u0027 and \u0027countries\u0027 tables?", + "sql_context": "CREATE TABLE production ( id INT PRIMARY KEY, country_id INT, element VARCHAR(10), year INT, quantity INT); CREATE TABLE countries ( id INT PRIMARY KEY, name VARCHAR(255)); INSERT INTO production (id, country_id, element, year, quantity) VALUES (1, 1, \u0027Dysprosium\u0027, 2018, 50), (2, 2, \u0027Dysprosium\u0027, 2019, 0), (3, 3, \u0027Dysprosium\u0027, 2020, 30), (4, 1, \u0027Dysprosium\u0027, 2021, 100); INSERT INTO countries (id, name) VALUES (1, \u0027China\u0027), (2, \u0027USA\u0027), (3, \u0027Australia\u0027);", + "sql": "SELECT countries.name FROM production INNER JOIN countries ON production.country_id \u003d countries.id WHERE element \u003d \u0027Dysprosium\u0027 AND year \u003d 2019 AND quantity \u003d 0;", + "sql_explanation": "The SQL query lists the countries that had zero production of Dysprosium in 2019 by joining the \u0027production\u0027 and \u0027countries\u0027 tables, filtering rows where \u0027element\u0027 is \u0027Dysprosium\u0027, \u0027year\u0027 is 2019, and \u0027quantity\u0027 is zero." 
+}, { + "id": "1457", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all Ytterbium suppliers and their respective number of shipments in 2020.", + "sql_context": "CREATE TABLE ytterbium_suppliers (supplier VARCHAR(50), shipments INT); CREATE TABLE ytterbium_shipments (supplier VARCHAR(50), year INT);", + "sql": "SELECT s.supplier, COUNT(*) FROM ytterbium_shipments sh INNER JOIN ytterbium_suppliers s ON sh.supplier \u003d s.supplier WHERE sh.year \u003d 2020 GROUP BY s.supplier;", + "sql_explanation": "The SQL query lists all Ytterbium suppliers and their respective number of shipments in 2020. It first joins the ytterbium_shipments and ytterbium_suppliers tables on the supplier column, then filters the joined table to only include rows from 2020, and finally calculates the count of rows for each supplier." 
+}, { + "id": "1972", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 mining locations with the highest average Gadolinium production in 2020, using a cross join.", + "sql_context": "CREATE TABLE mines (id INT, location VARCHAR(50), Gadolinium_prod FLOAT, datetime DATETIME); INSERT INTO mines (id, location, Gadolinium_prod, datetime) VALUES (1, \u0027Mount Weld\u0027, 120.0, \u00272020-01-01 10:00:00\u0027), (2, \u0027Bayan Obo\u0027, 350.0, \u00272020-02-15 14:30:00\u0027);", + "sql": "SELECT a.location, AVG(a.Gadolinium_prod) FROM mines a CROSS JOIN mines b GROUP BY a.location ORDER BY AVG(a.Gadolinium_prod) DESC LIMIT 5;", + "sql_explanation": "The SQL query performs a cross join on the \u0027mines\u0027 table and calculates the average Gadolinium production for each mining location. The result is then ordered in descending order, and the top 5 locations are returned." 
+}, { + "id": "2021", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all Dyprosium market trends and their corresponding environmental impact stats.", + "sql_context": "CREATE TABLE dyprosium_market_trends (year INT, trend VARCHAR(50), value INT); CREATE TABLE dyprosium_environmental_impact (market_trend VARCHAR(50), impact VARCHAR(50));", + "sql": "SELECT m.trend, m.value, e.impact FROM dyprosium_market_trends m INNER JOIN dyprosium_environmental_impact e ON m.trend \u003d e.market_trend;", + "sql_explanation": "The SQL query joins the dyprosium_market_trends and dyprosium_environmental_impact tables on the market_trend/trend columns, creating a new table that includes all Dyprosium market trends and their corresponding environmental impact stats." 
+}, { + "id": "2147", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many rare earth elements were produced in each year since 2015?", + "sql_context": "CREATE TABLE production (year INT, element TEXT, volume INT); INSERT INTO production (year, element, volume) VALUES (2015, \u0027neodymium\u0027, 10000), (2016, \u0027neodymium\u0027, 12000), (2017, \u0027neodymium\u0027, 14000), (2018, \u0027neodymium\u0027, 15000), (2015, \u0027dysprosium\u0027, 5000), (2016, \u0027dysprosium\u0027, 6000), (2017, \u0027dysprosium\u0027, 7000), (2018, \u0027dysprosium\u0027, 8000), (2019, \u0027dysprosium\u0027, 9000); CREATE TABLE elements (element TEXT); INSERT INTO elements (element) VALUES (\u0027neodymium\u0027), (\u0027dysprosium\u0027);", + "sql": "SELECT p.year, COUNT(DISTINCT e.element) AS element_count FROM production p JOIN elements e ON e.element \u003d p.element GROUP BY p.year;", + "sql_explanation": "This query counts the number of unique rare earth elements produced in each year since 2015. It does this by joining the production table with the elements table, grouping the results by year, and calculating the count of unique elements for each year." 
+}, { + "id": "202", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show athlete performance and wellbeing data, joining the \u0027athlete_stats\u0027 and \u0027athlete_wellbeing\u0027 tables", + "sql_context": "CREATE TABLE athlete_stats (athlete_id INT, name VARCHAR(100), team VARCHAR(50), position VARCHAR(50), games_played INT, goals_scored INT); CREATE TABLE athlete_wellbeing (athlete_id INT, name VARCHAR(100), mental_health_score INT, physical_health_score INT); INSERT INTO athlete_stats (athlete_id, name, team, position, games_played, goals_scored) VALUES (1, \u0027John Doe\u0027, \u0027Team A\u0027, \u0027Forward\u0027, 20, 15), (2, \u0027Jane Smith\u0027, \u0027Team B\u0027, \u0027Defender\u0027, 25, 5); INSERT INTO athlete_wellbeing (athlete_id, name, mental_health_score, physical_health_score) VALUES (1, \u0027John Doe\u0027, 80, 85), (2, \u0027Jane Smith\u0027, 85, 90);", + "sql": "SELECT athlete_stats.name, athlete_stats.games_played, athlete_stats.goals_scored, athlete_wellbeing.mental_health_score, athlete_wellbeing.physical_health_score FROM athlete_stats INNER JOIN athlete_wellbeing ON athlete_stats.athlete_id \u003d athlete_wellbeing.athlete_id;", + "sql_explanation": "This query uses an INNER JOIN to combine the \u0027athlete_stats\u0027 and \u0027athlete_wellbeing\u0027 tables, displaying athlete performance and wellbeing data for athletes." 
+}, { + "id": "244", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest selling merchandise item by total revenue in the last year?", + "sql_context": "CREATE TABLE merchandise (merchandise_id INT, merchandise_name VARCHAR(50), unit_price DECIMAL(5,2));CREATE TABLE sales (sale_id INT, merchandise_id INT, sale_date DATE, quantity INT); INSERT INTO merchandise (merchandise_id, merchandise_name, unit_price) VALUES (1, \u0027T-Shirt\u0027, 20.00), (2, \u0027Hat\u0027, 15.00); INSERT INTO sales (sale_id, merchandise_id, sale_date, quantity) VALUES (1, 1, \u00272022-01-01\u0027, 5), (2, 2, \u00272022-01-02\u0027, 3);", + "sql": "SELECT m.merchandise_name, SUM(m.unit_price * s.quantity) as total_revenue FROM merchandise m JOIN sales s ON m.merchandise_id \u003d s.merchandise_id WHERE s.sale_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY m.merchandise_name ORDER BY total_revenue DESC LIMIT 1;", + "sql_explanation": "The SQL query finds the highest selling merchandise item by total revenue in the last year by joining the merchandise and sales tables on the merchandise_id column and filtering for sales in the last year using the WHERE clause and the DATEADD function. The total revenue is then calculated by multiplying the unit price by the quantity and summing the result. The query then orders by total revenue in descending order and returns the top result using the LIMIT clause." 
+}, { + "id": "318", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many tickets were sold for basketball games in Los Angeles and Chicago in the last quarter?", + "sql_context": "CREATE TABLE tickets (ticket_id INT, game_id INT, price INT, sale_date DATE); INSERT INTO tickets (ticket_id, game_id, price, sale_date) VALUES (1, 1, 50, \u00272021-09-01\u0027), (2, 2, 60, \u00272021-10-01\u0027); CREATE TABLE games (game_id INT, sport VARCHAR(20), city VARCHAR(20), game_date DATE); INSERT INTO games (game_id, sport, city, game_date) VALUES (1, \u0027Basketball\u0027, \u0027Los Angeles\u0027, \u00272021-09-01\u0027), (2, \u0027Basketball\u0027, \u0027Chicago\u0027, \u00272021-10-01\u0027);", + "sql": "SELECT COUNT(tickets.ticket_id) FROM tickets INNER JOIN games ON tickets.game_id \u003d games.game_id WHERE games.sport \u003d \u0027Basketball\u0027 AND (games.city \u003d \u0027Los Angeles\u0027 OR games.city \u003d \u0027Chicago\u0027) AND tickets.sale_date \u003e\u003d DATEADD(quarter, -1, GETDATE());", + "sql_explanation": "The SQL query first joins the \u0027tickets\u0027 and \u0027games\u0027 tables based on the game_id. It then filters the records to only include basketball games in Los Angeles and Chicago and tickets sold in the last quarter. Lastly, it calculates the number of tickets sold." 
+}, { + "id": "414", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 athletes with the highest number of injuries in the last year and their respective team.", + "sql_context": "CREATE TABLE athlete (id INT, name VARCHAR(50), team VARCHAR(50));CREATE TABLE injury (id INT, athlete_id INT, injury_date DATE);", + "sql": "SELECT a.name, a.team, COUNT(i.id) AS injury_count FROM athlete a JOIN injury i ON a.id \u003d i.athlete_id WHERE i.injury_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 YEAR) AND CURDATE() GROUP BY a.id ORDER BY injury_count DESC LIMIT 5;", + "sql_explanation": "The SQL query joins the athlete and injury tables based on the athlete_id. It then filters the records where the injury_date is within the last year. Afterward, it calculates the number of injuries per athlete and orders them in descending order. Lastly, it selects the top 5 athletes with the highest number of injuries." 
+}, { + "id": "451", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of athletes enrolled in wellbeing programs, by sport, for the past year.", + "sql_context": "CREATE TABLE athlete (athlete_id INT, name VARCHAR(50), sport VARCHAR(50)); CREATE TABLE wellbeing_programs (program_id INT, athlete_id INT, enrollment_date DATE); INSERT INTO athlete VALUES (1, \u0027Jane Smith\u0027, \u0027Basketball\u0027); INSERT INTO wellbeing_programs VALUES (1, 1, \u00272022-06-15\u0027);", + "sql": "SELECT a.sport, COUNT(DISTINCT a.athlete_id) AS athletes_enrolled FROM athlete a JOIN wellbeing_programs wp ON a.athlete_id \u003d wp.athlete_id WHERE wp.enrollment_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY a.sport;", + "sql_explanation": "This query retrieves the number of athletes enrolled in wellbeing programs, by sport, for the past year by joining the athlete and wellbeing_programs tables on athlete_id. It filters for enrollment dates within the past year and groups the result by sport, counting distinct athlete IDs." 
+}, { + "id": "823", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for home games of each team, considering only games with attendance greater than 30000?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50));CREATE TABLE games (game_id INT, team_id INT, home_team BOOLEAN, price DECIMAL(5,2), attendance INT);INSERT INTO teams (team_id, team_name) VALUES (1, \u0027Red Sox\u0027), (2, \u0027Yankees\u0027);INSERT INTO games (game_id, team_id, home_team, price, attendance) VALUES (1, 1, 1, 35.50, 45000), (2, 2, 1, 42.75, 32000), (3, 1, 0, 28.00, 22000);", + "sql": "SELECT t.team_name, SUM(g.price * g.attendance) AS revenue FROM teams t INNER JOIN games g ON t.team_id \u003d g.team_id AND g.home_team \u003d t.team_id WHERE g.attendance \u003e 30000 GROUP BY t.team_name;", + "sql_explanation": "Join teams and games tables, filter on games with attendance \u003e 30000, and calculate the total revenue for each home team." 
+}, { + "id": "976", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average wellbeing program participation for athletes in the Western Conference?", + "sql_context": "CREATE TABLE Teams (team_id INT, conference VARCHAR(255)); INSERT INTO Teams (team_id, conference) VALUES (1, \u0027Eastern\u0027), (2, \u0027Western\u0027), (3, \u0027Eastern\u0027), (4, \u0027Western\u0027); CREATE TABLE Athletes (athlete_id INT, team_id INT, name VARCHAR(255), participations INT); INSERT INTO Athletes (athlete_id, team_id, name, participations) VALUES (1, 1, \u0027Athlete A\u0027, 12), (2, 1, \u0027Athlete B\u0027, 15), (3, 2, \u0027Athlete C\u0027, 18), (4, 3, \u0027Athlete D\u0027, 10);", + "sql": "SELECT Teams.conference, AVG(Athletes.participations) FROM Athletes INNER JOIN Teams ON Athletes.team_id \u003d Teams.team_id WHERE Teams.conference \u003d \u0027Western\u0027 GROUP BY Teams.conference;", + "sql_explanation": "The SQL query joins the Teams and Athletes tables on the team_id column and filters for Western Conference teams using the WHERE clause. Then, it groups the results by conference and calculates the average wellbeing program participation for athletes in the Western Conference." 
+}, { + "id": "1094", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of fans by gender for each baseball team?", + "sql_context": "CREATE TABLE fan_demographics (fan_id INT, gender VARCHAR(255), team_id INT); INSERT INTO fan_demographics (fan_id, gender, team_id) VALUES (1, \u0027Male\u0027, 1), (2, \u0027Female\u0027, 2), (3, \u0027Male\u0027, 1), (4, \u0027Male\u0027, 3), (5, \u0027Female\u0027, 2); CREATE TABLE teams (team_id INT, team_name VARCHAR(255), sport VARCHAR(255)); INSERT INTO teams (team_id, team_name, sport) VALUES (1, \u0027Red Sox\u0027, \u0027Baseball\u0027), (2, \u0027Yankees\u0027, \u0027Baseball\u0027), (3, \u0027Lakers\u0027, \u0027Basketball\u0027);", + "sql": "SELECT t.team_name, f.gender, COUNT(f.fan_id) fan_count FROM fan_demographics f JOIN teams t ON f.team_id \u003d t.team_id WHERE t.sport \u003d \u0027Baseball\u0027 GROUP BY t.team_name, f.gender;", + "sql_explanation": "Join the fan_demographics and teams tables on team_id, filter to only include baseball teams, then group by team_name and gender and compute the number of fans for each group." 
+}, { + "id": "1121", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of fans who participated in athlete wellbeing programs by age group?", + "sql_context": "CREATE TABLE WellbeingPrograms (ProgramID INT, ProgramName VARCHAR(255), AgeGroup VARCHAR(255)); INSERT INTO WellbeingPrograms (ProgramID, ProgramName, AgeGroup) VALUES (1, \u0027Yoga\u0027, \u002718-25\u0027), (2, \u0027Meditation\u0027, \u002726-35\u0027), (3, \u0027Strength Training\u0027, \u002736-45\u0027); CREATE TABLE Participants (ParticipantID INT, Age INT, ProgramID INT); INSERT INTO Participants (ParticipantID, Age, ProgramID) VALUES (1, 22, 1), (2, 30, 2), (3, 38, 3), (4, 25, 1);", + "sql": "SELECT w.ProgramName, p.AgeGroup, COUNT(*) as Total_Participants FROM Participants p JOIN WellbeingPrograms w ON p.ProgramID \u003d w.ProgramID GROUP BY w.ProgramName, p.AgeGroup;", + "sql_explanation": "Join WellbeingPrograms and Participants tables, then group by ProgramName and AgeGroup to count the number of fans participating in athlete wellbeing programs by age group." 
+}, { + "id": "1349", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all athletes who have never participated in any wellbeing programs.", + "sql_context": "CREATE TABLE athletes (athlete_id INT, name VARCHAR(30), team VARCHAR(20)); INSERT INTO athletes VALUES (1, \u0027Durant\u0027, \u0027Nets\u0027); INSERT INTO athletes VALUES (2, \u0027Curry\u0027, \u0027Warriors\u0027); CREATE TABLE wellbeing_programs (program_id INT, athlete_id INT, program_name VARCHAR(30)); INSERT INTO wellbeing_programs VALUES (1, 1, \u0027Yoga\u0027);", + "sql": "SELECT athletes.name FROM athletes LEFT JOIN wellbeing_programs ON athletes.athlete_id \u003d wellbeing_programs.athlete_id WHERE wellbeing_programs.athlete_id IS NULL;", + "sql_explanation": "1. Perform a left join on the athletes and wellbeing_programs tables. 2. Filter the rows where the athlete_id in the wellbeing_programs table is NULL. 3. Select the name column from the athletes table." 
+}, { + "id": "1366", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each team\u0027s merchandise sales by quarter in 2022?", + "sql_context": "CREATE TABLE teams (id INT, name VARCHAR(255)); INSERT INTO teams (id, name) VALUES (1, \u0027TeamA\u0027), (2, \u0027TeamB\u0027), (3, \u0027TeamC\u0027); CREATE TABLE merchandise_sales (team_id INT, year INT, quarter INT, revenue DECIMAL(10,2));", + "sql": "SELECT t.name, m.quarter, SUM(m.revenue) as total_revenue FROM merchandise_sales m JOIN teams t ON m.team_id \u003d t.id WHERE m.year \u003d 2022 GROUP BY t.name, m.quarter;", + "sql_explanation": "This query joins the merchandise_sales and teams tables on the team_id column. It then filters for data from the year 2022 and groups the results by team name and quarter. Finally, it calculates the total revenue for each team\u0027s merchandise sales by quarter in 2022." 
+}, { + "id": "1395", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 favorite sports among fans, by ticket sales.", + "sql_context": "CREATE TABLE fan_demographics (fan_id INT, favorite_sport VARCHAR(20)); CREATE TABLE ticket_sales (ticket_id INT, fan_id INT, sport VARCHAR(20), sales INT);", + "sql": "SELECT sport, SUM(sales) as total_sales FROM ticket_sales td JOIN fan_demographics fd ON td.fan_id \u003d fd.fan_id GROUP BY sport ORDER BY total_sales DESC LIMIT 3;", + "sql_explanation": "This query joins the fan_demographics and ticket_sales tables on fan_id, groups the results by sport, calculates the total sales for each sport, and orders them in descending order. The query then returns the top 3 sports with the highest ticket sales." 
+}, { + "id": "1689", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each event by state in the \u0027concerts\u0027 and \u0027fans\u0027 tables?", + "sql_context": "CREATE TABLE concerts (event_id INT, event_name VARCHAR(50), location VARCHAR(50), date DATE, ticket_price DECIMAL(5,2), num_tickets INT, city VARCHAR(50)); CREATE TABLE fans (fan_id INT, fan_name VARCHAR(50), age INT, city VARCHAR(50), state VARCHAR(50), country VARCHAR(50));", + "sql": "SELECT event_name, state, SUM(ticket_price * num_tickets) as total_revenue FROM concerts c JOIN fans f ON c.city \u003d f.city GROUP BY event_name, state;", + "sql_explanation": "This query calculates the total revenue for each event by state in the \u0027concerts\u0027 and \u0027fans\u0027 tables by joining the records based on city and grouping the records by event_name and state." 
+}, { + "id": "1902", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of tickets sold by each salesperson, grouped by team.", + "sql_context": "CREATE TABLE salesperson (id INT, name VARCHAR(50), team VARCHAR(50)); CREATE TABLE tickets (id INT, salesperson_id INT, quantity INT, city VARCHAR(50)); INSERT INTO salesperson (id, name, team) VALUES (1, \u0027John Doe\u0027, \u0027Knicks\u0027), (2, \u0027Jane Smith\u0027, \u0027Giants\u0027), (3, \u0027Mia Rodriguez\u0027, \u0027Lakers\u0027), (4, \u0027Mason Green\u0027, \u0027United\u0027); INSERT INTO tickets (id, salesperson_id, quantity, city) VALUES (1, 1, 50, \u0027New York\u0027), (2, 1, 75, \u0027New York\u0027), (3, 2, 30, \u0027Los Angeles\u0027), (4, 2, 40, \u0027Los Angeles\u0027), (5, 3, 25, \u0027Chicago\u0027), (6, 3, 50, \u0027Chicago\u0027), (7, 4, 10, \u0027London\u0027);", + "sql": "SELECT s.team, s.name, SUM(t.quantity) as total_quantity FROM salesperson s JOIN tickets t ON s.id \u003d t.salesperson_id GROUP BY s.team, s.name;", + "sql_explanation": "This query joins the salesperson and tickets tables on the salesperson_id column. It then groups the results by team and salesperson name, calculating the total quantity of tickets sold for each group." 
+}, { + "id": "1909", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the total revenue for each team\u0027s merchandise sales in 2021?", + "sql_context": "CREATE TABLE teams (id INT, name VARCHAR(255)); INSERT INTO teams (id, name) VALUES (1, \u0027TeamA\u0027), (2, \u0027TeamB\u0027); CREATE TABLE merchandise_sales (team_id INT, year INT, revenue DECIMAL(10,2));", + "sql": "SELECT t.name, SUM(m.revenue) as total_revenue FROM merchandise_sales m JOIN teams t ON m.team_id \u003d t.id WHERE m.year \u003d 2021 GROUP BY t.name;", + "sql_explanation": "This query joins the merchandise_sales and teams tables on the team_id column. It then filters for data from the year 2021 and groups the results by team name. Finally, it calculates the total revenue for each team\u0027s merchandise sales in 2021." 
+}, { + "id": "2195", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum age of athletes in each wellbeing program?", + "sql_context": "CREATE TABLE athletes (athlete_id INT, program_id INT, age INT); INSERT INTO athletes (athlete_id, program_id, age) VALUES (6, 1, 30), (7, 1, 31), (8, 2, 32), (9, 2, 33), (10, 3, 34); CREATE TABLE programs (program_id INT, program_name VARCHAR(50)); INSERT INTO programs (program_id, program_name) VALUES (1, \u0027Yoga\u0027), (2, \u0027Meditation\u0027), (3, \u0027Mindfulness\u0027);", + "sql": "SELECT p.program_name, MAX(a.age) as max_age FROM athletes a JOIN programs p ON a.program_id \u003d p.program_id GROUP BY p.program_name;", + "sql_explanation": "The SQL query joins the athletes and programs tables on the program_id field and calculates the maximum age of athletes in each wellbeing program. It then groups by the program_name field and calculates the maximum age of athletes who are part of each program." 
+}, { + "id": "2215", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of tickets sold for a single game of the basketball team \"Eagles\" in the year 2021?", + "sql_context": "CREATE TABLE games (id INT, team VARCHAR(20), date DATE); INSERT INTO games (id, team, date) VALUES (1, \u0027Eagles\u0027, \u00272021-01-01\u0027), (2, \u0027Eagles\u0027, \u00272021-02-15\u0027), (3, \u0027Eagles\u0027, \u00272021-04-01\u0027); CREATE TABLE tickets (id INT, game_id INT, quantity INT); INSERT INTO tickets (id, game_id, quantity) VALUES (1, 1, 2000), (2, 1, 500), (3, 2, 1500), (4, 2, 700), (5, 3, 2500), (6, 3, 1500);", + "sql": "SELECT MAX(quantity) FROM tickets JOIN games ON tickets.game_id \u003d games.id WHERE games.team \u003d \u0027Eagles\u0027 AND YEAR(games.date) \u003d 2021;", + "sql_explanation": "This SQL query joins the tickets and games tables based on the game_id and filters for the Eagles team and the year 2021. Then, it calculates the maximum of the quantity column to find the maximum number of tickets sold for a single game of the Eagles in the year 2021." 
+}, { + "id": "2758", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of fans who have attended Red Sox home games?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50));CREATE TABLE fans (fan_id INT, team_id INT, age INT);INSERT INTO teams (team_id, team_name) VALUES (1, \u0027Red Sox\u0027), (2, \u0027Yankees\u0027);INSERT INTO fans (fan_id, team_id, age) VALUES (1, 1, 35), (2, 1, 28), (3, 2, 45), (4, 1, 42);", + "sql": "SELECT AVG(f.age) AS avg_age FROM fans f INNER JOIN teams t ON f.team_id \u003d t.team_id WHERE t.team_name \u003d \u0027Red Sox\u0027;", + "sql_explanation": "Join teams and fans tables, filter on \u0027Red Sox\u0027 fans, and calculate the average age of fans." 
+}, { + "id": "830", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the endangered languages, their corresponding heritage sites, and their current status in Africa.", + "sql_context": "CREATE TABLE Languages (id INT, language TEXT, status TEXT); INSERT INTO Languages (id, language, status) VALUES (1, \u0027Berber Language\u0027, \u0027Endangered\u0027); CREATE TABLE HeritageLanguages (id INT, language_id INT, heritage_site TEXT); INSERT INTO HeritageLanguages (id, language_id, heritage_site) VALUES (1, 1, \u0027Atlas Mountains\u0027);", + "sql": "SELECT L.language, HL.heritage_site, L.status FROM Languages L INNER JOIN HeritageLanguages HL ON L.id \u003d HL.language_id WHERE L.status \u003d \u0027Endangered\u0027 AND HL.heritage_site \u003d \u0027Atlas Mountains\u0027;", + "sql_explanation": "Join Languages and HeritageLanguages tables on language_id and id respectively, then filter for endangered languages and their corresponding heritage sites in Africa to get the current status." 
+}, { + "id": "1480", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all traditional arts and their associated community engagement events.", + "sql_context": "CREATE TABLE traditional_arts (id INT, name VARCHAR); INSERT INTO traditional_arts (id, name) VALUES (1, \u0027Traditional Art A\u0027), (2, \u0027Traditional Art B\u0027); CREATE TABLE community_events (id INT, art_id INT, event_type VARCHAR); INSERT INTO community_events (id, art_id, event_type) VALUES (1, 1, \u0027Community Engagement\u0027), (2, 2, \u0027Educational\u0027);", + "sql": "SELECT traditional_arts.name, community_events.event_type FROM traditional_arts INNER JOIN community_events ON traditional_arts.id \u003d community_events.art_id;", + "sql_explanation": "This query performs an inner join on the traditional_arts and community_events tables, using the art_id as the common key. It then returns the name of each traditional art and its associated community engagement events." 
+}, { + "id": "1589", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of annual visitors for each traditional art event in Eastern Europe?", + "sql_context": "CREATE TABLE ArtEvents (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO ArtEvents (id, name, region) VALUES (1, \u0027Kolo\u0027, \u0027Eastern Europe\u0027), (2, \u0027Hora\u0027, \u0027Eastern Europe\u0027), (3, \u0027Kokoshnik\u0027, \u0027Eastern Europe\u0027), (4, \u0027Karagöz\u0027, \u0027Eastern Europe\u0027), (5, \u0027Csángó\u0027, \u0027Eastern Europe\u0027); CREATE TABLE EventParticipants (id INT, art_event_id INT, name VARCHAR(50), annual_visitors INT); INSERT INTO EventParticipants (id, art_event_id, name, annual_visitors) VALUES (1, 1, \u0027Ivan\u0027, 2500), (2, 2, \u0027Maria\u0027, 1800), (3, 3, \u0027Alex\u0027, 1200), (4, 4, \u0027Natalia\u0027, 2300), (5, 5, \u0027Petro\u0027, 1900);", + "sql": "SELECT AVG(ep.annual_visitors) as avg_visitors FROM ArtEvents ae JOIN EventParticipants ep ON ae.id \u003d ep.art_event_id WHERE ae.region \u003d \u0027Eastern Europe\u0027;", + "sql_explanation": "This SQL query joins the ArtEvents and EventParticipants tables on the art_event_id column. It then filters the data for art events in Eastern Europe. The query calculates the average number of annual visitors for each traditional art event in Eastern Europe, and returns the result." 
+}, { + "id": "1761", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many languages are spoken in each country?", + "sql_context": "CREATE TABLE Countries (CountryID INT, CountryName VARCHAR(50)); CREATE TABLE Languages (LanguageID INT, CountryID INT, Spoken VARCHAR(50)); INSERT INTO Countries VALUES (1, \u0027CountryA\u0027), (2, \u0027CountryB\u0027), (3, \u0027CountryC\u0027); INSERT INTO Languages VALUES (1, 1, \u0027LanguageX\u0027), (2, 1, \u0027LanguageY\u0027), (3, 2, \u0027LanguageY\u0027), (4, 3, \u0027LanguageZ\u0027), (5, 3, \u0027LanguageW\u0027);", + "sql": "SELECT C.CountryName, COUNT(L.LanguageID) AS LanguagesSpoken FROM Countries C JOIN Languages L ON C.CountryID \u003d L.CountryID GROUP BY C.CountryName;", + "sql_explanation": "Join the Countries and Languages tables, group by CountryName, and count the number of LanguageIDs for each CountryName." 
+}, { + "id": "1973", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many heritage sites are in each country?", + "sql_context": "CREATE TABLE heritage_sites (site_id INT, country_id INT, site_name VARCHAR(255)); CREATE TABLE countries (country_id INT, country_name VARCHAR(255)); INSERT INTO countries VALUES (1, \u0027France\u0027), (2, \u0027United States\u0027), (3, \u0027Japan\u0027); INSERT INTO heritage_sites VALUES (1, 1, \u0027Eiffel Tower\u0027), (2, 1, \u0027Mont Saint-Michel\u0027), (3, 2, \u0027Statue of Liberty\u0027), (4, 2, \u0027Yellowstone National Park\u0027), (5, 3, \u0027Mount Fuji\u0027), (6, 3, \u0027Himeji Castle\u0027);", + "sql": "SELECT country_name, COUNT(*) FROM heritage_sites JOIN countries ON heritage_sites.country_id \u003d countries.country_id GROUP BY country_name;", + "sql_explanation": "Join the heritage_sites and countries tables on the country_id column, then group the results by country_name and calculate the count of records in each group." 
+}, { + "id": "2480", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the heritage sites that do not have any associated artists?", + "sql_context": "CREATE TABLE HeritageSites (SiteID int, SiteName text); INSERT INTO HeritageSites (SiteID, SiteName) VALUES (1, \u0027Eiffel Tower\u0027), (2, \u0027Mont Saint-Michel\u0027), (3, \u0027Alhambra\u0027); CREATE TABLE ArtHeritage (ArtID int, HeritageID int); INSERT INTO ArtHeritage (ArtID, HeritageID) VALUES (1, 1), (2, 2), (3, 3);", + "sql": "SELECT HS.SiteName FROM HeritageSites HS LEFT JOIN ArtHeritage AH ON HS.SiteID \u003d AH.HeritageID WHERE AH.HeritageID IS NULL;", + "sql_explanation": "This query returns heritage sites that do not have any associated artists by performing a left join between the HeritageSites and ArtHeritage tables and filtering for null values in the ArtHeritage table." 
+}, { + "id": "3892", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of artifacts in museums per city?", + "sql_context": "CREATE TABLE museums (museum_id INT, city_id INT, num_artifacts INT); CREATE TABLE cities (city_id INT, city_name VARCHAR(255)); INSERT INTO cities VALUES (1, \u0027Paris\u0027), (2, \u0027New York\u0027), (3, \u0027Tokyo\u0027); INSERT INTO museums VALUES (1, 1, 5000), (2, 1, 3000), (3, 2, 8000), (4, 2, 9000), (5, 3, 6000), (6, 3, 7000);", + "sql": "SELECT AVG(num_artifacts) FROM museums JOIN cities ON museums.city_id \u003d cities.city_id;", + "sql_explanation": "First, join the museums and cities tables on the city_id column. Then, calculate the average num_artifacts across all records in the joined table." 
+}, { + "id": "1078", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many IoT sensors were installed in each region in the past month?", + "sql_context": "CREATE TABLE region (id INTEGER, name TEXT);CREATE TABLE iot_sensor (id INTEGER, region_id INTEGER, installed_date DATE);", + "sql": "SELECT r.name, COUNT(s.id) as num_sensors FROM region r INNER JOIN iot_sensor s ON r.id \u003d s.region_id WHERE s.installed_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY r.name;", + "sql_explanation": "The SQL query calculates the number of IoT sensors installed in each region in the past month by joining the region and iot_sensor tables on the region_id column, filtering for records where the installed_date is within the past month, and then grouping the results by region name, counting the number of sensors for each group." 
+}, { + "id": "1134", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which satellite images have anomalies in the past month for soybean fields?", + "sql_context": "CREATE TABLE satellite_image (id INT, field_id INT, image_url TEXT, anomaly BOOLEAN, timestamp TIMESTAMP); CREATE TABLE field (id INT, type VARCHAR(20));", + "sql": "SELECT s.image_url FROM satellite_image s INNER JOIN field f ON s.field_id \u003d f.id WHERE f.type \u003d \u0027soybean\u0027 AND s.anomaly \u003d true AND s.timestamp \u003e\u003d NOW() - INTERVAL \u00271 month\u0027;", + "sql_explanation": "This query joins the satellite_image and field tables on the field_id column. It then filters the data to include only records for soybean fields with anomalies in the past month. Finally, it selects the image_url column to return the URLs of the relevant satellite images." 
+}, { + "id": "1346", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature recorded by sensor 001 in the \u0027SensorData\u0027 table?", + "sql_context": "CREATE TABLE WeatherData (date DATE, temperature FLOAT); CREATE TABLE SensorData (sensor_id INT, weather_data_id INT, FOREIGN KEY (weather_data_id) REFERENCES WeatherData(weather_data_id));", + "sql": "SELECT AVG(WeatherData.temperature) FROM WeatherData JOIN SensorData ON WeatherData.weather_data_id \u003d SensorData.weather_data_id WHERE SensorData.sensor_id \u003d 1;", + "sql_explanation": "The SQL query joins the \u0027WeatherData\u0027 and \u0027SensorData\u0027 tables on the \u0027weather_data_id\u0027 column. It then filters for records where \u0027sensor_id\u0027 is 1 and calculates the average temperature." 
+}, { + "id": "2131", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average rainfall for farms with an irrigation system?", + "sql_context": "CREATE TABLE farm (id INT, name VARCHAR(50), size FLOAT, irrigation_system BOOLEAN, PRIMARY KEY(id)); INSERT INTO farm (id, name, size, irrigation_system) VALUES (1, \u0027Farm A\u0027, 50.3, true); INSERT INTO farm (id, name, size, irrigation_system) VALUES (2, \u0027Farm B\u0027, 75.8, false); CREATE TABLE rainfall (id INT, farm_id INT, rainfall FLOAT, PRIMARY KEY(id)); INSERT INTO rainfall (id, farm_id, rainfall) VALUES (1, 1, 35.2); INSERT INTO rainfall (id, farm_id, rainfall) VALUES (2, 2, 21.0);", + "sql": "SELECT f.name, AVG(r.rainfall) FROM farm f INNER JOIN rainfall r ON f.id \u003d r.farm_id WHERE f.irrigation_system \u003d true GROUP BY f.name;", + "sql_explanation": "This query joins the farm table and the rainfall table on their common column farm_id. It filters the results to only show farms with an irrigation system (f.irrigation_system \u003d true). Then, it groups the results by farm name (f.name) and calculates the average rainfall (AVG(r.rainfall)) for farms with an irrigation system." 
+}, { + "id": "2921", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all users who have never planted any crops.", + "sql_context": "CREATE TABLE users (user_id INT, name VARCHAR(255)); CREATE TABLE planting_records (record_id INT, user_id INT, crop_type VARCHAR(255));", + "sql": "SELECT u.name FROM users u LEFT JOIN planting_records pr ON u.user_id \u003d pr.user_id WHERE pr.record_id IS NULL;", + "sql_explanation": "Perform a left join between users and planting_records. Filter on null values in the planting_records table to find users who have no records in the table." +}, { + "id": "226", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which vehicle type in the \u0027Bus\u0027 service had the most maintenance incidents in the last month?", + "sql_context": "CREATE TABLE MaintenanceIncidents (IncidentID INT, VehicleID INT, VehicleType VARCHAR(50), IncidentDate DATE); INSERT INTO MaintenanceIncidents (IncidentID, VehicleID, VehicleType, IncidentDate) VALUES (1, 1, \u0027MiniBus\u0027, \u00272022-02-01\u0027), (2, 1, \u0027MiniBus\u0027, \u00272022-02-03\u0027), (3, 2, \u0027Coach\u0027, \u00272022-02-02\u0027), (4, 3, \u0027MidiBus\u0027, \u00272022-02-04\u0027), (5, 4, 
\u0027MiniBus\u0027, \u00272022-02-05\u0027), (6, 4, \u0027MiniBus\u0027, \u00272022-02-06\u0027), (7, 5, \u0027MidiBus\u0027, \u00272022-02-07\u0027), (8, 5, \u0027MidiBus\u0027, \u00272022-02-08\u0027), (9, 1, \u0027MiniBus\u0027, \u00272022-02-09\u0027);", + "sql": "SELECT v.VehicleType, COUNT(*) as MaintenanceIncidents FROM Vehicles v JOIN MaintenanceIncidents mi ON v.VehicleID \u003d mi.VehicleID WHERE v.Service \u003d \u0027Bus\u0027 AND mi.IncidentDate \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY v.VehicleType ORDER BY MaintenanceIncidents DESC;", + "sql_explanation": "This query joins the Vehicles and MaintenanceIncidents tables on the VehicleID column, filters for the Bus service and incidents in the last month, groups the results by vehicle type, and orders the results by the number of maintenance incidents in descending order." +}, { + "id": "406", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected per month for each route?", + "sql_context": "CREATE TABLE RouteFareCollections (CollectionID int, RouteID int, CollectionDate date, FareAmount decimal); INSERT INTO RouteFareCollections VALUES (1, 1, \u00272022-01-01\u0027, 5.00), (2, 1, \u00272022-01-01\u0027, 2.50), (3, 2, \u00272022-01-01\u0027, 1.75), (4, 2, \u00272022-02-01\u0027, 3.00);", + "sql": "SELECT R.RouteName, DATE_FORMAT(RF.CollectionDate, \u0027%Y-%m\u0027) as CollectionMonth, SUM(RF.FareAmount) as MonthlyTotalFare FROM Routes R INNER JOIN RouteFareCollections RF ON R.RouteID \u003d RF.RouteID GROUP BY R.RouteName, CollectionMonth;", + "sql_explanation": "Join 
Routes and RouteFareCollections tables on RouteID, then calculate the total fare collected per month for each route." +}, { + "id": "455", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of public transportation trips taken by seniors in the last month, by mode?", + "sql_context": "CREATE TABLE trips (trip_id INT, user_id INT, trip_date DATE, trip_mode VARCHAR(255)); INSERT INTO trips (trip_id, user_id, trip_date, trip_mode) VALUES (10, 10, \u00272022-03-01\u0027, \u0027Bus\u0027), (11, 11, \u00272022-03-02\u0027, \u0027Train\u0027); CREATE TABLE user_demographics (user_id INT, age_group VARCHAR(255)); INSERT INTO user_demographics (user_id, age_group) VALUES (10, \u0027Senior\u0027), (11, \u0027Senior\u0027);", + "sql": "SELECT trip_mode, COUNT(*) AS trips_taken FROM trips JOIN user_demographics ON trips.user_id \u003d user_demographics.user_id WHERE age_group \u003d \u0027Senior\u0027 AND trip_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY trip_mode;", + "sql_explanation": "Counts the total number of public transportation trips taken by seniors in the last month, by mode, by joining trips and user_demographics tables based on user_id." 
+}, { + "id": "532", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many vehicles are there in total for each type in the \u0027tokyo\u0027 schema?", + "sql_context": "CREATE TABLE tokyo.vehicle_types (id INT, type VARCHAR); CREATE TABLE tokyo.vehicles (id INT, type_id INT, is_active BOOLEAN);", + "sql": "SELECT tokyo.vehicle_types.type, COUNT(*) FROM tokyo.vehicle_types INNER JOIN tokyo.vehicles ON tokyo.vehicle_types.id \u003d tokyo.vehicles.type_id WHERE tokyo.vehicles.is_active \u003d TRUE GROUP BY tokyo.vehicle_types.type;", + "sql_explanation": "This query counts the number of vehicles for each type in the \u0027tokyo\u0027 schema. It does this by performing an INNER JOIN on the \u0027vehicle_types\u0027 and \u0027vehicles\u0027 tables, matching records where the \u0027type_id\u0027 columns are equal. The WHERE clause filters the result set to only include records where the \u0027is_active\u0027 column is TRUE. Finally, the GROUP BY clause groups the result set by the \u0027type\u0027 column, and the COUNT(*) function counts the number of records in each group." 
+}, { + "id": "546", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the number of unique users who used the bus and subway systems in March 2021.", + "sql_context": "CREATE TABLE bus_sales (sale_id INT, sale_date DATE, sale_revenue FLOAT, system_name VARCHAR(20)); CREATE TABLE user_activity (user_id INT, sale_id INT, activity_date DATE);", + "sql": "SELECT COUNT(DISTINCT user_id) FROM user_activity UA JOIN bus_sales BS ON UA.sale_id \u003d BS.sale_id WHERE BS.system_name \u003d \u0027Bus\u0027 OR BS.system_name \u003d \u0027Subway\u0027 AND UA.activity_date BETWEEN \u00272021-03-01\u0027 AND \u00272021-03-31\u0027;", + "sql_explanation": "Counting the number of unique users who used the bus and subway systems in March 2021." 
+}, { + "id": "556", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of buses, by route?", + "sql_context": "CREATE TABLE Routes (RouteID int, RouteType varchar(10), StartingLocation varchar(20), Length float); CREATE TABLE VehicleSpeeds (VehicleID int, RouteID int, Speed float); INSERT INTO Routes VALUES (1, \u0027Bus\u0027, \u0027City Center\u0027, 20.0), (2, \u0027Tram\u0027, \u0027City Center\u0027, 15.0), (3, \u0027Bus\u0027, \u0027Suburbs\u0027, 30.0); INSERT INTO VehicleSpeeds VALUES (1, 1, 30), (2, 1, 25), (3, 2, 16), (4, 3, 28), (5, 3, 32);", + "sql": "SELECT Routes.RouteID, Routes.RouteType, AVG(VehicleSpeeds.Speed) as avg_speed FROM Routes INNER JOIN VehicleSpeeds ON Routes.RouteID \u003d VehicleSpeeds.RouteID WHERE Routes.RouteType \u003d \u0027Bus\u0027 GROUP BY Routes.RouteID;", + "sql_explanation": "This query first joins the Routes and VehicleSpeeds tables on the RouteID field. It then filters for rows where the RouteType is \u0027Bus\u0027. Finally, it groups the results by route ID and calculates the average speed for each route." 
+}, { + "id": "632", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for each driver in the last month?", + "sql_context": "CREATE TABLE driver (driver_id INT, driver_name TEXT);CREATE TABLE fare (fare_id INT, driver_id INT, fare_amount DECIMAL, collection_date DATE); INSERT INTO driver (driver_id, driver_name) VALUES (1, \u0027Driver1\u0027), (2, \u0027Driver2\u0027), (3, \u0027Driver3\u0027), (4, \u0027Driver4\u0027), (5, \u0027Driver5\u0027); INSERT INTO fare (fare_id, driver_id, fare_amount, collection_date) VALUES (1, 1, 5.00, \u00272023-01-01\u0027), (2, 1, 5.00, \u00272023-01-02\u0027), (3, 2, 3.00, \u00272023-01-01\u0027), (4, 2, 3.00, \u00272023-01-03\u0027), (5, 3, 2.00, \u00272023-01-01\u0027);", + "sql": "SELECT d.driver_name, SUM(f.fare_amount) as total_fare FROM driver d JOIN fare f ON d.driver_id \u003d f.driver_id WHERE f.collection_date BETWEEN DATEADD(month, -1, GETDATE()) AND GETDATE() GROUP BY d.driver_id;", + "sql_explanation": "This query lists the total fare collected for each driver in the last month by joining the driver and fare tables on the driver_id field. It then calculates the SUM of the fare_amount for each driver, filters the records where the collection_date is within the range of the last month, groups by driver_name, and returns the result sorted by total_fare." 
+}, { + "id": "726", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many wheelchair-accessible taxis were active on Sundays in the last month?", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, vehicle_type VARCHAR(255), day_of_week ENUM(\u0027Monday\u0027, \u0027Tuesday\u0027, \u0027Wednesday\u0027, \u0027Thursday\u0027, \u0027Friday\u0027, \u0027Saturday\u0027, \u0027Sunday\u0027)); INSERT INTO vehicles (vehicle_id, vehicle_type, day_of_week) VALUES (7, \u0027Wheelchair Accessible\u0027, \u0027Sunday\u0027); CREATE TABLE taxi_activity (taxi_id INT, vehicle_id INT, activity_date DATE); INSERT INTO taxi_activity (taxi_id, vehicle_id, activity_date) VALUES (8, 7, \u00272022-03-06\u0027);", + "sql": "SELECT COUNT(*) FROM taxi_activity JOIN vehicles ON taxi_activity.vehicle_id \u003d vehicles.vehicle_id WHERE vehicles.day_of_week \u003d \u0027Sunday\u0027 AND activity_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", + "sql_explanation": "Counts the number of wheelchair-accessible taxis that were active on Sundays in the last month by joining taxi_activity and vehicles tables based on vehicle_id." 
+}, { + "id": "742", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of maintenance tasks performed on each vehicle, by vehicle type?", + "sql_context": "CREATE TABLE vehicles (vehicle_id INT, vehicle_type TEXT); CREATE TABLE maintenance (maintenance_id INT, vehicle_id INT, maintenance_date DATE, maintenance_type TEXT);", + "sql": "SELECT v.vehicle_type, v.vehicle_id, COUNT(m.maintenance_id) as total_maintenance_tasks FROM vehicles v INNER JOIN maintenance m ON v.vehicle_id \u003d m.vehicle_id GROUP BY v.vehicle_type, v.vehicle_id;", + "sql_explanation": "The SQL query joins the vehicles and maintenance tables on the vehicle_id column. It then counts the number of maintenance tasks for each vehicle, by vehicle type, and groups the results by vehicle type and vehicle id." 
+}, { + "id": "895", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many riders have used each route type in the last month?", + "sql_context": "CREATE TABLE trips (route_id INT, fare DECIMAL(5,2), date DATE); INSERT INTO trips VALUES (1, 2.00, \u00272022-01-01\u0027), (2, 0.00, \u00272022-01-02\u0027), (3, 1.50, \u00272022-01-03\u0027), (1, 2.00, \u00272022-01-04\u0027); CREATE TABLE routes (route_id INT, type VARCHAR(50)); INSERT INTO routes VALUES (1, \u0027Bus\u0027), (2, \u0027Tram\u0027), (3, \u0027Ferry\u0027);", + "sql": "SELECT routes.type, COUNT(DISTINCT trips.route_id) FROM trips JOIN routes ON trips.route_id \u003d routes.route_id WHERE trips.date \u003e\u003d (CURRENT_DATE - INTERVAL \u00271 month\u0027) GROUP BY routes.type;", + "sql_explanation": "This query calculates the number of riders for each route type by joining the trips table with the routes table based on the route_id, filtering for trips within the last month, and then grouping the result by route type and counting the number of unique route_id." 
+}, { + "id": "982", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of wheelchair-accessible buses per depot.", + "sql_context": "CREATE TABLE depot (depot_id INT, depot_name TEXT);CREATE TABLE bus (bus_id INT, depot_id INT, is_wheelchair_accessible BOOLEAN); INSERT INTO depot (depot_id, depot_name) VALUES (1, \u0027DepotA\u0027), (2, \u0027DepotB\u0027), (3, \u0027DepotC\u0027); INSERT INTO bus (bus_id, depot_id, is_wheelchair_accessible) VALUES (1, 1, true), (2, 1, false), (3, 2, true), (4, 2, false), (5, 3, true);", + "sql": "SELECT d.depot_name, COUNT(b.bus_id) as wheelchair_accessible_buses FROM depot d JOIN bus b ON d.depot_id \u003d b.depot_id WHERE b.is_wheelchair_accessible \u003d true GROUP BY d.depot_name;", + "sql_explanation": "This query counts the number of wheelchair-accessible buses per depot by joining the depot and bus tables on the depot_id field. It then filters the records where is_wheelchair_accessible is true and counts the number of buses per depot, grouping by depot_name." 
+}, { + "id": "1012", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique bus stops and their respective routes in the \u0027madrid\u0027 schema.", + "sql_context": "CREATE TABLE madrid.bus_stops (id INT, stop_name VARCHAR); CREATE TABLE madrid.stop_routes (id INT, stop_id INT, route_number INT);", + "sql": "SELECT DISTINCT madrid.bus_stops.stop_name, madrid.stop_routes.route_number FROM madrid.bus_stops INNER JOIN madrid.stop_routes ON madrid.bus_stops.id \u003d madrid.stop_routes.stop_id;", + "sql_explanation": "This query lists all unique bus stops and their respective routes in the \u0027madrid\u0027 schema. It does this by performing an INNER JOIN on the \u0027bus_stops\u0027 and \u0027stop_routes\u0027 tables, matching records where the \u0027stop_id\u0027 columns are equal. The DISTINCT keyword is used to eliminate duplicate stop names from the result set. The result set includes the \u0027stop_name\u0027 and \u0027route_number\u0027 columns." 
+}, { + "id": "1330", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "List all maintenance records for the vehicle with vehicle_id 1.", + "sql_context": "CREATE VIEW vehicles_detailed AS SELECT * FROM vehicles v JOIN maintenance m ON v.vehicle_id \u003d m.vehicle_id; SELECT * FROM vehicles_detailed WHERE vehicle_id \u003d 1;", + "sql": "CREATE VIEW vehicles_detailed AS SELECT * FROM vehicles v JOIN maintenance m ON v.vehicle_id \u003d m.vehicle_id; SELECT * FROM vehicles_detailed WHERE vehicle_id \u003d 1;", + "sql_explanation": "1. A CREATE VIEW statement is used to create a view with the join of the \"vehicles\" and \"maintenance\" tables. 2. A SELECT statement is used to list all maintenance records for the vehicle with vehicle_id 1 from the \"vehicles_detailed\" view." 
+}, { + "id": "1331", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for each route segment?", + "sql_context": "CREATE TABLE route_segments (segment_id INT, segment_name TEXT, route_id INT); CREATE TABLE fares (fare_id INT, segment_id INT, fare_amount DECIMAL); INSERT INTO route_segments (segment_id, segment_name, route_id) VALUES (1, \u0027Downtown to Midtown\u0027, 1), (2, \u0027Midtown to Uptown\u0027, 1), (3, \u0027City Center to Suburbs\u0027, 2); INSERT INTO fares (fare_id, segment_id, fare_amount) VALUES (1, 1, 2.50), (2, 1, 2.50), (3, 2, 2.50), (4, 2, 2.50), (5, 3, 3.50), (6, 3, 3.50);", + "sql": "SELECT f.segment_id, r.segment_name, SUM(f.fare_amount) AS total_fare FROM fares f JOIN route_segments r ON f.segment_id \u003d r.segment_id GROUP BY f.segment_id;", + "sql_explanation": "The SQL query joins the fares and route_segments tables on the segment_id column. It then calculates the sum of fare_amount for each unique segment_id to determine the total fare collected for each route segment." 
+}, { + "id": "1422", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which drivers have completed more than 500 trips on route 101?", + "sql_context": "CREATE TABLE drivers (driver_id INT, driver_name VARCHAR(255)); CREATE TABLE trips (trip_id INT, driver_id INT, route_id INT, trip_date DATE);", + "sql": "SELECT d.driver_name FROM drivers d INNER JOIN trips t ON d.driver_id \u003d t.driver_id WHERE t.route_id \u003d 101 GROUP BY d.driver_name HAVING COUNT(t.trip_id) \u003e 500;", + "sql_explanation": "The query joins the drivers table with the trips table on the driver_id column. It then filters the records to include only those trips taken on route 101. The query calculates the number of trips for each driver using the COUNT function and groups the results by driver name with the GROUP BY clause. Finally, it filters the results to include only those drivers who have completed more than 500 trips using the HAVING clause." 
+}, { + "id": "1534", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many passengers traveled on each route on a specific date?", + "sql_context": "CREATE TABLE trips (route_id INT, trip_date DATE); INSERT INTO trips (route_id, trip_date) VALUES (1, \u00272022-05-01\u0027), (1, \u00272022-05-01\u0027), (2, \u00272022-05-01\u0027), (3, \u00272022-05-01\u0027), (3, \u00272022-05-01\u0027);", + "sql": "SELECT r.route_name, t.trip_date, COUNT(t.route_id) AS passengers FROM trips t JOIN routes r ON t.route_id \u003d r.route_id GROUP BY r.route_name, t.trip_date;", + "sql_explanation": "Query joins trips and routes tables on route_id, then groups by route_name and trip_date to count passengers for each route on a specific date." 
+}, { + "id": "1567", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all stations with wheelchair accessibility and elevator availability", + "sql_context": "CREATE TABLE stations (station_id INT, name VARCHAR(255), latitude DECIMAL(9,6), longitude DECIMAL(9,6)); CREATE TABLE accessibility (station_id INT, wheelchair_accessible BOOLEAN, elevator_availability BOOLEAN);", + "sql": "SELECT s.name FROM stations s JOIN accessibility a ON s.station_id \u003d a.station_id WHERE a.wheelchair_accessible \u003d TRUE AND a.elevator_availability \u003d TRUE;", + "sql_explanation": "Lists all stations with wheelchair accessibility and elevator availability by joining the \u0027stations\u0027 and \u0027accessibility\u0027 table on station_id and filtering for stations where wheelchair accessibility and elevator availability are both true." 
+}, { + "id": "1829", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average speed of vehicles on each route?", + "sql_context": "CREATE TABLE routes (route_id INT, route_name TEXT);CREATE TABLE vehicles (vehicle_id INT, route_id INT, speed INT); INSERT INTO routes VALUES (123, \u0027Route 123\u0027); INSERT INTO routes VALUES (456, \u0027Route 456\u0027); INSERT INTO vehicles VALUES (1, 123, 50); INSERT INTO vehicles VALUES (2, 123, 55); INSERT INTO vehicles VALUES (3, 456, 45); INSERT INTO vehicles VALUES (4, 456, 40);", + "sql": "SELECT routes.route_name, AVG(vehicles.speed) FROM routes INNER JOIN vehicles ON routes.route_id \u003d vehicles.route_id GROUP BY routes.route_name;", + "sql_explanation": "This query joins the routes table and the vehicles table on the route_id foreign key. It then groups the results by route name and calculates the average speed of vehicles on each route." 
+}, { + "id": "2159", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue for each route in the bus system?", + "sql_context": "CREATE TABLE routes (route_id INT, route_name VARCHAR(255)); INSERT INTO routes VALUES (1, \u0027Route 1A\u0027); INSERT INTO routes VALUES (2, \u0027Route 2B\u0027); CREATE TABLE fares (fare_id INT, route_id INT, fare_amount DECIMAL(5,2)); INSERT INTO fares VALUES (1, 1, 2.50); INSERT INTO fares VALUES (2, 1, 3.00); INSERT INTO fares VALUES (3, 2, 1.75);", + "sql": "SELECT r.route_name, SUM(f.fare_amount) as total_revenue FROM routes r JOIN fares f ON r.route_id \u003d f.route_id GROUP BY r.route_name;", + "sql_explanation": "Join the routes and fares tables on the route_id column, then calculate the total revenue for each route by summing the fare_amount for each route." 
+}, { + "id": "2214", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for metro lines in Paris?", + "sql_context": "CREATE TABLE metro_lines (line_id INT, city VARCHAR(50)); INSERT INTO metro_lines (line_id, city) VALUES (1, \u0027Paris\u0027), (2, \u0027Berlin\u0027); CREATE TABLE fares_collected (line_id INT, fare DECIMAL(5,2)); INSERT INTO fares_collected (line_id, fare) VALUES (1, 500.00), (1, 750.00), (2, 300.00);", + "sql": "SELECT SUM(fare) FROM fares_collected INNER JOIN metro_lines ON fares_collected.line_id \u003d metro_lines.line_id WHERE city \u003d \u0027Paris\u0027;", + "sql_explanation": "This query calculates the total fare collected for metro lines in Paris by joining the fares_collected and metro_lines tables on the line_id column. It then filters the results to only include rows where the city is \u0027Paris\u0027 and calculates the sum of the fare using the SUM function." 
+}, { + "id": "2552", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many maintenance records exist for vehicles with a \u0027bus\u0027 type?", + "sql_context": "CREATE TABLE vehicle (vehicle_id INT, type TEXT); INSERT INTO vehicle (vehicle_id, type) VALUES (1, \u0027bus\u0027), (2, \u0027train\u0027), (3, \u0027tram\u0027); CREATE TABLE maintenance (maintenance_id INT, vehicle_id INT, date DATE); INSERT INTO maintenance (maintenance_id, vehicle_id, date) VALUES (101, 1, \u00272022-01-01\u0027), (102, 2, \u00272022-02-01\u0027), (103, 1, \u00272022-03-01\u0027);", + "sql": "SELECT COUNT(*) FROM maintenance JOIN vehicle ON maintenance.vehicle_id \u003d vehicle.vehicle_id WHERE vehicle.type \u003d \u0027bus\u0027;", + "sql_explanation": "This SQL query calculates the number of maintenance records for vehicles with a \u0027bus\u0027 type by joining the \u0027maintenance\u0027 and \u0027vehicle\u0027 table, and filtering on the \u0027type\u0027 column using the WHERE clause." 
+}, { + "id": "2707", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for each type of vehicle?", + "sql_context": "CREATE TABLE trips (vehicle_id INT, fare DECIMAL(5,2), date DATE); INSERT INTO trips VALUES (1, 2.00, \u00272022-01-01\u0027), (2, 0.00, \u00272022-01-02\u0027), (3, 1.50, \u00272022-01-03\u0027), (1, 2.00, \u00272022-01-04\u0027);", + "sql": "SELECT fleet.type, SUM(trips.fare) FROM trips JOIN fleet ON trips.vehicle_id \u003d fleet.vehicle_id GROUP BY fleet.type;", + "sql_explanation": "This query calculates the total fare collected for each type of vehicle by joining the trips table with the fleet table based on the vehicle_id, and then grouping the result by vehicle type and summing up the total fare." 
+}, { + "id": "112", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all animal populations in African community education programs", + "sql_context": "CREATE TABLE community_education_programs (id INT, program_name VARCHAR(255), location VARCHAR(255)); CREATE TABLE animal_populations (id INT, program_id INT, animal_type VARCHAR(255), population INT); INSERT INTO community_education_programs (id, program_name, location) VALUES (1, \u0027African Wildlife Initiative\u0027, \u0027Africa\u0027), (2, \u0027Lion Guardians\u0027, \u0027Kenya\u0027), (3, \u0027Gorilla Doctors\u0027, \u0027Rwanda\u0027); INSERT INTO animal_populations (id, program_id, animal_type, population) VALUES (1, 1, \u0027Lion\u0027, 300), (2, 1, \u0027Elephant\u0027, 500), (3, 2, \u0027Lion\u0027, 200), (4, 3, \u0027Gorilla\u0027, 100);", + "sql": "SELECT community_education_programs.program_name, animal_populations.animal_type, animal_populations.population FROM community_education_programs INNER JOIN animal_populations ON community_education_programs.id \u003d animal_populations.program_id WHERE community_education_programs.location \u003d \u0027Africa\u0027;", + "sql_explanation": "This query lists all animal populations in African community education programs. It joins the \u0027community_education_programs\u0027 table with the \u0027animal_populations\u0027 table on the program_id field. It then filters the results to only include rows where the location is \u0027Africa\u0027." 
+}, { + "id": "621", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of volunteers for each education program?", + "sql_context": "CREATE TABLE education_programs (id INT PRIMARY KEY, name VARCHAR(50), animal_id INT, coordinator VARCHAR(50)); CREATE TABLE volunteers (id INT PRIMARY KEY, name VARCHAR(50), program_id INT); CREATE TABLE animals (id INT PRIMARY KEY, name VARCHAR(50), species VARCHAR(50), population INT);", + "sql": "SELECT education_programs.name, COUNT(DISTINCT volunteers.id) AS num_volunteers FROM education_programs INNER JOIN volunteers ON education_programs.id \u003d volunteers.program_id GROUP BY education_programs.name;", + "sql_explanation": "This query retrieves the total number of distinct volunteers for each education program. It uses an INNER JOIN to combine the education_programs and volunteers tables based on the program ID. A GROUP BY clause is then used to group the results by education program name." 
+}, { + "id": "1133", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which animal species have a population greater than 1000 in each of their protected habitats?", + "sql_context": "CREATE TABLE animals (id INT, species TEXT, population INT); CREATE TABLE habitats (id INT, name TEXT, animal_id INT); INSERT INTO animals (id, species, population) VALUES (1, \u0027Tiger\u0027, 1200), (2, \u0027Elephant\u0027, 1500), (3, \u0027Rhinoceros\u0027, 800); INSERT INTO habitats (id, name, animal_id) VALUES (1, \u0027Habitat1\u0027, 1), (2, \u0027Habitat2\u0027, 2), (3, \u0027Habitat3\u0027, 2), (4, \u0027Habitat4\u0027, 3);", + "sql": "SELECT a.species FROM animals a JOIN habitats h ON a.id \u003d h.animal_id GROUP BY a.species HAVING COUNT(DISTINCT h.name) \u003d SUM(CASE WHEN a.population \u003e 1000 THEN 1 ELSE 0 END);", + "sql_explanation": "This query identifies the animal species that have a population greater than 1000 in each of their protected habitats by joining the animals and habitats tables and grouping by the species column. It then filters for species with a population greater than 1000 in each of their habitats using the HAVING clause." 
+}, { + "id": "1807", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique species in each community education program", + "sql_context": "CREATE TABLE education_programs (id INT, name VARCHAR(50));CREATE TABLE animals (id INT, species VARCHAR(50), program_id INT);INSERT INTO education_programs (id, name) VALUES (1, \u0027Adopt an Animal\u0027), (2, \u0027Wildlife Warriors\u0027);INSERT INTO animals (id, species, program_id) VALUES (1, \u0027Lion\u0027, 1), (2, \u0027Elephant\u0027, 2), (3, \u0027Zebra\u0027, 1), (4, \u0027Lion\u0027, 2);", + "sql": "SELECT e.name, COUNT(DISTINCT a.species) as unique_species FROM education_programs e INNER JOIN animals a ON e.id \u003d a.program_id GROUP BY e.name;", + "sql_explanation": "This SQL query finds the number of unique species in each community education program by joining the \u0027education_programs\u0027 and \u0027animals\u0027 table on the \u0027program_id\u0027 column. The result is grouped by the program name and the distinct species count is calculated." 
+}, { + "id": "1857", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in habitats located in Africa and Australia?", + "sql_context": "CREATE TABLE habitat (id INT, location TEXT, size FLOAT); CREATE TABLE animal_population (id INT, habitat_id INT, animal_count INT);", + "sql": "SELECT SUM(ap.animal_count) FROM animal_population ap INNER JOIN habitat h ON ap.habitat_id \u003d h.id WHERE h.location IN (\u0027Africa\u0027, \u0027Australia\u0027);", + "sql_explanation": "This query calculates the total number of animals in habitats located in Africa and Australia. It performs an inner join between the animal_population and habitat tables based on the habitat_id, filters the records where the location of the habitat is Africa or Australia, and then calculates the sum of animal_count." 
+}, { + "id": "2440", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in the \u0027sanctuary_a\u0027 and \u0027sanctuary_b\u0027?", + "sql_context": "CREATE TABLE sanctuary_a (animal_id INT, animal_name VARCHAR(50), population INT); INSERT INTO sanctuary_a VALUES (1, \u0027tiger\u0027, 25); INSERT INTO sanctuary_a VALUES (2, \u0027elephant\u0027, 30); CREATE TABLE sanctuary_b (animal_id INT, animal_name VARCHAR(50), population INT); INSERT INTO sanctuary_b VALUES (1, \u0027tiger\u0027, 20); INSERT INTO sanctuary_b VALUES (3, \u0027monkey\u0027, 35);", + "sql": "SELECT SUM(s1.population + s2.population) FROM sanctuary_a s1 FULL OUTER JOIN sanctuary_b s2 ON s1.animal_id \u003d s2.animal_id;", + "sql_explanation": "This query calculates the total population of animals in both sanctuaries by summing up the population counts of matching animal_id\u0027s from both sanctuaries. A full outer join is used to include all records from both tables, even if there is no match." 
+}, { + "id": "821", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of online travel agency bookings for hotels in Mumbai, India in the month of July 2022?", + "sql_context": "CREATE TABLE online_travel_agencies (id INT, hotel_id INT, revenue INT, booking_date DATE); CREATE TABLE hotels (id INT, name TEXT, city TEXT, country TEXT);", + "sql": "SELECT SUM(1) FROM online_travel_agencies ota INNER JOIN hotels h ON ota.hotel_id \u003d h.id WHERE h.city \u003d \u0027Mumbai\u0027 AND h.country \u003d \u0027India\u0027 AND booking_date BETWEEN \u00272022-07-01\u0027 AND \u00272022-07-31\u0027;", + "sql_explanation": "This SQL query calculates the total number of online travel agency bookings for hotels in Mumbai, India in the month of July 2022. It does this by using the SUM function on the constant value 1, filtering the rows by the city, country, and booking_date, and joining the online_travel_agencies table with the hotels table." 
+}, { + "id": "1040", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours were engaged with in the EU for luxury hotels?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_type TEXT, country TEXT); CREATE TABLE virtual_tours (tour_id INT, hotel_id INT, views INT); INSERT INTO hotels VALUES (1, \u0027Luxury\u0027, \u0027France\u0027); INSERT INTO virtual_tours VALUES (1, 1, 100);", + "sql": "SELECT SUM(virtual_tours.views) FROM virtual_tours INNER JOIN hotels ON virtual_tours.hotel_id \u003d hotels.hotel_id WHERE hotels.hotel_type \u003d \u0027Luxury\u0027 AND hotels.country LIKE \u0027EU%\u0027;", + "sql_explanation": "This SQL query calculates the total number of virtual tour views for luxury hotels in the European Union. It does this by performing an inner join between the virtual_tours and hotels tables, filtering for luxury hotels in EU countries, and finally summing the number of virtual tour views." 
+}, { + "id": "1307", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many AI-powered hotel features are available in South America?", + "sql_context": "CREATE TABLE ai_features (feature_id INT, feature TEXT); INSERT INTO ai_features (feature_id, feature) VALUES (1, \u0027Voice Assistant\u0027), (2, \u0027Chatbot\u0027), (3, \u0027Facial Recognition\u0027), (4, \u0027Predictive Analytics\u0027), (5, \u0027Robotics\u0027); CREATE TABLE hotel_features (feature_id INT, hotel_region TEXT); INSERT INTO hotel_features (feature_id, hotel_region) VALUES (1, \u0027South America\u0027), (2, \u0027South America\u0027), (3, \u0027South America\u0027), (4, \u0027South America\u0027), (5, \u0027South America\u0027);", + "sql": "SELECT COUNT(*) FROM ai_features INNER JOIN hotel_features ON ai_features.feature_id \u003d hotel_features.feature_id WHERE hotel_features.hotel_region \u003d \u0027South America\u0027;", + "sql_explanation": "This query counts the number of AI-powered hotel features available in South America. It performs an inner join between the ai_features and hotel_features tables, filters for South America features, and then calculates the count of features." 
+}, { + "id": "1342", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of virtual tours engaged with in a single day for hotels in Rome, Italy?", + "sql_context": "CREATE TABLE virtual_tours (id INT, hotel_id INT, engagement_count INT, engagement_date DATE); CREATE TABLE hotels (id INT, name TEXT, city TEXT, country TEXT);", + "sql": "SELECT MAX(engagement_count) FROM virtual_tours vt INNER JOIN hotels h ON vt.hotel_id \u003d h.id WHERE h.city \u003d \u0027Rome\u0027 AND h.country \u003d \u0027Italy\u0027 GROUP BY engagement_date;", + "sql_explanation": "This SQL query calculates the maximum number of virtual tours engaged with in a single day for hotels in Rome, Italy. It does this by using the MAX function on the engagement_count column, filtering the rows by the city and country, and grouping the rows by the engagement_date." 
+}, { + "id": "2257", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the adoption rate of AI-powered chatbots in the hotel industry for the \u0027Africa\u0027 region?", + "sql_context": "CREATE TABLE ai_chatbots (id INT, hotel_id INT, region TEXT, adoption_rate FLOAT); INSERT INTO ai_chatbots (id, hotel_id, region, adoption_rate) VALUES (1, 1, \u0027Africa\u0027, 0.6), (2, 2, \u0027Africa\u0027, 0.7), (3, 3, \u0027Europe\u0027, 0.8), (4, 4, \u0027Asia-Pacific\u0027, 0.9); CREATE TABLE hotels (id INT, name TEXT, region TEXT); INSERT INTO hotels (id, name, region) VALUES (1, \u0027Hotel AF1\u0027, \u0027Africa\u0027), (2, \u0027Hotel AF2\u0027, \u0027Africa\u0027), (3, \u0027Hotel AF3\u0027, \u0027Europe\u0027), (4, \u0027Hotel AF4\u0027, \u0027Asia-Pacific\u0027);", + "sql": "SELECT region, AVG(adoption_rate) FROM ai_chatbots a JOIN hotels h ON a.hotel_id \u003d h.id WHERE h.region \u003d \u0027Africa\u0027 GROUP BY region;", + "sql_explanation": "The query calculates the adoption rate of AI-powered chatbots in the hotel industry for the \u0027Africa\u0027 region by using the AVG function on the \u0027adoption_rate\u0027 column, filtering the rows with the WHERE clause to only include hotels from the \u0027Africa\u0027 region, and joining the \u0027ai_chatbots\u0027 and \u0027hotels\u0027 tables on the hotel_id column." 
+}, { + "id": "2558", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of stars for each hotel in the luxury_hotels view?", + "sql_context": "CREATE VIEW luxury_hotels AS SELECT * FROM hotels WHERE revenue \u003e 1000000; CREATE TABLE hotel_stars (hotel_id INT, stars INT);", + "sql": "SELECT h.hotel_name, MAX(hs.stars) FROM luxury_hotels h JOIN hotel_stars hs ON h.id \u003d hs.hotel_id GROUP BY h.hotel_name;", + "sql_explanation": "This query joins the luxury_hotels view with the hotel_stars table on the id column, then calculates the maximum number of stars for each hotel by grouping by the hotel_name column." 
+}, { + "id": "167", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which underrepresented communities have contributed to the development of creative AI applications for the arts sector in the past two years, in the Creative AI database?", + "sql_context": "CREATE TABLE communities (id INT, name VARCHAR(255)); INSERT INTO communities (id, name) VALUES (1, \u0027Indigenous Artists Network\u0027), (2, \u0027Women in AI Arts\u0027); CREATE TABLE applications (id INT, name VARCHAR(255), community_id INT, sector VARCHAR(255), published_date DATE); INSERT INTO applications (id, name, community_id, sector, published_date) VALUES (1, \u0027App1\u0027, 1, \u0027Arts\u0027, \u00272021-01-01\u0027), (2, \u0027App2\u0027, 2, \u0027Cultural Heritage\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT communities.name FROM communities JOIN applications ON communities.id \u003d applications.community_id WHERE sector IN (\u0027Arts\u0027, \u0027Cultural Heritage\u0027) AND YEAR(published_date) \u003e\u003d YEAR(CURRENT_DATE()) - 2 AND communities.name IN (\u0027Indigenous Artists Network\u0027, \u0027Women in AI Arts\u0027);", + "sql_explanation": "This query retrieves the underrepresented communities that have contributed to the development of creative AI applications for the arts and cultural heritage sectors in the past two years in the Creative AI database. It joins the communities and applications tables based on their relationships and filters for applications published in the past two years in the arts and cultural heritage sectors. 
It then filters for communities with names \u0027Indigenous Artists Network\u0027 or \u0027Women in AI Arts\u0027 and returns the name of the communities." +}, { + "id": "386", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ratio of safe AI algorithms to unsafe AI algorithms by gender of the lead developer?", + "sql_context": "CREATE TABLE safe_ai_algorithms (algorithm_id INT, algorithm_name TEXT, is_safe BOOLEAN); INSERT INTO safe_ai_algorithms (algorithm_id, algorithm_name, is_safe) VALUES (1, \u0027Safe AI\u0027, true), (2, \u0027Unsafe AI\u0027, false); CREATE TABLE ai_developers (developer_id INT, developer_name TEXT, developer_gender TEXT, lead_developer BOOLEAN); INSERT INTO ai_developers (developer_id, developer_name, developer_gender, lead_developer) VALUES (1001, \u0027Alice\u0027, \u0027Female\u0027, true), (1002, \u0027Bob\u0027, \u0027Male\u0027, false), (1003, \u0027Charlie\u0027, \u0027Female\u0027, true);", + "sql": "SELECT d.developer_gender, SUM(saa.is_safe) as num_safe, COUNT(*) as num_total, 1.0 * SUM(saa.is_safe) / COUNT(*) as ratio FROM safe_ai_algorithms saa CROSS JOIN ai_developers d WHERE d.lead_developer \u003d true GROUP BY d.developer_gender;", + "sql_explanation": "This query calculates the ratio of safe AI algorithms to unsafe AI algorithms by gender of the lead developer. It does so by cross joining the safe_ai_algorithms and ai_developers tables on the lead_developer column. It then groups the results by the developer_gender column and calculates the ratio of safe AI algorithms to the total number of AI algorithms for each gender." 
+}, { + "id": "730", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of models developed by each researcher for explainable AI?", + "sql_context": "CREATE TABLE researchers (id INT, name TEXT); INSERT INTO researchers (id, name) VALUES (1, \u0027Alice\u0027), (2, \u0027Bob\u0027), (3, \u0027Charlie\u0027); CREATE TABLE models (id INT, researcher_id INT, name TEXT, domain TEXT); INSERT INTO models (id, researcher_id, name, domain) VALUES (1, 1, \u0027XAIModel1\u0027, \u0027Explainable AI\u0027), (2, 1, \u0027XAIModel2\u0027, \u0027Explainable AI\u0027), (3, 2, \u0027XAIModel3\u0027, \u0027Explainable AI\u0027), (4, 3, \u0027XAIModel4\u0027, \u0027Explainable AI\u0027);", + "sql": "SELECT researchers.name, COUNT(models.id) as total_models FROM researchers INNER JOIN models ON researchers.id \u003d models.researcher_id WHERE models.domain \u003d \u0027Explainable AI\u0027 GROUP BY researchers.name;", + "sql_explanation": "This query joins the researchers and models tables on the researcher_id field and filters for models in the Explainable AI domain. It then groups the results by researcher name and calculates the count of models for each researcher." 
+}, { + "id": "921", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of AI safety incidents per country in the last year?", + "sql_context": "CREATE TABLE ai_safety_incidents (incident_id INT, incident_date DATE, incident_country TEXT); INSERT INTO ai_safety_incidents (incident_id, incident_date, incident_country) VALUES (1, \u00272021-03-15\u0027, \u0027USA\u0027), (2, \u00272020-12-21\u0027, \u0027Canada\u0027), (3, \u00272021-08-01\u0027, \u0027UK\u0027); CREATE TABLE countries (country_id INT, country_name TEXT); INSERT INTO countries (country_id, country_name) VALUES (101, \u0027USA\u0027), (102, \u0027Canada\u0027), (103, \u0027UK\u0027), (104, \u0027Australia\u0027);", + "sql": "SELECT c.country_name, AVG(EXTRACT(YEAR FROM ai.incident_date)) as avg_year FROM ai_safety_incidents ai JOIN countries c ON ai.incident_country \u003d c.country_name GROUP BY c.country_name;", + "sql_explanation": "This query calculates the average number of AI safety incidents per country in the last year. It does so by joining the ai_safety_incidents and countries tables on the incident_country column. It then extracts the year from the incident_date column and calculates the average year for each country." 
+}, { + "id": "1254", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction amount per user for users from India, partitioned by transaction type?", + "sql_context": "CREATE TABLE users (id INT, country VARCHAR(20)); INSERT INTO users (id, country) VALUES (1, \u0027India\u0027), (2, \u0027USA\u0027); CREATE TABLE transactions (id INT, user_id INT, type VARCHAR(20), amount DECIMAL(10, 2)); INSERT INTO transactions (id, user_id, type, amount) VALUES (1, 1, \u0027credit\u0027, 100.00), (2, 1, \u0027debit\u0027, 50.00), (3, 2, \u0027credit\u0027, 200.00), (4, 2, \u0027debit\u0027, 150.00);", + "sql": "SELECT type, AVG(amount) as avg_amount FROM transactions INNER JOIN users ON transactions.user_id \u003d users.id WHERE users.country \u003d \u0027India\u0027 GROUP BY type, users.country;", + "sql_explanation": "The SQL query first joins the transactions and users tables based on user_id and country. Then, it groups the data by transaction type and country, and calculates the average transaction amount for each group." 
+}, { + "id": "1901", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show AI safety incidents for underrepresented communities in Canada, cross-joined with AI model details.", + "sql_context": "CREATE TABLE AISafetyIncidents (incident_id INTEGER, community TEXT, country TEXT); CREATE TABLE AIModels (model_id INTEGER, model_name TEXT); INSERT INTO AISafetyIncidents (incident_id, community, country) VALUES (1, \u0027Indigenous\u0027, \u0027Canada\u0027), (2, \u0027Minority\u0027, \u0027Canada\u0027); INSERT INTO AIModels (model_id, model_name) VALUES (1, \u0027ModelA\u0027), (2, \u0027ModelB\u0027);", + "sql": "SELECT AISafetyIncidents.community, AIModels.model_name FROM AISafetyIncidents CROSS JOIN AIModels WHERE AISafetyIncidents.country \u003d \u0027Canada\u0027;", + "sql_explanation": "This SQL query performs a cross join between AI safety incidents and AI model details for Canada. It returns all possible combinations of rows from both tables, filtering for Canada." 
+}, { + "id": "1945", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction amount per user in the \u0027creative_ai\u0027 application?", + "sql_context": "CREATE TABLE users (user_id INT, app VARCHAR(20)); INSERT INTO users (user_id, app) VALUES (1, \u0027creative_ai\u0027), (2, \u0027algorithmic_fairness\u0027), (3, \u0027explainable_ai\u0027); CREATE TABLE transactions (transaction_id INT, user_id INT, amount DECIMAL(10, 2)); INSERT INTO transactions (transaction_id, user_id, amount) VALUES (1, 1, 50.00), (2, 1, 75.00), (3, 2, 30.00), (4, 3, 100.00), (5, 1, 60.00);", + "sql": "SELECT AVG(amount) as avg_amount FROM transactions INNER JOIN users ON transactions.user_id \u003d users.user_id WHERE users.app \u003d \u0027creative_ai\u0027;", + "sql_explanation": "The SQL query joins the \u0027transactions\u0027 and \u0027users\u0027 tables based on the \u0027user_id\u0027 column. It then filters the results to only include transactions related to the \u0027creative_ai\u0027 application and calculates the average transaction amount by using the AVG function." 
+}, { + "id": "2079", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which AI safety research topics intersect with creative AI application topics?", + "sql_context": "CREATE TABLE if not exists safety_research (research_id INT PRIMARY KEY, topic TEXT); INSERT INTO safety_research (research_id, topic) VALUES (1, \u0027Robustness\u0027), (2, \u0027Fairness\u0027), (3, \u0027Interpretability\u0027); CREATE TABLE if not exists creative_ai_topics (topic_id INT PRIMARY KEY, topic TEXT); INSERT INTO creative_ai_topics (topic_id, topic) VALUES (1, \u0027Art Generation\u0027), (2, \u0027Image Recognition\u0027), (4, \u0027Robustness in Creativity\u0027);", + "sql": "SELECT DISTINCT safety_research.topic FROM safety_research JOIN creative_ai_topics ON safety_research.topic \u003d creative_ai_topics.topic;", + "sql_explanation": "This SQL query joins the safety_research and creative_ai_topics tables based on the topic column. It returns the DISTINCT topic column from the safety_research table, showing which AI safety research topics intersect with creative AI application topics." 
+}, { + "id": "2505", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sentiment score for African American authors\u0027 books?", + "sql_context": "CREATE TABLE authors (id INT, name VARCHAR(255), ethnicity VARCHAR(255)); INSERT INTO authors (id, name, ethnicity) VALUES (1, \u0027Toni Morrison\u0027, \u0027African American\u0027); CREATE TABLE books (id INT, title VARCHAR(255), sentiment FLOAT, author_id INT); INSERT INTO books (id, title, sentiment, author_id) VALUES (1, \u0027Beloved\u0027, 8.5, 1);", + "sql": "SELECT AVG(sentiment) FROM books JOIN authors ON books.author_id \u003d authors.id WHERE authors.ethnicity \u003d \u0027African American\u0027", + "sql_explanation": "Calculate the average sentiment score of books written by African American authors." 
+}, { + "id": "2881", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What papers were published by AI researchers from India?", + "sql_context": "CREATE TABLE ai_researcher(id INT PRIMARY KEY, name VARCHAR(50), country VARCHAR(50)); INSERT INTO ai_researcher (id, name, country) VALUES (1, \u0027Alice\u0027, \u0027USA\u0027), (2, \u0027Bob\u0027, \u0027India\u0027), (3, \u0027Charlie\u0027, \u0027Canada\u0027); CREATE TABLE ai_papers(id INT PRIMARY KEY, title VARCHAR(50), researcher_id INT); INSERT INTO ai_papers (id, title, researcher_id) VALUES (1, \u0027Explainable AI\u0027, 2), (2, \u0027AI Ethics\u0027, 3);", + "sql": "SELECT p.title FROM ai_papers p INNER JOIN ai_researcher r ON p.researcher_id \u003d r.id WHERE r.country \u003d \u0027India\u0027;", + "sql_explanation": "This query performs an inner join between the \u0027ai_papers\u0027 and \u0027ai_researcher\u0027 tables, joining on the \u0027researcher_id\u0027 and \u0027id\u0027 columns. It then filters the results to only show rows where the \u0027country\u0027 column value in the \u0027ai_researcher\u0027 table is \u0027India\u0027." 
+}, { + "id": "3234", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which AI researchers have not published any papers?", + "sql_context": "CREATE TABLE ai_researcher(id INT PRIMARY KEY, name VARCHAR(50), country VARCHAR(50)); INSERT INTO ai_researcher (id, name, country) VALUES (1, \u0027Alice\u0027, \u0027USA\u0027), (2, \u0027Bob\u0027, \u0027Canada\u0027), (3, \u0027Charlie\u0027, \u0027UK\u0027), (4, \u0027Dave\u0027, \u0027India\u0027); CREATE TABLE ai_papers(id INT PRIMARY KEY, title VARCHAR(50), researcher_id INT); INSERT INTO ai_papers (id, title, researcher_id) VALUES (1, \u0027Fair AI\u0027, 1), (2, \u0027AI Safety\u0027, 3), (3, \u0027AI Ethics\u0027, 1), (4, \u0027Explainable AI for AI Safety\u0027, 3);", + "sql": "SELECT r.name FROM ai_researcher r LEFT JOIN ai_papers p ON r.id \u003d p.researcher_id WHERE p.id IS NULL;", + "sql_explanation": "This query performs a left join between the \u0027ai_researcher\u0027 and \u0027ai_papers\u0027 tables, joining on the \u0027id\u0027 and \u0027researcher_id\u0027 columns. It then filters the results to only show rows where the \u0027id\u0027 column value in the \u0027ai_papers\u0027 table is NULL, and selects the \u0027name\u0027 values from the \u0027ai_researcher\u0027 table." 
+}, { + "id": "340", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the number of distinct sectors that \u0027Portfolio Manager 1\u0027 has invested in and their respective ESG ratings?", + "sql_context": "CREATE TABLE portfolio_managers (manager_name VARCHAR(20), id INT); CREATE TABLE investments (manager_id INT, sector VARCHAR(20), ESG_rating FLOAT); INSERT INTO portfolio_managers (manager_name, id) VALUES (\u0027Portfolio Manager 1\u0027, 1), (\u0027Portfolio Manager 2\u0027, 2), (\u0027Portfolio Manager 3\u0027, 3); INSERT INTO investments (manager_id, sector, ESG_rating) VALUES (1, \u0027renewable_energy\u0027, 8.1), (1, \u0027technology\u0027, 7.5), (1, \u0027finance\u0027, 6.8), (2, \u0027renewable_energy\u0027, 6.5), (2, \u0027technology\u0027, 9.0), (3, \u0027finance\u0027, 6.8), (3, \u0027renewable_energy\u0027, 9.2);", + "sql": "SELECT investments.sector, AVG(investments.ESG_rating) FROM investments INNER JOIN portfolio_managers ON investments.manager_id \u003d portfolio_managers.id WHERE portfolio_managers.manager_name \u003d \u0027Portfolio Manager 1\u0027 GROUP BY investments.sector;", + "sql_explanation": "Join the portfolio_managers and investments tables on the manager_id and id columns. Filter the results to only include rows where the manager_name is \u0027Portfolio Manager 1\u0027. Group the results by sector. Calculate the average ESG_rating using the AVG() function." 
+}, { + "id": "644", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the risk level and risk factor for investments in the \u0027Consumer Staples\u0027 sector with an investment amount greater than 12000000?", + "sql_context": "CREATE TABLE risk_assessment (investment_id INT, risk_level VARCHAR(50), risk_factor FLOAT); INSERT INTO risk_assessment (investment_id, risk_level, risk_factor) VALUES (10, \u0027Low\u0027, 0.10), (11, \u0027Medium\u0027, 0.15), (12, \u0027High\u0027, 0.20); CREATE TABLE impact_investments (id INT, investment_name VARCHAR(50), strategy VARCHAR(50), investment_amount FLOAT, sector VARCHAR(50)); INSERT INTO impact_investments (id, investment_name, strategy, investment_amount, sector) VALUES (10, \u0027Organic Food Company\u0027, \u0027Sustainable Agriculture\u0027, 15000000.00, \u0027Consumer Staples\u0027), (11, \u0027Eco-friendly Cleaning Products Manufacturer\u0027, \u0027Sustainable Manufacturing\u0027, 18000000.00, \u0027Consumer Staples\u0027), (12, \u0027Fair Trade Clothing Brand\u0027, \u0027Sustainable Fashion\u0027, 13000000.00, \u0027Consumer Discretionary\u0027);", + "sql": "SELECT i.investment_name, r.risk_level, r.risk_factor FROM impact_investments i INNER JOIN risk_assessment r ON i.id \u003d r.investment_id WHERE i.sector \u003d \u0027Consumer Staples\u0027 AND i.investment_amount \u003e 12000000;", + "sql_explanation": "This query joins the impact_investments table with the risk_assessment table on the investment_id column and filters for Consumer Staples sector investments 
with an investment amount greater than 12 million, returning the investment name, risk level, and risk factor." +}, { + "id": "718", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find all investments made by \u0027Impact Fund 1\u0027 and their associated ESG ratings?", + "sql_context": "CREATE TABLE investments (fund_name VARCHAR(20), company_id INT, investment_amount FLOAT); CREATE TABLE companies (id INT, company_name VARCHAR(20), sector VARCHAR(20), ESG_rating FLOAT); INSERT INTO investments (fund_name, company_id, investment_amount) VALUES (\u0027Impact Fund 1\u0027, 1, 50000,), (\u0027Impact Fund 2\u0027, 2, 75000); INSERT INTO companies (id, company_name, sector, ESG_rating) VALUES (1, \u0027Tech Innovations\u0027, \u0027technology\u0027, 8.1), (2, \u0027Finance Group\u0027, \u0027finance\u0027, 6.5);", + "sql": "SELECT investments.fund_name, companies.company_name, companies.ESG_rating FROM investments INNER JOIN companies ON investments.company_id \u003d companies.id WHERE investments.fund_name \u003d \u0027Impact Fund 1\u0027;", + "sql_explanation": "Join the investments and companies tables on the company_id and id columns. Filter the results to only include rows where the fund_name is \u0027Impact Fund 1\u0027. Select the fund_name, company_name, and ESG_rating columns." 
+}, { + "id": "877", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many companies does \u0027Impact Fund 1\u0027 have investments in, and what\u0027s their average ESG rating?", + "sql_context": "CREATE TABLE investments (fund_name VARCHAR(20), company_id INT); CREATE TABLE companies (id INT, company_name VARCHAR(20), sector VARCHAR(20), ESG_rating FLOAT); INSERT INTO investments (fund_name, company_id) VALUES (\u0027Impact Fund 1\u0027, 1), (\u0027Impact Fund 1\u0027, 2), (\u0027Impact Fund 2\u0027, 3); INSERT INTO companies (id, company_name, sector, ESG_rating) VALUES (1, \u0027Tech Innovations\u0027, \u0027technology\u0027, 8.1), (2, \u0027Finance Group\u0027, \u0027finance\u0027, 6.5), (3, \u0027Green Solutions\u0027, \u0027renewable_energy\u0027, 9.0);", + "sql": "SELECT COUNT(DISTINCT companies.id), AVG(companies.ESG_rating) FROM investments INNER JOIN companies ON investments.company_id \u003d companies.id WHERE investments.fund_name \u003d \u0027Impact Fund 1\u0027;", + "sql_explanation": "Join the investments and companies tables on the company_id and id columns. Filter the results to only include rows where the fund_name is \u0027Impact Fund 1\u0027. Calculate the number of distinct companies using the COUNT(DISTINCT) function and the average ESG_rating using the AVG() function." 
+}, { + "id": "998", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average investment in socially responsible companies for each investor?", + "sql_context": "CREATE TABLE investments (id INT, investor_id INT, company_id INT, invested_amount FLOAT); CREATE TABLE companies (id INT, socially_responsible BOOLEAN); INSERT INTO investments (id, investor_id, company_id, invested_amount) VALUES (1, 1, 2, 5000), (2, 1, 3, 8000), (3, 2, 1, 7000), (4, 3, 3, 6000); INSERT INTO companies (id, socially_responsible) VALUES (1, true), (2, true), (3, true);", + "sql": "SELECT i.investor_id, AVG(i.invested_amount) as avg_investment FROM investments i JOIN companies c ON i.company_id \u003d c.id WHERE c.socially_responsible \u003d true GROUP BY i.investor_id;", + "sql_explanation": "The SQL query calculates the average investment in socially responsible companies for each investor by joining the investments and companies tables, filtering rows with socially_responsible as true, and grouping results based on investor_id." 
+}, { + "id": "1030", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the investors that have invested more than $20,000 in the \u0027Environmental Sustainability\u0027 field?", + "sql_context": "CREATE TABLE if not exists investors (id INT PRIMARY KEY, name TEXT, location TEXT, investment_goal TEXT); INSERT INTO investors (id, name, location, investment_goal) VALUES (1, \u0027Ana Trujillo\u0027, \u0027Mexico City\u0027, \u0027Environmental Sustainability\u0027); CREATE TABLE if not exists investments (id INT PRIMARY KEY, investor_id INT, nonprofit_id INT, amount DECIMAL(10,2), investment_date DATE); INSERT INTO investments (id, investor_id, nonprofit_id, amount, investment_date) VALUES (1, 1, 1, 25000.00, \u00272022-04-01\u0027);", + "sql": "SELECT i.name FROM investors i JOIN investments investment ON i.id \u003d investment.investor_id WHERE investment.amount \u003e 20000 AND i.investment_goal \u003d \u0027Environmental Sustainability\u0027;", + "sql_explanation": "This SQL query performs a join between the investors and investments tables using the investor_id. It then filters the data to return only the investors who have invested more than $20,000 in the \u0027Environmental Sustainability\u0027 field. Finally, it selects the name column from the investors table." 
+}, { + "id": "62", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the median time (in months) between the founding date and the first investment round for startups founded by individuals who identify as Middle Eastern in the cybersecurity sector?", + "sql_context": "CREATE TABLE startups (id INT, name TEXT, founder_ethnicity TEXT, industry TEXT, founding_date DATE); CREATE TABLE investments (id INT, startup_id INT, investment_date DATE, funding_amount INT);", + "sql": "SELECT AVG(DATEDIFF(\u0027month\u0027, startups.founding_date, investments.investment_date))/2 FROM startups INNER JOIN investments ON startups.id \u003d investments.startup_id WHERE startups.founder_ethnicity \u003d \u0027Middle Eastern\u0027 AND startups.industry \u003d \u0027Cybersecurity\u0027 ORDER BY DATEDIFF(\u0027month\u0027, startups.founding_date, investments.investment_date);", + "sql_explanation": "This query calculates the median time between the founding date and the first investment round for startups founded by individuals who identify as Middle Eastern in the cybersecurity sector by using the DATEDIFF() function to calculate the difference in months, and then using the AVG() function to calculate the median time. The result is divided by 2 to convert the number of months to a whole number." 
+}, { + "id": "513", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum exit valuation for startups in the FinTech sector founded by underrepresented minority founders?", + "sql_context": "CREATE TABLE exits (id INT, company_id INT, exit_type TEXT, exit_valuation INT, date DATE); INSERT INTO exits (id, company_id, exit_type, exit_valuation, date) VALUES (1, 1, \u0027Acquisition\u0027, 50000000, \u00272021-01-01\u0027), (2, 2, \u0027IPO\u0027, 100000000, \u00272022-01-01\u0027), (3, 3, \u0027Acquisition\u0027, 25000000, \u00272019-01-01\u0027);", + "sql": "SELECT MIN(exits.exit_valuation) FROM exits JOIN companies ON exits.company_id \u003d companies.id WHERE companies.industry \u003d \u0027FinTech\u0027 AND companies.founder_ethnicity IN (\u0027Hispanic\u0027, \u0027African American\u0027, \u0027Native American\u0027);", + "sql_explanation": "This query calculates the minimum exit valuation for startups in the FinTech sector founded by underrepresented minority founders by joining the exits and companies tables on the company_id field, filtering for FinTech industry and underrepresented minority founders, and then calculating the minimum exit_valuation field." 
+}, { + "id": "813", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum number of investors per funding round for companies founded by Latinx individuals?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, founder_race TEXT); CREATE TABLE funding_rounds (id INT, company_id INT, investors INT, size INT);", + "sql": "SELECT MAX(funding_rounds.investors), MIN(funding_rounds.investors) FROM companies INNER JOIN funding_rounds ON companies.id \u003d funding_rounds.company_id WHERE companies.founder_race \u003d \u0027Latinx\u0027;", + "sql_explanation": "The SQL query calculates the maximum and minimum number of investors per funding round for companies founded by Latinx individuals by joining the companies and funding_rounds tables on the company_id column, and then calculating the maximum and minimum of the investors column for records where the founder_race is \u0027Latinx\u0027." 
+}, { + "id": "847", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of funding raised for companies with a female founder in the healthcare sector?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founding_year INT, founder_gender TEXT, industry TEXT); INSERT INTO company (id, name, founding_year, founder_gender, industry) VALUES (1, \u0027Acme Inc\u0027, 2010, \u0027female\u0027, \u0027healthcare\u0027); INSERT INTO company (id, name, founding_year, founder_gender, industry) VALUES (2, \u0027Beta Corp\u0027, 2015, \u0027male\u0027, \u0027technology\u0027);", + "sql": "SELECT SUM(funding_raised) FROM investment_rounds INNER JOIN company ON investment_rounds.company_id \u003d company.id WHERE company.founder_gender \u003d \u0027female\u0027 AND company.industry \u003d \u0027healthcare\u0027;", + "sql_explanation": "This query returns the sum of funding raised for companies with a female founder in the healthcare sector. It does this by performing an inner join between the investment_rounds and company tables on the company_id and id columns, respectively. Then it filters the results to only include rows where the founder_gender column of the company table is \u0027female\u0027 and the industry column of the company table is \u0027healthcare\u0027 and finally calculates the sum of the funding_raised column." 
+}, { + "id": "1198", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of engineers working in South African startups that have had an exit strategy?", + "sql_context": "CREATE TABLE employee (id INT, company_id INT, employee_name VARCHAR(50), role VARCHAR(50), gender VARCHAR(10), ethnicity VARCHAR(25), salary FLOAT, hire_date DATE); CREATE TABLE company (id INT, name VARCHAR(50), founding_year INT, industry VARCHAR(50), ceo_id INT, exit_strategy ENUM(\u0027Acquisition\u0027, \u0027IPO\u0027, \u0027Merger\u0027, \u0027Liquidation\u0027, \u0027Dissolution\u0027, \u0027Others\u0027) NOT NULL);", + "sql": "SELECT AVG(e.salary) FROM employee e INNER JOIN company c ON e.company_id \u003d c.id WHERE e.role \u003d \u0027Engineer\u0027 AND c.exit_strategy IS NOT NULL AND c.location \u003d \u0027South Africa\u0027;", + "sql_explanation": "This query joins the employee and company tables on the company_id field. It filters for engineers working in South African startups that have had an exit strategy and returns the average salary." 
+}, { + "id": "1543", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding round size for startups founded by Latinx individuals?", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founding_year INT, founder_gender TEXT, founder_ethnicity TEXT); INSERT INTO company (id, name, founding_year, founder_gender, founder_ethnicity) VALUES (1, \u0027TechFuturo\u0027, 2018, \u0027male\u0027, \u0027Latinx\u0027); INSERT INTO company (id, name, founding_year, founder_gender, founder_ethnicity) VALUES (2, \u0027EcoVida\u0027, 2020, \u0027female\u0027, \u0027Latinx\u0027); CREATE TABLE funding_round (company_id INT, round_amount INT); INSERT INTO funding_round (company_id, round_amount) VALUES (1, 3000000); INSERT INTO funding_round (company_id, round_amount) VALUES (2, 6000000);", + "sql": "SELECT AVG(funding_round.round_amount) FROM company JOIN funding_round ON company.id \u003d funding_round.company_id WHERE company.founder_ethnicity \u003d \u0027Latinx\u0027;", + "sql_explanation": "Join the company and funding_round tables, filter for rows where the founder is Latinx, and calculate the average round_amount." 
+}, { + "id": "1904", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average funding amount for companies founded by women?", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, founded_date DATE, founder_gender TEXT); INSERT INTO companies (id, name, founded_date, founder_gender) VALUES (1, \u0027Acme Inc\u0027, \u00272010-01-01\u0027, \u0027female\u0027); INSERT INTO companies (id, name, founded_date, founder_gender) VALUES (2, \u0027Beta Corp\u0027, \u00272015-05-15\u0027, \u0027male\u0027);", + "sql": "SELECT AVG(funding_amount) FROM investments JOIN companies ON investments.company_id \u003d companies.id WHERE companies.founder_gender \u003d \u0027female\u0027;", + "sql_explanation": "Join the investments and companies tables, filter for companies founded by women, and calculate the average funding amount." 
+}, { + "id": "2080", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all investors who have participated in funding rounds with companies in the \u0027Renewable Energy\u0027 sector.", + "sql_context": "CREATE TABLE investments (id INT, company_id INT, investor TEXT, investment_amount FLOAT); INSERT INTO investments (id, company_id, investor, investment_amount) VALUES (1, 3, \u0027Green Ventures\u0027, 5000000.0); INSERT INTO investments (id, company_id, investor, investment_amount) VALUES (2, 4, \u0027Solar Angels\u0027, 3000000.0);", + "sql": "SELECT investor FROM investments JOIN companies ON investments.company_id \u003d companies.id WHERE companies.industry \u003d \u0027Renewable Energy\u0027;", + "sql_explanation": "This SQL query lists all investors who have participated in funding rounds with companies in the \u0027Renewable Energy\u0027 sector. It uses a JOIN clause to combine the data from the investments and companies tables, based on the company_id column, and filters the results by the industry category \u0027Renewable Energy\u0027." 
+}, { + "id": "2326", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the average amount of funds raised by companies founded by LGBTQ+ individuals.", + "sql_context": "CREATE TABLE company (id INT, name TEXT, founder_gender TEXT, founder_sexuality TEXT); INSERT INTO company (id, name, founder_gender, founder_sexuality) VALUES (1, \u0027Acme Inc\u0027, \u0027Female\u0027, \u0027LGBTQ+\u0027), (2, \u0027Beta Corp\u0027, \u0027Male\u0027, \u0027Straight\u0027), (3, \u0027Gamma PLC\u0027, \u0027Non-binary\u0027, \u0027LGBTQ+\u0027); CREATE TABLE investment (id INT, company_id INT, amount INT); INSERT INTO investment (id, company_id, amount) VALUES (1, 1, 50000), (2, 1, 100000), (3, 2, 75000);", + "sql": "SELECT AVG(amount) FROM investment JOIN company ON investment.company_id \u003d company.id WHERE company.founder_sexuality \u003d \u0027LGBTQ+\u0027", + "sql_explanation": "We first join the investment and company tables on the company id. Then, we filter for companies founded by LGBTQ+ individuals and calculate the average amount of funds raised." 
+}, { + "id": "2332", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding raised by companies with male founders?", + "sql_context": "CREATE TABLE rounds (id INT, company_id INT, funding_round_size INT, round_type TEXT); INSERT INTO rounds (id, company_id, funding_round_size, round_type) VALUES (1, 1, 5000000, \u0027Series A\u0027); INSERT INTO rounds (id, company_id, funding_round_size, round_type) VALUES (2, 3, 12000000, \u0027Series B\u0027);", + "sql": "SELECT SUM(funding_round_size) FROM rounds JOIN company ON rounds.company_id \u003d company.id WHERE company.founder_gender \u003d \u0027male\u0027;", + "sql_explanation": "This SQL query calculates the total funding raised by companies with male founders. It joins the \u0027rounds\u0027 table with the \u0027company\u0027 table on the \u0027company_id\u0027 column. Then, it filters the results to only include rows where the founder_gender is \u0027male\u0027. Finally, it calculates the sum of the \u0027funding_round_size\u0027 column." 
+}, { + "id": "3348", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of exits made by startups founded by refugees", + "sql_context": "CREATE TABLE startups(id INT, name TEXT, founding_year INT, founder_refugee BOOLEAN); CREATE TABLE exits(id INT, startup_id INT, exit_type TEXT, exit_value FLOAT); INSERT INTO startups (id, name, founding_year, founder_refugee) VALUES (1, \u0027Acme Inc\u0027, 2010, false); INSERT INTO startups (id, name, founding_year, founder_refugee) VALUES (2, \u0027Beta Corp\u0027, 2015, true); INSERT INTO startups (id, name, founding_year, founder_refugee) VALUES (3, \u0027Gamma LLC\u0027, 2020, false); INSERT INTO exits (id, startup_id, exit_type, exit_value) VALUES (1, 1, \u0027Acquisition\u0027, 10000000); INSERT INTO exits (id, startup_id, exit_type, exit_value) VALUES (2, 3, \u0027IPO\u0027, 50000000); INSERT INTO exits (id, startup_id, exit_type, exit_value) VALUES (3, 2, \u0027Acquisition\u0027, 7500000);", + "sql": "SELECT COUNT(*) FROM startups s JOIN exits e ON s.id \u003d e.startup_id WHERE s.founder_refugee \u003d true;", + "sql_explanation": "This query counts the number of records where the founder_refugee is true in the startups table and joins the exits table on the startup_id column to return all the records where there is an exit." 
+}, { + "id": "501", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all smart city projects and their corresponding technology categories in the \u0027smart_cities\u0027 schema.", + "sql_context": "CREATE SCHEMA if not exists smart_cities; CREATE TABLE if not exists smart_cities.projects (id INT, project_name VARCHAR, location VARCHAR); CREATE TABLE if not exists smart_cities.technologies (id INT, project_id INT, technology_category VARCHAR); INSERT INTO smart_cities.projects (id, project_name, location) VALUES (1, \u0027Smart City 1\u0027, \u0027USA\u0027), (2, \u0027Smart City 2\u0027, \u0027Germany\u0027); INSERT INTO smart_cities.technologies (id, project_id, technology_category) VALUES (1, 1, \u0027IoT\u0027), (2, 1, \u0027AI\u0027), (3, 2, \u0027AI\u0027);", + "sql": "SELECT smart_cities.projects.project_name, smart_cities.technologies.technology_category FROM smart_cities.projects INNER JOIN smart_cities.technologies ON smart_cities.projects.id \u003d smart_cities.technologies.project_id;", + "sql_explanation": "This query lists all smart city projects and their corresponding technology categories in the \u0027smart_cities\u0027 schema by performing an inner join between the \u0027projects\u0027 and \u0027technologies\u0027 tables on the project_id column." 
+}, { + "id": "763", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the initiatives, cities, and capacities for renewable energy projects related to hydropower energy in countries with BREEAM certified buildings.", + "sql_context": "CREATE TABLE green_buildings (id INT, name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), certification VARCHAR(50)); INSERT INTO green_buildings (id, name, city, country, certification) VALUES (1, \u0027EcoTowers\u0027, \u0027Toronto\u0027, \u0027Canada\u0027, \u0027BREEAM Excellent\u0027); CREATE TABLE renewable_energy (id INT, project_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), energy_type VARCHAR(50), capacity FLOAT); INSERT INTO renewable_energy (id, project_name, city, country, energy_type, capacity) VALUES (1, \u0027HydroPlant1\u0027, \u0027Toronto\u0027, \u0027Canada\u0027, \u0027Hydropower\u0027, 4000);", + "sql": "SELECT r.project_name, r.city, r.capacity FROM green_buildings g INNER JOIN renewable_energy r ON g.country \u003d r.country WHERE g.certification \u003d \u0027BREEAM Excellent\u0027 AND r.energy_type \u003d \u0027Hydropower\u0027;", + "sql_explanation": "Join \u0027green_buildings\u0027 and \u0027renewable_energy\u0027 tables on the \u0027country\u0027 column. Filter rows where the \u0027certification\u0027 in \u0027green_buildings\u0027 is \u0027BREEAM Excellent\u0027 and \u0027energy_type\u0027 in \u0027renewable_energy\u0027 is \u0027Hydropower\u0027. Return the project name, city, and capacity." 
+}, { + "id": "1187", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all smart city initiatives implemented in the state of California that involve renewable energy sources.", + "sql_context": "CREATE TABLE smart_cities (city_id INT, city_name VARCHAR(255), state VARCHAR(255)); CREATE TABLE renewable_energy (project_id INT, project_name VARCHAR(255), city_name VARCHAR(255), state VARCHAR(255));", + "sql": "SELECT smart_cities.city_name FROM smart_cities INNER JOIN renewable_energy ON smart_cities.city_name \u003d renewable_energy.city_name WHERE smart_cities.state \u003d \u0027California\u0027;", + "sql_explanation": "This SQL query lists all smart city initiatives implemented in the state of California that involve renewable energy sources by joining the smart_cities and renewable_energy tables on the city_name field, and filtering the results to only include rows where the state field is \u0027California\u0027." 
+}, { + "id": "1311", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of green buildings and carbon offset projects in each state?", + "sql_context": "CREATE TABLE green_buildings (id INT, building_name VARCHAR(255), state VARCHAR(255)); CREATE TABLE carbon_offset_projects (id INT, project_name VARCHAR(255), state VARCHAR(255));", + "sql": "SELECT state, COUNT(gb.building_name) + COUNT(cop.project_name) FROM green_buildings gb RIGHT JOIN carbon_offset_projects cop ON gb.state \u003d cop.state GROUP BY state;", + "sql_explanation": "This SQL query counts the total number of green buildings and carbon offset projects in each state by joining the green_buildings and carbon_offset_projects tables based on the state column, and calculating the count of each table separately." 
+}, { + "id": "1419", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total capacity of renewable energy projects in a given state?", + "sql_context": "CREATE TABLE State (state_id INT, state_name VARCHAR(50)); CREATE TABLE Project (project_id INT, project_name VARCHAR(50), project_capacity INT, state_id INT);", + "sql": "SELECT State.state_name, SUM(Project.project_capacity) as total_capacity FROM State JOIN Project ON State.state_id \u003d Project.state_id GROUP BY State.state_name;", + "sql_explanation": "This query joins the State and Project tables on the state_id column and then groups the results by state_name. It then calculates the sum of project_capacity for each state, providing the total capacity of renewable energy projects in a given state." 
+}, { + "id": "2176", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "single join", + "sql_complexity_description": "only one join (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Maximum carbon offsets achieved by green building projects in Europe?", + "sql_context": "CREATE TABLE carbon_offsets (id INT, green_building_id INT, offsets INT); CREATE VIEW green_buildings_europe AS SELECT * FROM green_buildings WHERE country \u003d \u0027Europe\u0027;", + "sql": "SELECT MAX(offsets) FROM carbon_offsets JOIN green_buildings_europe ON carbon_offsets.green_building_id \u003d green_buildings_europe.id;", + "sql_explanation": "The query calculates the maximum carbon offsets achieved by green building projects in Europe using the MAX function." 
+}, { + "id": "336", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the distribution of content categories in movies directed by people of color.", + "sql_context": "CREATE TABLE ContentCategories (MovieID INT, ContentCategory TEXT); INSERT INTO ContentCategories (MovieID, ContentCategory) VALUES (1, \u0027Drama\u0027), (1, \u0027Action\u0027), (2, \u0027Comedy\u0027); CREATE TABLE Movies (MovieID INT, DirectorID INT); INSERT INTO Movies (MovieID, DirectorID) VALUES (1, 2), (2, 3); CREATE TABLE Directors (DirectorID INT, DirectorName TEXT, Race TEXT); INSERT INTO Directors (DirectorID, DirectorName, Race) VALUES (2, \u0027Ava DuVernay\u0027, \u0027African-American\u0027), (3, \u0027Alfonso CuarÃŗn\u0027, \u0027Hispanic\u0027);", + "sql": "SELECT ContentCategory, COUNT(*) as Count FROM ContentCategories JOIN Movies ON ContentCategories.MovieID \u003d Movies.MovieID JOIN Directors ON Movies.DirectorID \u003d Directors.DirectorID WHERE Directors.Race !\u003d \u0027Caucasian\u0027 GROUP BY ContentCategory;", + "sql_explanation": "This query joins the ContentCategories, Movies, and Directors tables to associate content categories with movies and directors. It then filters for movies directed by people of color and calculates the distribution of content categories for those movies." 
+}, { + "id": "150", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all playlists that contain songs from both \u0027BTS\u0027 and \u0027Blackpink\u0027 bands.", + "sql_context": "CREATE TABLE playlists (playlist_id INT, playlist VARCHAR(50)); CREATE TABLE playlist_songs (playlist_id INT, song_id INT); CREATE TABLE songs (song_id INT, song VARCHAR(50), band VARCHAR(50)); INSERT INTO playlists VALUES (1, \u0027K-Pop Favorites\u0027), (2, \u0027Asian Stars\u0027), (3, \u0027Music Legends\u0027); INSERT INTO playlist_songs VALUES (1, 1), (1, 2), (2, 2), (3, 3); INSERT INTO songs VALUES (1, \u0027Dynamite\u0027, \u0027BTS\u0027), (2, \u0027How You Like That\u0027, \u0027Blackpink\u0027), (3, \u0027Bohemian Rhapsody\u0027, \u0027Queen\u0027);", + "sql": "SELECT p.playlist FROM playlists p JOIN playlist_songs ps1 ON p.playlist_id \u003d ps1.playlist_id JOIN playlist_songs ps2 ON p.playlist_id \u003d ps2.playlist_id JOIN songs s1 ON ps1.song_id \u003d s1.song_id JOIN songs s2 ON ps2.song_id \u003d s2.song_id WHERE s1.band \u003d \u0027BTS\u0027 AND s2.band \u003d \u0027Blackpink\u0027;", + "sql_explanation": "This query performs a self-join on the playlist_songs table to find playlists associated with both \u0027BTS\u0027 and \u0027Blackpink\u0027 songs, and then joins the playlist_songs table with the songs table to retrieve the corresponding playlist names." 
+}, { + "id": "558", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of streams and albums sold by artists who have won a Grammy award?", + "sql_context": "CREATE TABLE Artists (ArtistID INT, ArtistName VARCHAR(100), GrammyWinner BOOLEAN); INSERT INTO Artists (ArtistID, ArtistName, GrammyWinner) VALUES (1, \u0027Taylor Swift\u0027, TRUE), (2, \u0027Green Day\u0027, FALSE); CREATE TABLE MusicStreams (StreamID INT, SongID INT, ArtistID INT); INSERT INTO MusicStreams (StreamID, SongID, ArtistID) VALUES (1, 1, 1), (2, 2, 2); CREATE TABLE Albums (AlbumID INT, AlbumName VARCHAR(100), ArtistID INT); INSERT INTO Albums (AlbumID, AlbumName, ArtistID) VALUES (1, \u0027Fearless\u0027, 1), (2, \u0027American Idiot\u0027, 2);", + "sql": "SELECT COUNT(DISTINCT ms.StreamID) + COUNT(DISTINCT a.AlbumID) AS TotalReleases FROM Artists a JOIN MusicStreams ms ON a.ArtistID \u003d ms.ArtistID JOIN Albums al ON a.ArtistID \u003d al.ArtistID WHERE GrammyWinner \u003d TRUE;", + "sql_explanation": "This query first joins the Artists, MusicStreams, and Albums tables on the ArtistID column, then filters the results to only include artists who have won a Grammy award. It then uses the COUNT function to count the number of distinct stream and album IDs for those artists, adding the two results together to get the total number of releases." 
+}, { + "id": "590", + "domain": "music", + "domain_description": "Detailed records on music creation, distribution, marketing, and consumption patterns across various genres and platforms.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique users who have streamed pop and country songs, excluding any streams from the United States.", + "sql_context": "CREATE TABLE genres (id INT, name VARCHAR(255), type VARCHAR(255)); CREATE TABLE users (id INT, name VARCHAR(255), location VARCHAR(255)); CREATE TABLE streams (id INT, user_id INT, song_id INT, timestamp TIMESTAMP); INSERT INTO genres (id, name, type) VALUES (1, \u0027Pop\u0027, \u0027Music\u0027), (2, \u0027Country\u0027, \u0027Music\u0027); INSERT INTO users (id, name, location) VALUES (1, \u0027Alex Brown\u0027, \u0027USA\u0027), (2, \u0027Maria Garcia\u0027, \u0027Mexico\u0027); INSERT INTO streams (id, user_id, song_id, timestamp) VALUES (1, 1, 1, NOW()), (2, 2, 2, NOW()); CREATE VIEW pop_country_songs AS SELECT song_id FROM genres WHERE type IN (\u0027Pop\u0027, \u0027Country\u0027);", + "sql": "SELECT COUNT(DISTINCT user_id) FROM streams JOIN users ON streams.user_id \u003d users.id JOIN (SELECT song_id FROM pop_country_songs) AS song_ids ON streams.song_id \u003d song_ids.song_id WHERE users.location !\u003d \u0027USA\u0027;", + "sql_explanation": "The query joins the streams, users, and song_ids tables on their respective id columns. It then filters out the streams from the USA and counts the number of unique user_id\u0027s." 
+}, { + "id": "475", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total energy generated by wind and solar sources, and the corresponding carbon pricing in 2021?", + "sql_context": "CREATE TABLE wind_energy (year INT, region VARCHAR(20), energy_generated INT);CREATE TABLE solar_energy (year INT, region VARCHAR(20), energy_generated INT);CREATE TABLE carbon_pricing (region VARCHAR(20), price DECIMAL(5,2));", + "sql": "SELECT SUM(w.energy_generated) + SUM(s.energy_generated) AS total_energy, c.price FROM wind_energy w JOIN solar_energy s ON w.year \u003d s.year JOIN carbon_pricing c ON w.region \u003d c.region WHERE w.year \u003d 2021 GROUP BY w.region;", + "sql_explanation": "This SQL query calculates the total energy generated by wind and solar sources, and the corresponding carbon pricing in 2021. It uses a JOIN to combine data from the wind_energy, solar_energy, and carbon_pricing tables based on the year and region, and filters for only 2021. The SUM function is used to add up the energy_generated values for each table, and the results are then added together to get the total energy. The GROUP BY clause is used to group the data by region, and the carbon pricing is returned for each region." 
+}, { + "id": "154", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the designers who have created VR games, along with the average age of players who have played their games.", + "sql_context": "CREATE TABLE Designers (DesignerID INT, DesignerName VARCHAR(50), Age INT); CREATE TABLE VR_Games (GameID INT, GameName VARCHAR(50), Genre VARCHAR(20), DesignerID INT); CREATE TABLE GamePlayer (PlayerID INT, GameID INT); CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10));", + "sql": "SELECT Designers.DesignerName, AVG(Players.Age) FROM Designers INNER JOIN VR_Games ON Designers.DesignerID \u003d VR_Games.DesignerID INNER JOIN GamePlayer ON VR_Games.GameID \u003d GamePlayer.GameID INNER JOIN Players ON GamePlayer.PlayerID \u003d Players.PlayerID GROUP BY Designers.DesignerName;", + "sql_explanation": "We perform INNER JOINs between the Designers, VR_Games, GamePlayer, and Players tables on DesignerID, GameID, GameID, and PlayerID. Then, we group the results by the DesignerName column and calculate the average age using the AVG function." 
+}, { + "id": "297", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many players from India play games that are available in the \u0027Action\u0027 category?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Country) VALUES (1, 25, \u0027India\u0027), (2, 30, \u0027Canada\u0027), (3, 22, \u0027Germany\u0027), (4, 35, \u0027Japan\u0027); CREATE TABLE GameLibrary (GameID INT, GameName VARCHAR(50), GameType VARCHAR(50), Category VARCHAR(50)); INSERT INTO GameLibrary (GameID, GameName, GameType, Category) VALUES (1, \u0027GameA\u0027, \u0027VR\u0027, \u0027Action\u0027), (2, \u0027GameB\u0027, \u0027Non-VR\u0027, \u0027Strategy\u0027), (3, \u0027GameC\u0027, \u0027VR\u0027, \u0027Action\u0027); CREATE TABLE PlayerGameLibrary (PlayerID INT, GameID INT); INSERT INTO PlayerGameLibrary (PlayerID, GameID) VALUES (1, 1), (2, 2), (3, 1), (4, 3);", + "sql": "SELECT COUNT(Players.PlayerID) FROM Players JOIN PlayerGameLibrary ON Players.PlayerID \u003d PlayerGameLibrary.PlayerID JOIN GameLibrary ON PlayerGameLibrary.GameID \u003d GameLibrary.GameID WHERE Players.Country \u003d \u0027India\u0027 AND GameLibrary.Category \u003d \u0027Action\u0027;", + "sql_explanation": "The SQL query calculates the number of players from India who play games in the \u0027Action\u0027 category. It first joins the Players, PlayerGameLibrary, and GameLibrary tables based on the PlayerID and GameID. Then, it filters the records where the country is \u0027India\u0027 and the category is \u0027Action\u0027. 
Finally, it counts the number of matching records." +}, { + "id": "307", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names and regions of esports teams that have participated in at least one cross-platform tournament.", + "sql_context": "CREATE TABLE Teams (TeamID INT, TeamName VARCHAR(50), Platform VARCHAR(20)); INSERT INTO Teams (TeamID, TeamName, Platform) VALUES (1, \u0027Team A\u0027, \u0027PC\u0027); INSERT INTO Teams (TeamID, TeamName, Platform) VALUES (2, \u0027Team B\u0027, \u0027Console\u0027); CREATE TABLE Tournaments (TournamentID INT, TournamentName VARCHAR(50), Platform VARCHAR(20)); INSERT INTO Tournaments (TournamentID, TournamentName, Platform) VALUES (1, \u0027Tournament 1\u0027, \u0027Cross-Platform\u0027); INSERT INTO Tournaments (TournamentID, TournamentName, Platform) VALUES (2, \u0027Tournament 2\u0027, \u0027PC\u0027); CREATE TABLE Participation (TeamID INT, TournamentID INT); INSERT INTO Participation (TeamID, TournamentID) VALUES (1, 1); INSERT INTO Participation (TeamID, TournamentID) VALUES (2, 2);", + "sql": "SELECT DISTINCT Teams.TeamName, Teams.Platform FROM Teams INNER JOIN Participation ON Teams.TeamID \u003d Participation.TeamID INNER JOIN Tournaments ON Participation.TournamentID \u003d Tournaments.TournamentID WHERE Tournaments.Platform \u003d \u0027Cross-Platform\u0027;", + "sql_explanation": "The SQL query lists the names and regions of esports teams that have participated in at least one cross-platform tournament by joining the Teams, Participation, and Tournaments tables on the TeamID and TournamentID columns. 
It filters for cross-platform tournaments and selects distinct team names and platforms." +}, { + "id": "374", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of players who play VR games and are from the United States?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Country) VALUES (1, 25, \u0027USA\u0027), (2, 30, \u0027Canada\u0027), (3, 22, \u0027USA\u0027), (4, 35, \u0027Mexico\u0027); CREATE TABLE GameLibrary (GameID INT, GameName VARCHAR(50), GameType VARCHAR(50)); INSERT INTO GameLibrary (GameID, GameName, GameType) VALUES (1, \u0027GameA\u0027, \u0027VR\u0027), (2, \u0027GameB\u0027, \u0027Non-VR\u0027), (3, \u0027GameC\u0027, \u0027VR\u0027); CREATE TABLE PlayerGameLibrary (PlayerID INT, GameID INT); INSERT INTO PlayerGameLibrary (PlayerID, GameID) VALUES (1, 1), (2, 2), (3, 1), (4, 3);", + "sql": "SELECT AVG(Players.Age) FROM Players JOIN PlayerGameLibrary ON Players.PlayerID \u003d PlayerGameLibrary.PlayerID JOIN GameLibrary ON PlayerGameLibrary.GameID \u003d GameLibrary.GameID WHERE Players.Country \u003d \u0027USA\u0027 AND GameLibrary.GameType \u003d \u0027VR\u0027;", + "sql_explanation": "The SQL query calculates the average age of players who play VR games and are from the United States. It first joins the Players, PlayerGameLibrary, and GameLibrary tables based on the PlayerID and GameID. Then, it filters the records where the country is \u0027USA\u0027 and the game type is \u0027VR\u0027. Finally, it calculates the average age of the matching records." 
+}, { + "id": "781", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average age of players who have played a specific VR game, \u0027CyberSphere\u0027?", + "sql_context": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(50)); CREATE TABLE VRPlayers (PlayerID INT, VRGameID INT); CREATE TABLE VRGames (VRGameID INT, Title VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (1, 25, \u0027Male\u0027, \u0027USA\u0027); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (2, 28, \u0027Female\u0027, \u0027Canada\u0027); INSERT INTO VRPlayers (PlayerID, VRGameID) VALUES (1, 1); INSERT INTO VRPlayers (PlayerID, VRGameID) VALUES (2, 1); INSERT INTO VRGames (VRGameID, Title) VALUES (1, \u0027CyberSphere\u0027);", + "sql": "SELECT AVG(Players.Age) FROM Players INNER JOIN VRPlayers ON Players.PlayerID \u003d VRPlayers.PlayerID INNER JOIN VRGames ON VRPlayers.VRGameID \u003d VRGames.VRGameID WHERE VRGames.Title \u003d \u0027CyberSphere\u0027;", + "sql_explanation": "Join the Players, VRPlayers, and VRGames tables on PlayerID and VRGameID, then filter for the specific VR game \u0027CyberSphere\u0027 and calculate the average age of players who have played this game." 
+}, { + "id": "191", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum water consumption per mining site, and which rock formations are present at that site?", + "sql_context": "CREATE TABLE environmental_impact (id INT PRIMARY KEY, mining_site_id INT, pollution_level INT, water_consumption FLOAT, FOREIGN KEY (mining_site_id) REFERENCES mining_sites(id)); INSERT INTO environmental_impact (id, mining_site_id, pollution_level, water_consumption) VALUES (1, 1, 5, 1500.5), (2, 2, 3, 1200.3), (3, 3, 7, 2000.0), (4, 1, 2, 2000.0); CREATE TABLE geological_survey (id INT PRIMARY KEY, mining_site_id INT, rock_formation VARCHAR(255), depth FLOAT, FOREIGN KEY (mining_site_id) REFERENCES mining_sites(id)); INSERT INTO geological_survey (id, mining_site_id, rock_formation, depth) VALUES (1, 1, \u0027Granite\u0027, 50.2), (2, 2, \u0027Basalt\u0027, 30.0), (3, 3, \u0027Shale\u0027, 75.5), (4, 1, \u0027Limestone\u0027, 60.0);", + "sql": "SELECT e.name, MAX(ei.water_consumption) AS max_water_consumption, GROUP_CONCAT(DISTINCT gs.rock_formation) AS rock_formations FROM environmental_impact ei JOIN mining_sites e ON ei.mining_site_id \u003d e.id JOIN geological_survey gs ON e.id \u003d gs.mining_site_id GROUP BY e.name;", + "sql_explanation": "This query joins the environmental_impact and geological_survey tables with the mining_sites table on their common id or mining_site_id. It then groups the data by the name of the mining site and calculates the maximum water_consumption for each group. It also concatenates the distinct rock_formation values for each group." 
+}, { + "id": "66", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which stores in Tokyo have sold more than 30 units of eco-friendly cleaning products since their launch?", + "sql_context": "CREATE TABLE products(product_id VARCHAR(20), product_name VARCHAR(20), launched_date DATE); INSERT INTO products (product_id, product_name, launched_date) VALUES (\u0027Eco-friendly Cleaner\u0027, \u00272022-01-01\u0027); CREATE TABLE stores(store_id VARCHAR(20), store_location VARCHAR(20)); INSERT INTO stores (store_id, store_location) VALUES (\u0027Tokyo Store 1\u0027, \u0027Tokyo\u0027), (\u0027Tokyo Store 2\u0027, \u0027Tokyo\u0027); CREATE TABLE sales(store_id VARCHAR(20), product_id VARCHAR(20), sale_date DATE, quantity INTEGER); INSERT INTO sales (store_id, product_id, sale_date, quantity) VALUES (\u0027Tokyo Store 1\u0027, \u0027Eco-friendly Cleaner\u0027, \u00272022-01-05\u0027, 20), (\u0027Tokyo Store 2\u0027, \u0027Eco-friendly Cleaner\u0027, \u00272022-01-07\u0027, 40);", + "sql": "SELECT store_location, SUM(quantity) FROM sales JOIN stores ON sales.store_id \u003d stores.store_id JOIN products ON sales.product_id \u003d products.product_id WHERE products.product_name \u003d \u0027Eco-friendly Cleaner\u0027 AND sale_date \u003e\u003d products.launched_date AND store_location \u003d \u0027Tokyo\u0027 GROUP BY store_location HAVING SUM(quantity) \u003e 30;", + "sql_explanation": "This query identifies Tokyo stores selling more than 30 units of eco-friendly cleaning products by joining the sales, stores, and products tables, filtering by product_name, launched_date, and store_location, 
calculating the total quantity for each store, and then filtering based on the condition SUM(quantity) \u003e 30." +}, { + "id": "175", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the vendors in Africa with ethical labor practices and their total revenue from Q2 2022?", + "sql_context": "CREATE TABLE Vendors (vendorID INT, vendorName VARCHAR(50), country VARCHAR(50), ethicalPractice BOOLEAN); CREATE TABLE Products (productID INT, vendorID INT, productName VARCHAR(50), price DECIMAL(10,2)); CREATE TABLE Sales (saleID INT, productID INT, saleDate DATE);", + "sql": "SELECT V.vendorName, SUM(P.price) FROM Vendors V INNER JOIN Products P ON V.vendorID \u003d P.vendorID INNER JOIN Sales S ON P.productID \u003d S.productID WHERE V.country \u003d \u0027Africa\u0027 AND V.ethicalPractice \u003d TRUE AND S.saleDate BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 GROUP BY V.vendorName;", + "sql_explanation": "This query lists vendors in Africa with ethical labor practices and their total revenue from Q2 2022. It performs an inner join between Vendors, Products, and Sales tables based on vendorID and productID, filters vendors from Africa with ethical labor practices, and calculates the total revenue for each vendor using the GROUP BY and SUM clauses." 
+}, { + "id": "187", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 vendors with the highest revenue from circular supply chain products?", + "sql_context": "CREATE TABLE vendors (vendor_id INT, name TEXT); CREATE TABLE sales (sale_id INT, vendor_id INT, product_id INT, price DECIMAL(5,2)); INSERT INTO vendors (vendor_id, name) VALUES (1, \u0027Vendor A\u0027), (2, \u0027Vendor B\u0027), (3, \u0027Vendor C\u0027), (4, \u0027Vendor D\u0027), (5, \u0027Vendor E\u0027); INSERT INTO sales (sale_id, vendor_id, product_id, price) VALUES (1, 1, 1, 20.99), (2, 1, 3, 75.00), (3, 2, 2, 50.00), (4, 3, 1, 20.99), (5, 3, 3, 75.00), (6, 4, 2, 50.00), (7, 5, 3, 75.00); CREATE TABLE circular_supply_chain_products (product_id INT); INSERT INTO circular_supply_chain_products (product_id) VALUES (1), (3);", + "sql": "SELECT vendors.name, SUM(sales.price) FROM vendors INNER JOIN sales ON vendors.vendor_id \u003d sales.vendor_id INNER JOIN circular_supply_chain_products ON sales.product_id \u003d circular_supply_chain_products.product_id GROUP BY vendors.name ORDER BY SUM(sales.price) DESC LIMIT 5;", + "sql_explanation": "This query identifies the top 5 vendors with the highest revenue from circular supply chain products by selecting the name column from the vendors table, joining it with the sales table on the vendor_id column, and then joining it again with the circular_supply_chain_products table on the product_id column. The results are grouped by vendor name using the GROUP BY clause, and the total revenue for each vendor is calculated using the SUM() function. 
The results are then ordered by total revenue in descending order using the ORDER BY clause, and the top 5 vendors are selected using the LIMIT clause." +}, { + "id": "605", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of organic products sold in North America?", + "sql_context": "CREATE TABLE regions (id INT, name TEXT); INSERT INTO regions (id, name) VALUES (1, \u0027North America\u0027), (2, \u0027South America\u0027), (3, \u0027Europe\u0027), (4, \u0027Asia\u0027), (5, \u0027Africa\u0027); CREATE TABLE products (id INT, name TEXT, is_organic BOOLEAN); INSERT INTO products (id, name, is_organic) VALUES (1, \u0027Product X\u0027, true), (2, \u0027Product Y\u0027, false), (3, \u0027Product Z\u0027, true), (4, \u0027Product W\u0027, false); CREATE TABLE sales (id INT, product TEXT, quantity INT, region TEXT); INSERT INTO sales (id, product, quantity, region) VALUES (1, \u0027Product X\u0027, 100, \u0027North America\u0027), (2, \u0027Product Y\u0027, 150, \u0027South America\u0027), (3, \u0027Product Z\u0027, 80, \u0027Europe\u0027), (4, \u0027Product W\u0027, 120, \u0027Asia\u0027);", + "sql": "SELECT SUM(sales.quantity) FROM sales INNER JOIN regions ON sales.region \u003d regions.name INNER JOIN products ON sales.product \u003d products.name WHERE products.is_organic \u003d true AND regions.name \u003d \u0027North America\u0027;", + "sql_explanation": "The SQL query first performs an inner join between the sales, regions, and products tables, based on the region and product columns. 
Then, it filters the results to only include rows where the is_organic column is true and the name column is North America. Finally, it calculates the sum of the quantity column for the filtered rows." +}, { + "id": "656", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by vendors selling fair trade products in Hawaii?", + "sql_context": "CREATE TABLE vendors (vendor_id INT, vendor_name VARCHAR(50), state VARCHAR(50)); INSERT INTO vendors VALUES (1, \u0027VendorA\u0027, \u0027Hawaii\u0027); INSERT INTO vendors VALUES (2, \u0027VendorB\u0027, \u0027Texas\u0027); CREATE TABLE products (product_id INT, product_name VARCHAR(50), vendor_id INT, price DECIMAL(5,2), fair_trade BOOLEAN); INSERT INTO products VALUES (1, \u0027Product1\u0027, 1, 150, true); INSERT INTO products VALUES (2, \u0027Product2\u0027, 1, 75, true); INSERT INTO products VALUES (3, \u0027Product3\u0027, 2, 100, false); INSERT INTO products VALUES (4, \u0027Product4\u0027, 1, 200, true); CREATE TABLE sales (sale_id INT, product_id INT, vendor_id INT, sale_amount DECIMAL(5,2)); INSERT INTO sales VALUES (1, 1, 1, 50); INSERT INTO sales VALUES (2, 2, 1, 75); INSERT INTO sales VALUES (3, 3, 2, 30); INSERT INTO sales VALUES (4, 4, 1, 60);", + "sql": "SELECT SUM(sale_amount) FROM sales JOIN products ON sales.product_id \u003d products.product_id JOIN vendors ON sales.vendor_id \u003d vendors.vendor_id WHERE products.fair_trade \u003d true AND vendors.state \u003d \u0027Hawaii\u0027;", + "sql_explanation": "First, join the sales, products, and vendors tables based on the sale_id, product_id, and 
vendor_id. Then, filter the joined table for fair trade products sold by vendors in Hawaii. Lastly, calculate the sum of sale_amount for the filtered rows." +}, { + "id": "727", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of eco-friendly products sold, by brand and region?", + "sql_context": "CREATE TABLE Brands (id INT, brand VARCHAR(255), region VARCHAR(255)); INSERT INTO Brands (id, brand, region) VALUES (1, \u0027EcoBrandA\u0027, \u0027North America\u0027), (2, \u0027EcoBrandB\u0027, \u0027Europe\u0027), (3, \u0027EcoBrandC\u0027, \u0027Asia\u0027); CREATE TABLE Sales (id INT, brand_id INT, product VARCHAR(255), quantity INT, region VARCHAR(255)); INSERT INTO Sales (id, brand_id, product, quantity, region) VALUES (1, 1, \u0027EcoProduct1\u0027, 50, \u0027North America\u0027), (2, 1, \u0027EcoProduct2\u0027, 75, \u0027North America\u0027), (3, 2, \u0027EcoProduct3\u0027, 30, \u0027Europe\u0027), (4, 2, \u0027EcoProduct4\u0027, 40, \u0027Europe\u0027), (5, 3, \u0027EcoProduct5\u0027, 60, \u0027Asia\u0027), (6, 3, \u0027EcoProduct6\u0027, 80, \u0027Asia\u0027);", + "sql": "SELECT s.region, b.brand, SUM(s.quantity) AS total_quantity FROM Sales s JOIN Brands b ON s.brand_id \u003d b.id JOIN Products p ON s.product \u003d p.id WHERE p.eco_friendly \u003d true GROUP BY s.region, b.brand;", + "sql_explanation": "Joins the Sales, Brands, and Products tables on their respective id columns. Filters the results to only include eco-friendly products using the WHERE clause. 
Calculates the total quantity of eco-friendly products sold, grouped by brand and region using the SUM function and GROUP BY clause." +}, { + "id": "53", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of games played and the number of wins, losses, and draws for each team, from the teams, games, and games_results tables?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255)); CREATE TABLE games (game_id INT, team_id INT, game_date DATE); CREATE TABLE games_results (game_id INT, team_id INT, result VARCHAR(10));", + "sql": "SELECT t.team_name, COUNT(g.game_id) AS total_games, SUM(CASE WHEN gr.result \u003d \u0027win\u0027 THEN 1 ELSE 0 END) AS wins, SUM(CASE WHEN gr.result \u003d \u0027loss\u0027 THEN 1 ELSE 0 END) AS losses, SUM(CASE WHEN gr.result \u003d \u0027draw\u0027 THEN 1 ELSE 0 END) AS draws FROM teams t JOIN games g ON t.team_id \u003d g.team_id JOIN games_results gr ON g.game_id \u003d gr.game_id GROUP BY t.team_name;", + "sql_explanation": "The SQL query performs a join between the teams, games, and games_results tables using the team_id and game_id columns to associate teams with their games and results. Then, it calculates the total number of games played, the number of wins, losses, and draws for each team." 
+}, { + "id": "561", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average points scored per game, grouped by team and year, from the teams, games, and games_stats tables?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255)); CREATE TABLE games (game_id INT, team_id INT, game_date DATE); CREATE TABLE games_stats (game_id INT, team_id INT, points INT);", + "sql": "SELECT t.team_name, EXTRACT(YEAR FROM g.game_date) AS year, AVG(gs.points) AS avg_points FROM teams t JOIN games g ON t.team_id \u003d g.team_id JOIN games_stats gs ON g.game_id \u003d gs.game_id GROUP BY t.team_name, year;", + "sql_explanation": "The SQL query performs a join between the teams, games, and games_stats tables using the team_id and game_id columns to associate teams with their games and scores. Then, it extracts the year from the game_date column and groups the results by team name and year. Finally, it calculates the average points scored per game for each team and year." 
+}, { + "id": "1103", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average points per game scored by players from the players_stats table, grouped by their team?", + "sql_context": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255)); CREATE TABLE players (player_id INT, player_name VARCHAR(255), team_id INT); CREATE TABLE players_stats (player_id INT, game_id INT, points INT);", + "sql": "SELECT t.team_name, AVG(ps.points) AS avg_points FROM players_stats ps JOIN players p ON ps.player_id \u003d p.player_id JOIN teams t ON p.team_id \u003d t.team_id GROUP BY t.team_name;", + "sql_explanation": "The SQL query performs a join between the players_stats, players, and teams tables, using the player_id and team_id columns to associate players with their teams. Then, it groups the results by team name and calculates the average points scored per game for each team." 
+}, { + "id": "78", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the top 3 best-selling garments for each manufacturer.", + "sql_context": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(50));CREATE TABLE SalesData (SaleID INT, ManufacturerID INT, GarmentID INT, SalesQuantity INT);", + "sql": "SELECT M.ManufacturerName, G.GarmentName, SUM(SD.SalesQuantity) AS TotalSales FROM SalesData SD JOIN Manufacturers M ON SD.ManufacturerID \u003d M.ManufacturerID JOIN Garments G ON SD.GarmentID \u003d G.GarmentID GROUP BY M.ManufacturerName, G.GarmentName ORDER BY TotalSales DESC, M.ManufacturerName, G.GarmentName LIMIT 3;", + "sql_explanation": "This SQL query joins the Manufacturers, SalesData, and Garments tables to display the top 3 best-selling garments for each manufacturer. The JOIN clause is used to combine rows from the Manufacturers, SalesData, and Garments tables based on the ManufacturerID and GarmentID columns. The GROUP BY clause is used to group the results by the manufacturer name and the garment name. The SUM function is used to calculate the total sales, and the results are ordered by the total sales in descending order, manufacturer name, and garment name. The LIMIT clause is used to limit the results to the top 3 best-selling garments per manufacturer." 
+}, { + "id": "467", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of unsold garments for each brand, grouped by country, which have a quantity greater than 500?", + "sql_context": "CREATE TABLE Countries (id INT, country VARCHAR(50)); INSERT INTO Countries (id, country) VALUES (1, \u0027USA\u0027), (2, \u0027Canada\u0027), (3, \u0027Mexico\u0027); CREATE TABLE Brands (id INT, brand VARCHAR(50)); INSERT INTO Brands (id, brand) VALUES (1, \u0027Gucci\u0027), (2, \u0027Chanel\u0027), (3, \u0027Louis Vuitton\u0027); CREATE TABLE Inventory (id INT, country_id INT, brand_id INT, quantity INT); INSERT INTO Inventory (id, country_id, brand_id, quantity) VALUES (1, 1, 1, 800), (2, 1, 2, 600), (3, 2, 1, 300), (4, 3, 2, 700), (5, 1, 3, 900), (6, 2, 3, 400);", + "sql": "SELECT c.country, b.brand, SUM(i.quantity) as total_unsold_quantity FROM Inventory i JOIN Countries c ON i.country_id \u003d c.id JOIN Brands b ON i.brand_id \u003d b.id GROUP BY c.country, b.brand HAVING total_unsold_quantity \u003e 500;", + "sql_explanation": "This SQL query calculates the total quantity of unsold garments for each brand, grouped by country, by joining the Inventory, Countries, and Brands tables. It then filters for only those with a quantity greater than 500 using the HAVING clause." 
+}, { + "id": "479", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales revenue for each country, ordered by the total sales revenue in descending order?", + "sql_context": "CREATE TABLE Countries (CountryID INT, CountryName VARCHAR(255));CREATE TABLE Garments (GarmentID INT, CountryID INT, SalePrice DECIMAL(10,2));CREATE TABLE Sales (SaleID INT, GarmentID INT, SaleDate DATE, Quantity INT);", + "sql": "SELECT c.CountryName, SUM(g.SalePrice * s.Quantity) AS TotalRevenue FROM Countries c JOIN Garments g ON c.CountryID \u003d g.CountryID JOIN Sales s ON g.GarmentID \u003d s.GarmentID GROUP BY c.CountryName ORDER BY TotalRevenue DESC;", + "sql_explanation": "This query calculates the total sales revenue for each country by joining the Countries, Garments, and Sales tables on CountryID, GarmentID, and GarmentID, respectively. It then groups the results by CountryName and calculates the total sales revenue using the SUM function. Finally, it orders the results by the total sales revenue in descending order." 
+}, { + "id": "577", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of unique visitors per exhibition in descending order.", + "sql_context": "CREATE TABLE Exhibitions (id INT, city VARCHAR(20), visitors INT); CREATE TABLE VisitorExhibitions (visitor_id INT, exhibition_id INT); INSERT INTO Exhibitions (id, city, visitors) VALUES (1, \u0027Paris\u0027, 3000), (2, \u0027London\u0027, 4000), (3, \u0027New York\u0027, 5000); INSERT INTO VisitorExhibitions (visitor_id, exhibition_id) VALUES (1, 1), (1, 2), (2, 1), (3, 3);", + "sql": "SELECT e.city, COUNT(DISTINCT v.id) AS unique_visitors FROM Exhibitions e JOIN VisitorExhibitions ve ON e.id \u003d ve.exhibition_id JOIN Visitors v ON ve.visitor_id \u003d v.id GROUP BY e.id ORDER BY unique_visitors DESC", + "sql_explanation": "We find the number of unique visitors per exhibition by joining the Exhibitions, VisitorExhibitions, and Visitors tables, grouping by exhibition_id, and then computing the count of distinct visitors for each exhibition. We order the results in descending order of unique visitors." 
+}, { + "id": "625", + "domain": "forestry", + "domain_description": "Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the density of trees (trees per hectare) by tree type in each country?", + "sql_context": "CREATE TABLE countries (id INT, name VARCHAR(255)); INSERT INTO countries (id, name) VALUES (1, \u0027Canada\u0027), (2, \u0027USA\u0027); CREATE TABLE tree_densities (id INT, country_id INT, tree_type_id INT, trees_per_hectare INT); INSERT INTO tree_densities (id, country_id, tree_type_id, trees_per_hectare) VALUES (1, 1, 1, 100), (2, 1, 2, 150), (3, 2, 1, 80), (4, 2, 2, 120); CREATE TABLE tree_types (id INT, name VARCHAR(255)); INSERT INTO tree_types (id, name) VALUES (1, \u0027Coniferous\u0027), (2, \u0027Deciduous\u0027);", + "sql": "SELECT c.name, tt.name, AVG(td.trees_per_hectare) avg_trees_per_hectare FROM tree_densities td JOIN countries c ON td.country_id \u003d c.id JOIN tree_types tt ON td.tree_type_id \u003d tt.id GROUP BY c.name, tt.name;", + "sql_explanation": "This query retrieves the density of trees (trees per hectare) by tree type in each country. It starts by joining the tree_densities, countries, and tree_types tables based on their relationships. Then, it uses GROUP BY to group the records by country and tree type, followed by AVG to calculate the average density for each group." 
+}, { + "id": "144", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the total tonnage of cargo transported by vessels with US flag in the second quarter of 2022?", + "sql_context": "CREATE TABLE Flag (flag_id INT PRIMARY KEY, flag_country VARCHAR(255)); INSERT INTO Flag (flag_id, flag_country) VALUES (1, \u0027United States\u0027); CREATE TABLE Vessel (vessel_id INT PRIMARY KEY, vessel_name VARCHAR(255), flag_id INT); CREATE TABLE Cargo (cargo_id INT PRIMARY KEY, vessel_id INT, cargo_weight INT, PRIMARY KEY (cargo_id, vessel_id)); CREATE TABLE Vessel_Movement (vessel_id INT, movement_date DATE, PRIMARY KEY (vessel_id, movement_date));", + "sql": "SELECT SUM(C.cargo_weight) FROM Vessel V JOIN Cargo C ON V.vessel_id \u003d C.vessel_id JOIN Vessel_Movement VM ON V.vessel_id \u003d VM.vessel_id JOIN Flag F ON V.flag_id \u003d F.flag_id WHERE VM.movement_date \u003e\u003d \u00272022-04-01\u0027 AND VM.movement_date \u003c \u00272022-07-01\u0027 AND F.flag_country \u003d \u0027United States\u0027;", + "sql_explanation": "The SQL query calculates the total tonnage of cargo transported by vessels with US flag in the second quarter of 2022. It first joins the Vessel, Cargo, Vessel_Movement, and Flag tables on the relevant fields. Then it filters the records based on the movement_date in the second quarter of 2022 and flag_country as United States. Lastly, it calculates the total tonnage using the SUM function." 
+}, { + "id": "659", + "domain": "maritime", + "domain_description": "Vessel performance data, cargo tracking, safety records, and regulatory compliance.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of vessels that have visited Tokyo and Osaka in Japan?", + "sql_context": "CREATE TABLE Port (port_id INT PRIMARY KEY, port_name VARCHAR(255), port_country VARCHAR(255)); INSERT INTO Port (port_id, port_name, port_country) VALUES (3, \u0027Tokyo\u0027, \u0027Japan\u0027), (4, \u0027Osaka\u0027, \u0027Japan\u0027); CREATE TABLE Vessel (vessel_id INT PRIMARY KEY, vessel_name VARCHAR(255)); CREATE TABLE Vessel_Movement (vessel_id INT, movement_date DATE, port_id INT, PRIMARY KEY (vessel_id, movement_date));", + "sql": "SELECT DISTINCT V.vessel_name FROM Vessel V JOIN Vessel_Movement VM ON V.vessel_id \u003d VM.vessel_id JOIN Port P ON VM.port_id \u003d P.port_id WHERE P.port_name IN (\u0027Tokyo\u0027, \u0027Osaka\u0027) AND P.port_country \u003d \u0027Japan\u0027;", + "sql_explanation": "The SQL query lists the names of vessels that have visited Tokyo and Osaka in Japan. It first joins the Vessel, Vessel_Movement, and Port tables on the relevant fields. Then it filters the records based on the port_name as Tokyo or Osaka and port_country as Japan. Lastly, it selects the distinct vessel_names." 
+}, { + "id": "213", + "domain": "startups", + "domain_description": "Company founding data, funding records, diversity metrics, and innovation trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of unique companies founded by a person who identifies as a person of color in the cleantech industry.", + "sql_context": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founding_date DATE, founder_race TEXT);CREATE TABLE company_founders (company_id INT, founder_id INT);CREATE TABLE founders (id INT, race TEXT);", + "sql": "SELECT COUNT(DISTINCT companies.id) FROM companies INNER JOIN company_founders ON companies.id \u003d company_founders.company_id INNER JOIN founders ON company_founders.founder_id \u003d founders.id WHERE companies.industry \u003d \u0027cleantech\u0027 AND founders.race \u003d \u0027person of color\u0027;", + "sql_explanation": "This query identifies the number of unique companies founded by a person who identifies as a person of color in the cleantech industry. It first joins the companies, company_founders, and founders tables on their respective IDs. Then, it filters the results to only include companies in the cleantech industry and founders who identify as a person of color. Finally, it counts the number of distinct companies using the COUNT and DISTINCT keywords." 
+}, { + "id": "196", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show all space missions and their related satellites.", + "sql_context": "CREATE TABLE space_missions (mission_id INT, name VARCHAR(100), launch_date DATE); CREATE TABLE mission_satellites (mission_id INT, satellite_id INT); CREATE TABLE satellites (satellite_id INT, name VARCHAR(100), manufacturer VARCHAR(100)); INSERT INTO space_missions (mission_id, name, launch_date) VALUES (1, \u0027Mission1\u0027, \u00272010-05-05\u0027); INSERT INTO mission_satellites (mission_id, satellite_id) VALUES (1, 1); INSERT INTO satellites (satellite_id, name, manufacturer) VALUES (1, \u0027Sat1\u0027, \u0027SpaceTech Inc.\u0027);", + "sql": "SELECT space_missions.name AS mission_name, satellites.name AS satellite_name FROM space_missions INNER JOIN mission_satellites ON space_missions.mission_id \u003d mission_satellites.mission_id INNER JOIN satellites ON mission_satellites.satellite_id \u003d satellites.satellite_id;", + "sql_explanation": "This query performs an inner join on the space_missions and mission_satellites tables, and then performs another inner join with the satellites table. It selects the name columns from both the space_missions and satellites tables." 
+}, { + "id": "224", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the production volume of each chemical, grouped by day and facility?", + "sql_context": "CREATE TABLE Chemical(Id INT, Name VARCHAR(50)); CREATE TABLE Production(Id INT, ChemicalId INT, FacilityId INT, ProductionDate DATE, QuantityProduced INT);", + "sql": "SELECT c.Name, f.Name AS FacilityName, DATE_FORMAT(p.ProductionDate, \u0027%Y-%m-%d\u0027) AS Date, SUM(p.QuantityProduced) AS TotalQuantityProduced FROM Production p JOIN Chemical c ON p.ChemicalId \u003d c.Id JOIN Facility f ON p.FacilityId \u003d f.Id GROUP BY c.Name, f.Name, Date;", + "sql_explanation": "This SQL query joins the Chemical and Production tables on the ChemicalId foreign key, and joins the Production and Facility tables on the FacilityId foreign key. It then groups the data by chemical name, facility name, and production date using the DATE_FORMAT function, and calculates the total production volume for each chemical, facility, and day using the SUM function." 
+}, { + "id": "71", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which ingredients are used in products that have received a safety violation in the past year and are not cruelty-free certified?", + "sql_context": "CREATE TABLE products (product_id INT, product_name TEXT, is_cruelty_free BOOLEAN); CREATE TABLE ingredient_sources (ingredient_id INT, product_id INT, source_country TEXT); CREATE TABLE safety_records (record_id INT, product_id INT, violation_date DATE);", + "sql": "SELECT ingredient_sources.ingredient_id FROM ingredient_sources INNER JOIN products ON ingredient_sources.product_id \u003d products.product_id INNER JOIN safety_records ON products.product_id \u003d safety_records.product_id WHERE safety_records.violation_date \u003e\u003d NOW() - INTERVAL \u00271 year\u0027 AND products.is_cruelty_free \u003d FALSE;", + "sql_explanation": "This query returns the list of ingredients used in products that have received a safety violation in the past year and are not cruelty-free certified. It first joins the ingredient_sources table with the products table on the product_id field. It then joins the resulting table with the safety_records table on the product_id field. The query then filters for rows where the violation date is within the past year and the product is not cruelty-free certified. The result is a table with the ingredient ID for each ingredient used in a product that meets the specified criteria." 
+}, { + "id": "120", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of ingredients sourced from US-based suppliers for each product?", + "sql_context": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, supplier_id INT, cost DECIMAL(10,2)); CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT, supplier_location TEXT); CREATE TABLE products (product_id INT, product_name TEXT);", + "sql": "SELECT products.product_name, SUM(ingredients.cost) as total_cost FROM ingredients INNER JOIN suppliers ON ingredients.supplier_id \u003d suppliers.supplier_id INNER JOIN products ON ingredients.product_id \u003d products.product_id WHERE suppliers.supplier_location \u003d \u0027US\u0027 GROUP BY products.product_name;", + "sql_explanation": "The SQL query joins the ingredients, suppliers, and products tables on their respective id columns. It filters for rows where the supplier_location column is \u0027US\u0027. Then it groups the results by product_name and calculates the sum of the cost column for each group. The result is a list of product names and their total ingredient costs from US-based suppliers." 
+}, { + "id": "236", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating of skincare products supplied by the USA?", + "sql_context": "CREATE TABLE CosmeticsProducts (ProductID INT, ProductName VARCHAR(50), Category VARCHAR(50)); INSERT INTO CosmeticsProducts (ProductID, ProductName, Category) VALUES (1, \u0027Product A\u0027, \u0027Skin Care\u0027), (2, \u0027Product B\u0027, \u0027Skin Care\u0027), (3, \u0027Product C\u0027, \u0027Makeup\u0027);", + "sql": "SELECT AVG(pr.Rating) as AverageRating FROM ProductReviews pr JOIN Inventory i ON pr.ProductID \u003d i.ProductID JOIN Suppliers s ON i.ProductID \u003d s.ProductID JOIN CosmeticsProducts p ON i.ProductID \u003d p.ProductID WHERE p.Category \u003d \u0027Skin Care\u0027 AND s.Country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the average rating for skincare products supplied by the USA by joining the ProductReviews, Inventory, Suppliers, and CosmeticsProducts tables based on the ProductID, filtering them using the WHERE clause, and grouping them using the GROUP BY clause. Then, it computes the average rating using the AVG function." 
+}, { + "id": "243", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 cruelty-free cosmetic products by sales in the Canadian region for 2022?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), is_cruelty_free BOOLEAN, sales FLOAT); INSERT INTO products VALUES (1, \u0027Lipstick\u0027, true, 500.50), (2, \u0027Mascara\u0027, false, 300.00), (3, \u0027Foundation\u0027, true, 700.00); CREATE TABLE regions (region_id INT, region_name VARCHAR(50)); INSERT INTO regions VALUES (1, \u0027Canada\u0027), (2, \u0027USA\u0027); CREATE TABLE time (time_id INT, year INT); INSERT INTO time VALUES (1, 2022);", + "sql": "SELECT p.product_name, p.sales FROM products p INNER JOIN regions r ON p.product_id \u003d r.region_id INNER JOIN time t ON p.product_id \u003d t.time_id WHERE p.is_cruelty_free \u003d true GROUP BY p.product_name, p.sales, r.region_name, t.year ORDER BY p.sales DESC LIMIT 3;", + "sql_explanation": "The SQL query performs an inner join between the products, regions, and time tables based on the product_id, region_id, and time_id respectively. It then filters the results for cruelty-free products and groups them by product name, sales, region name, and year. The query then orders the results by sales in descending order and limits the results to the top 3 products." 
+}, { + "id": "332", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cruelty-free certified cosmetic brands source ingredients from Australia?", + "sql_context": "CREATE TABLE BrandIngredientSource (BrandID INT, ProductID INT, IngredientSourceCountry VARCHAR(50)); CREATE TABLE CrueltyFreeCertification (BrandID INT, Certified BIT); INSERT INTO BrandIngredientSource (BrandID, ProductID, IngredientSourceCountry) VALUES (1001, 100, \u0027Australia\u0027), (1002, 101, \u0027Australia\u0027), (1003, 102, \u0027Canada\u0027); INSERT INTO CrueltyFreeCertification (BrandID, Certified) VALUES (1001, 1), (1002, 1), (1003, 0);", + "sql": "SELECT DISTINCT B.BrandName FROM BrandIngredientSource BIS INNER JOIN Brands B ON BIS.BrandID \u003d B.BrandID INNER JOIN CrueltyFreeCertification CFC ON B.BrandID \u003d CFC.BrandID WHERE BIS.IngredientSourceCountry \u003d \u0027Australia\u0027 AND CFC.Certified \u003d 1;", + "sql_explanation": "This query first creates and inserts data into the BrandIngredientSource and CrueltyFreeCertification tables. It then performs an inner join between the BrandIngredientSource, Brands, and CrueltyFreeCertification tables, filtered by the IngredientSourceCountry being Australia and CrueltyFreeCertification being 1 (true). The query then selects the distinct BrandName from the joined table, which returns the list of brands that source ingredients from Australia and have cruelty-free certification." 
+}, { + "id": "438", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many cosmetic products have been certified as cruelty-free in each region?", + "sql_context": "CREATE TABLE certifications (certification_id INT, product_id INT, certification_date DATE, is_cruelty_free BOOLEAN); INSERT INTO certifications VALUES (1, 1, \u00272022-01-01\u0027, true), (2, 2, \u00272022-02-01\u0027, false), (3, 3, \u00272022-03-01\u0027, true); CREATE TABLE products (product_id INT, product_name VARCHAR(50)); INSERT INTO products VALUES (1, \u0027Lipstick\u0027), (2, \u0027Mascara\u0027), (3, \u0027Foundation\u0027); CREATE TABLE regions (region_id INT, region_name VARCHAR(50)); INSERT INTO regions VALUES (1, \u0027Canada\u0027), (2, \u0027USA\u0027), (3, \u0027Europe\u0027);", + "sql": "SELECT r.region_name, COUNT(c.certification_id) FROM certifications c INNER JOIN products p ON c.product_id \u003d p.product_id INNER JOIN regions r ON p.region_id \u003d r.region_id WHERE c.is_cruelty_free \u003d true GROUP BY r.region_name;", + "sql_explanation": "The SQL query performs an inner join between the certifications, products, and regions tables based on the product_id and region_id respectively. It then filters the results for cruelty-free products and groups them by region name. The query then counts the number of certifications for each region." 
+}, { + "id": "39", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of professional development workshops attended by teachers in each district, grouped by subject area?", + "sql_context": "CREATE TABLE districts (district_id INT, district_name TEXT); INSERT INTO districts (district_id, district_name) VALUES (1, \u0027Urban\u0027), (2, \u0027Suburban\u0027), (3, \u0027Rural\u0027); CREATE TABLE teachers (teacher_id INT, teacher_name TEXT, district_id INT); INSERT INTO teachers (teacher_id, teacher_name, district_id) VALUES (1, \u0027Ms. Doe\u0027, 1), (2, \u0027Mr. Smith\u0027, 2), (3, \u0027Ms. Johnson\u0027, 3), (4, \u0027Mr. 
Williams\u0027, 1); CREATE TABLE professional_development (program_id INT, program_name TEXT, program_subject TEXT, teacher_id INT); INSERT INTO professional_development (program_id, program_name, program_subject, teacher_id) VALUES (1, \u0027Python for Educators\u0027, \u0027Computer Science\u0027, 1), (2, \u0027Data Science for Teachers\u0027, \u0027Data Science\u0027, 2), (3, \u0027Inclusive Teaching\u0027, \u0027Diversity \u0026 Inclusion\u0027, 3), (4, \u0027Open Pedagogy\u0027, \u0027Pedagogy\u0027, 4), (5, \u0027Diversity and Inclusion\u0027, \u0027Diversity \u0026 Inclusion\u0027, 1), (6, \u0027Curriculum Design\u0027, \u0027Curriculum\u0027, 2), (7, \u0027Culturally Responsive Teaching\u0027, \u0027Diversity \u0026 Inclusion\u0027, 3), (8, \u0027Project Based Learning\u0027, \u0027Pedagogy\u0027, 4);", + "sql": "SELECT d.district_name, p.program_subject, COUNT(pd.program_id) as num_programs FROM districts d JOIN teachers t ON d.district_id \u003d t.district_id JOIN professional_development pd ON t.teacher_id \u003d pd.teacher_id JOIN (SELECT program_subject FROM professional_development GROUP BY program_subject) p ON pd.program_subject \u003d p.program_subject GROUP BY d.district_name, p.program_subject;", + "sql_explanation": "We perform a join between the districts, teachers, and professional_development table on the district_id and teacher_id field, then group by district_name and program_subject and calculate the number of programs for each group." 
+}, { + "id": "55", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hours spent by students in lifelong learning programs in each district?", + "sql_context": "CREATE TABLE students (id INT, district TEXT);CREATE TABLE lifelong_learning_programs (id INT, start_date DATE, end_date DATE);CREATE TABLE program_enrollment (student_id INT, program_id INT, enrollment_date DATE, end_date DATE);", + "sql": "SELECT students.district, SUM(DATEDIFF(\u0027day\u0027, program_enrollment.enrollment_date, program_enrollment.end_date)) as total_hours FROM students INNER JOIN program_enrollment ON students.id \u003d program_enrollment.student_id INNER JOIN lifelong_learning_programs ON program_enrollment.program_id \u003d lifelong_learning_programs.id GROUP BY students.district;", + "sql_explanation": "This query joins the students, program_enrollment, and lifelong_learning_programs tables on their respective foreign keys. It then calculates the total number of hours spent by students in lifelong learning programs in each district by summing the difference between the enrollment and end dates for each program using the DATEDIFF function, and groups the results by student district." 
+}, { + "id": "60", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of students who have completed a lifelong learning course in \u0027South High\u0027 school?", + "sql_context": "CREATE TABLE students_lifelong_learning (student_id INT, school_id INT, completed_course INT); INSERT INTO students_lifelong_learning VALUES (1, 1, 1); INSERT INTO students_lifelong_learning VALUES (2, 1, 0); INSERT INTO students_lifelong_learning VALUES (3, 2, 1); INSERT INTO students_lifelong_learning VALUES (4, 2, 1); CREATE TABLE school_roster (student_id INT, school_id INT, school_name VARCHAR(255)); INSERT INTO school_roster VALUES (1, 1, \u0027South High\u0027); INSERT INTO school_roster VALUES (2, 1, \u0027South High\u0027); INSERT INTO school_roster VALUES (3, 2, \u0027North Middle\u0027); INSERT INTO school_roster VALUES (4, 2, \u0027North Middle\u0027);", + "sql": "SELECT s.school_name, 100.0 * SUM(CASE WHEN sl.completed_course \u003d 1 THEN 1 ELSE 0 END) / COUNT(sr.student_id) AS completion_percentage FROM school_roster sr INNER JOIN students_lifelong_learning sl ON sr.student_id \u003d sl.student_id INNER JOIN schools s ON sr.school_id \u003d s.school_id WHERE s.school_name \u003d \u0027South High\u0027 GROUP BY s.school_name;", + "sql_explanation": "This query performs an inner join between the school_roster, students_lifelong_learning, and schools tables, using the student_id and school_id as common keys. 
It then calculates the percentage of students who have completed a lifelong learning course in \u0027South High\u0027 school by using a conditional sum, filtering the results based on school_name, and grouping the results by school_name." +}, { + "id": "188", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many professional development courses have been completed by teachers in each school district, broken down by subject area?", + "sql_context": "CREATE TABLE professional_development_courses (course_id INT, teacher_id INT, course_completion_date DATE, course_subject VARCHAR(50)); CREATE TABLE teachers (teacher_id INT, district_id INT); CREATE TABLE school_districts (district_id INT, district_name VARCHAR(50));", + "sql": "SELECT sd.district_name, pdc.course_subject, COUNT(pdc.course_id) FROM professional_development_courses pdc INNER JOIN teachers t ON pdc.teacher_id \u003d t.teacher_id INNER JOIN school_districts sd ON t.district_id \u003d sd.district_id GROUP BY sd.district_name, pdc.course_subject;", + "sql_explanation": "This query calculates the number of professional development courses completed by teachers in each school district, broken down by subject area, by joining the professional_development_courses, teachers, and school_districts tables on the teacher_id and district_id columns. It then groups the results by the district_name and course_subject columns using the GROUP BY clause, and calculates the count of course_id for each group using the COUNT() function." 
+}, { + "id": "70", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest year an artwork was exhibited for artists from Japan?", + "sql_context": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, Name VARCHAR(255), Nationality VARCHAR(255)); CREATE TABLE Artworks (ArtworkID INT PRIMARY KEY, Title VARCHAR(255), ArtistID INT, Year INT); CREATE TABLE Exhibitions (ExhibitionID INT PRIMARY KEY, Name VARCHAR(255), StartDate DATE, EndDate DATE); CREATE TABLE ExhibitionArtworks (ExhibitionID INT, ArtworkID INT);", + "sql": "SELECT MIN(Artworks.Year) AS EarliestYear FROM Artists INNER JOIN Artworks ON Artists.ArtistID \u003d Artworks.ArtistID INNER JOIN ExhibitionArtworks ON Artworks.ArtworkID \u003d ExhibitionArtworks.ArtworkID INNER JOIN Exhibitions ON ExhibitionArtworks.ExhibitionID \u003d Exhibitions.ExhibitionID WHERE Artists.Nationality \u003d \u0027Japanese\u0027;", + "sql_explanation": "This query calculates the earliest year an artwork was exhibited for artists from Japan. The Artists table is inner joined with the Artworks table to connect artists to their artworks. Then, the result is inner joined with the ExhibitionArtworks table to connect artworks to their exhibitions. The result is then inner joined with the Exhibitions table to include exhibition information, such as start date. The MIN function calculates the earliest year an artwork was exhibited for artists from Japan." 
+}, { + "id": "135", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated by the Louvre from Impressionist paintings between 1950 and 1970?", + "sql_context": "CREATE TABLE Artworks (artwork_id INT, name VARCHAR(255), artist_id INT, date_sold DATE, price DECIMAL(10,2), museum_id INT); CREATE TABLE Artists (artist_id INT, name VARCHAR(255), nationality VARCHAR(255), gender VARCHAR(255)); CREATE TABLE Museums (museum_id INT, name VARCHAR(255));", + "sql": "SELECT SUM(Artworks.price) FROM Artworks INNER JOIN Artists ON Artworks.artist_id \u003d Artists.artist_id INNER JOIN Museums ON Artworks.museum_id \u003d Museums.museum_id WHERE Artists.nationality \u003d \u0027Impressionist\u0027 AND Museums.name \u003d \u0027The Louvre\u0027 AND YEAR(Artworks.date_sold) BETWEEN 1950 AND 1970;", + "sql_explanation": "This query calculates the total revenue generated by the Louvre from Impressionist paintings between 1950 and 1970. It first joins the Artworks, Artists, and Museums tables on the relevant fields. Then, it filters the results to only include rows where the artist\u0027s nationality is \u0027Impressionist\u0027, the museum name is \u0027The Louvre\u0027, and the year of the date sold is between 1950 and 1970. Finally, it calculates the sum of the price field for the selected rows." 
+}, { + "id": "322", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all artworks that were exhibited in more than one gallery, along with the names of the galleries and the total number of exhibitions.", + "sql_context": "CREATE TABLE Artworks (id INT, title VARCHAR(50)); CREATE TABLE Exhibitions (id INT, artwork_id INT, gallery_id INT); CREATE TABLE Galleries (id INT, name VARCHAR(30));", + "sql": "SELECT a.title, GROUP_CONCAT(g.name) as gallery_names, COUNT(DISTINCT e.gallery_id) as num_exhibitions FROM Artworks a JOIN Exhibitions e ON a.id \u003d e.artwork_id JOIN Galleries g ON e.gallery_id \u003d g.id GROUP BY a.title HAVING num_exhibitions \u003e 1;", + "sql_explanation": "This query lists all artworks that were exhibited in more than one gallery. It joins the Artworks, Exhibitions, and Galleries tables on their respective IDs, groups the data by artwork title, concatenates the names of the galleries where each artwork was exhibited, and counts the distinct number of exhibitions for each artwork. The HAVING clause is used to filter the results and only return artworks exhibited in more than one gallery." 
+}, { + "id": "1685", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all collaborative artworks along with the names of their creators.", + "sql_context": "CREATE TABLE artists (id INT, name VARCHAR(50), gender VARCHAR(50));CREATE TABLE collaborations (id INT, artwork VARCHAR(50), artist1_id INT, artist2_id INT); INSERT INTO artists (id, name, gender) VALUES (1, \u0027Banksy\u0027, \u0027male\u0027), (2, \u0027Invader\u0027, \u0027male\u0027); INSERT INTO collaborations (id, artwork, artist1_id, artist2_id) VALUES (1, \u0027Artwork X\u0027, 1, 2);", + "sql": "SELECT c.artwork, a1.name, a2.name FROM collaborations c INNER JOIN artists a1 ON c.artist1_id \u003d a1.id INNER JOIN artists a2 ON c.artist2_id \u003d a2.id;", + "sql_explanation": "This SQL query joins the \u0027collaborations\u0027 table with the \u0027artists\u0027 table twice, first for artist1_id and then for artist2_id. It returns all collaborative artworks and their creators\u0027 names." 
+}, { + "id": "63", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total premium amount and number of policies for policyholders who have a risk assessment score greater than 80 and live in the state of California?", + "sql_context": "CREATE TABLE Policyholders (Id INT PRIMARY KEY, FirstName VARCHAR(50), LastName VARCHAR(50), Age INT, Gender VARCHAR(10), State VARCHAR(50)); CREATE TABLE Policies (Id INT PRIMARY KEY, PolicyholderId INT, PolicyType VARCHAR(50), CoverageAmount DECIMAL(10,2), FOREIGN KEY (PolicyholderId) REFERENCES Policyholders(Id)); CREATE TABLE Underwriting (Id INT PRIMARY KEY, PolicyholderId INT, RiskAssessmentScore INT, Premium DECIMAL(10,2), FOREIGN KEY (PolicyholderId) REFERENCES Policyholders(Id));", + "sql": "SELECT P.State, U.RiskAssessmentScore, SUM(U.Premium) as TotalPremiumAmount, COUNT(P.Id) as NumberOfPolicies FROM Policyholders P JOIN Policies PL ON P.Id \u003d PL.PolicyholderId JOIN Underwriting U ON P.Id \u003d U.PolicyholderId WHERE P.State \u003d \u0027California\u0027 AND U.RiskAssessmentScore \u003e 80 GROUP BY P.State ORDER BY TotalPremiumAmount DESC;", + "sql_explanation": "This query retrieves the state, risk assessment score, total premium amount, and number of policies for policyholders who have a risk assessment score greater than 80 and live in the state of California. It does this by joining the Policyholders, Policies, and Underwriting tables, filtering for policyholders who have a risk assessment score greater than 80 and live in California, and then grouping the results by state. 
Finally, it calculates the total premium amount and the number of policies, and orders the results by total premium amount in descending order." +}, { + "id": "18", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount and number of volunteers for each program category in H1 2023, and how many categories had no activity?", + "sql_context": "CREATE TABLE ProgramCategories (ProgramCategoryID INT, ProgramName TEXT, Category TEXT); INSERT INTO ProgramCategories (ProgramCategoryID, ProgramName, Category) VALUES (1, \u0027Kids Education\u0027, \u0027Education\u0027), (2, \u0027Medical Research\u0027, \u0027Health\u0027); CREATE TABLE ProgramDonations (ProgramCategoryID INT, DonationAmount DECIMAL(10,2), DonationDate DATE); INSERT INTO ProgramDonations (ProgramCategoryID, DonationAmount, DonationDate) VALUES (1, 250.00, \u00272023-01-15\u0027), (2, 450.00, \u00272023-03-30\u0027); CREATE TABLE ProgramVolunteers (ProgramCategoryID INT, VolunteerCount INT); INSERT INTO ProgramVolunteers (ProgramCategoryID, VolunteerCount) VALUES (1, 15), (2, 25);", + "sql": "SELECT Category, SUM(DonationAmount) as TotalDonation, SUM(VolunteerCount) as NumVolunteers FROM ProgramCategories JOIN ProgramDonations ON ProgramCategories.ProgramCategoryID \u003d ProgramDonations.ProgramCategoryID JOIN ProgramVolunteers ON ProgramCategories.ProgramCategoryID \u003d ProgramVolunteers.ProgramCategoryID WHERE DonationDate BETWEEN \u00272023-01-01\u0027 AND \u00272023-06-30\u0027 GROUP BY Category HAVING SUM(DonationAmount) \u003e 0 AND SUM(VolunteerCount) \u003e 0;", + "sql_explanation": 
"The SQL query calculates the total donation amount and number of volunteers for each program category, filtering by the date range of H1 2023, and excludes categories with no activity." +}, { + "id": "83", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per volunteer by program category?", + "sql_context": "CREATE TABLE volunteers (volunteer_id INT, org_id INT);CREATE TABLE programs (program_id INT, program_category_id INT);CREATE TABLE donations (donation_id INT, donor_id INT, program_id INT, donation_amount DECIMAL(10,2)); INSERT INTO volunteers (volunteer_id, org_id) VALUES (1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3), (7, 3); INSERT INTO programs (program_id, program_category_id) VALUES (1, 1), (2, 1), (3, 2), (4, 3); INSERT INTO donations (donation_id, donor_id, program_id, donation_amount) VALUES (1, 1, 1, 500.00), (2, 2, 1, 750.00), (3, 3, 2, 300.00), (4, 4, 3, 400.00), (5, 5, 3, 600.00), (6, 1, 1, 800.00), (7, 2, 2, 500.00);", + "sql": "SELECT pc.program_category_name, AVG(d.donation_amount) as avg_donation_amount_per_volunteer FROM programs p JOIN donations d ON p.program_id \u003d d.program_id JOIN volunteers v ON p.org_id \u003d v.org_id JOIN program_categories pc ON p.program_category_id \u003d pc.program_category_id GROUP BY pc.program_category_name;", + "sql_explanation": "This query performs a series of joins between the volunteers, programs, donations, and program_categories tables on the appropriate columns. 
It calculates the average donation amount per volunteer by program category by grouping the results by program category name. The query then returns the average donation amount per volunteer for each program category." +}, { + "id": "581", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the most common artifact types at each excavation site?", + "sql_context": "CREATE TABLE ArtifactTypes (TypeID INT, ArtifactID INT, ArtifactType TEXT); INSERT INTO ArtifactTypes (TypeID, ArtifactID, ArtifactType) VALUES (1, 1, \u0027Pottery\u0027); INSERT INTO ArtifactTypes (TypeID, ArtifactID, ArtifactType) VALUES (2, 2, \u0027Jewelry\u0027); INSERT INTO ArtifactTypes (TypeID, ArtifactID, ArtifactType) VALUES (3, 3, \u0027Furniture\u0027); INSERT INTO ArtifactTypes (TypeID, ArtifactID, ArtifactType) VALUES (4, 4, \u0027Statues\u0027);", + "sql": "SELECT e.SiteName, t.ArtifactType, COUNT(*) AS Count FROM ExcavationSites e JOIN ArtifactAnalysis a ON e.SiteID \u003d a.SiteID JOIN ArtifactTypes t ON a.ArtifactID \u003d t.ArtifactID GROUP BY e.SiteName, t.ArtifactType;", + "sql_explanation": "This query first joins the ExcavationSites, ArtifactAnalysis, and ArtifactTypes tables. Then, it groups the data by the SiteName and ArtifactType, and calculates the count of each combination." 
+}, { + "id": "589", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have adopted electric vehicles the most?", + "sql_context": "CREATE TABLE Country (id INT, name TEXT); CREATE TABLE Vehicle (id INT, name TEXT, country_id INT); CREATE TABLE ElectricVehicle (id INT, vehicle_id INT); INSERT INTO Country (id, name) VALUES (1, \u0027USA\u0027), (2, \u0027China\u0027), (3, \u0027Germany\u0027); INSERT INTO Vehicle (id, name, country_id) VALUES (1, \u0027Model S\u0027, 1), (2, \u0027Camry\u0027, 1), (3, \u0027Tesla Semi\u0027, 2); INSERT INTO ElectricVehicle (id, vehicle_id) VALUES (1, 1), (2, 3);", + "sql": "SELECT Country.name, COUNT(*) FROM Country INNER JOIN Vehicle ON Country.id \u003d Vehicle.country_id INNER JOIN ElectricVehicle ON Vehicle.id \u003d ElectricVehicle.vehicle_id GROUP BY Country.id ORDER BY COUNT(*) DESC;", + "sql_explanation": "This query lists the countries with the most electric vehicle adoptions by joining the Country, Vehicle, and ElectricVehicle tables, and then using the COUNT function to count the number of electric vehicles per country, and finally ordering the results by the count in descending order." 
+}, { + "id": "609", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all digital assets and their respective smart contract names in the Gaming category.", + "sql_context": "CREATE TABLE Asset_Smart_Contracts (id INT PRIMARY KEY, digital_asset_id INT, smart_contract_id INT, FOREIGN KEY (digital_asset_id) REFERENCES Digital_Assets(id), FOREIGN KEY (smart_contract_id) REFERENCES Smart_Contracts(id)); INSERT INTO Asset_Smart_Contracts (id, digital_asset_id, smart_contract_id) VALUES (1, 1, 2); INSERT INTO Asset_Smart_Contracts (id, digital_asset_id, smart_contract_id) VALUES (2, 2, 4);", + "sql": "SELECT da.name, sc.name FROM Digital_Assets da INNER JOIN Asset_Smart_Contracts asc ON da.id \u003d asc.digital_asset_id INNER JOIN Smart_Contracts sc ON asc.smart_contract_id \u003d sc.id WHERE sc.category \u003d \u0027Gaming\u0027;", + "sql_explanation": "This query performs a 3-way join between Digital_Assets, Asset_Smart_Contracts, and Smart_Contracts tables, filters for Gaming category, and returns the digital asset names and their respective smart contract names." 
+}, { + "id": "758", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the names of users and the assets they own that are subject to any regulation.", + "sql_context": "CREATE TABLE user_assets (user_id INT, asset_name VARCHAR(255)); INSERT INTO user_assets (user_id, asset_name) VALUES (1, \u0027Asset1\u0027), (2, \u0027Asset2\u0027), (3, \u0027Asset3\u0027);", + "sql": "SELECT u.name, d.name FROM users u INNER JOIN user_assets ua ON u.id \u003d ua.user_id INNER JOIN digital_assets d ON ua.asset_name \u003d d.name INNER JOIN regulatory_frameworks r ON d.name \u003d r.asset_name;", + "sql_explanation": "This query performs a join between the \u0027users\u0027, \u0027user_assets\u0027, \u0027digital_assets\u0027, and \u0027regulatory_frameworks\u0027 tables. The selected columns are the user\u0027s name and the digital asset\u0027s name." 
+}, { + "id": "178", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of government employees in Canada, broken down by province and job category?", + "sql_context": "CREATE TABLE provinces (id INT, name VARCHAR(255)); INSERT INTO provinces (id, name) VALUES (1, \u0027Ontario\u0027), (2, \u0027Quebec\u0027), (3, \u0027British Columbia\u0027); CREATE TABLE job_categories (id INT, name VARCHAR(255)); INSERT INTO job_categories (id, name) VALUES (1, \u0027Management\u0027), (2, \u0027Administration\u0027), (3, \u0027Engineering\u0027); CREATE TABLE employees (id INT, province_id INT, job_category_id INT, salary INT); INSERT INTO employees (id, province_id, job_category_id, salary) VALUES (1, 1, 1, 80000), (2, 1, 2, 50000), (3, 2, 1, 85000), (4, 2, 3, 60000);", + "sql": "SELECT provinces.name, job_categories.name, AVG(employees.salary) AS avg_salary FROM employees INNER JOIN provinces ON employees.province_id \u003d provinces.id INNER JOIN job_categories ON employees.job_category_id \u003d job_categories.id GROUP BY provinces.name, job_categories.name;", + "sql_explanation": "This SQL query first performs a double inner join between the employees table, the provinces table on the province_id and id columns, and the job_categories table on the job_category_id and id columns. Then, it groups the results by the provinces.name and job_categories.name columns. Finally, it calculates the average of the employees.salary column to find the average salary of government employees in each province and job category." 
+}, { + "id": "739", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average outcome value for \u0027HealthImprovement\u0027 in \u0027USA\u0027?", + "sql_context": "CREATE TABLE Agency (id INT, Name VARCHAR(255), Sector VARCHAR(255), Country VARCHAR(255)); INSERT INTO Agency (id, Name, Sector, Country) VALUES (1, \u0027AgencyX\u0027, \u0027Environment\u0027, \u0027USA\u0027); INSERT INTO Agency (id, Name, Sector, Country) VALUES (2, \u0027AgencyY\u0027, \u0027Healthcare\u0027, \u0027Canada\u0027); CREATE TABLE Project (id INT, Agency_id INT, Name VARCHAR(255), Country VARCHAR(255)); INSERT INTO Project (id, Agency_id, Name, Country) VALUES (1, 1, \u0027ProjectA\u0027, \u0027USA\u0027); INSERT INTO Project (id, Agency_id, Name, Country) VALUES (2, 2, \u0027ProjectB\u0027, \u0027Canada\u0027); CREATE TABLE Outcome (id INT, Project_id INT, Metric VARCHAR(255), Value FLOAT); INSERT INTO Outcome (id, Project_id, Metric, Value) VALUES (1, 1, \u0027EmissionReduction\u0027, 10000); INSERT INTO Outcome (id, Project_id, Metric, Value) VALUES (2, 2, \u0027HealthImprovement\u0027, 500);", + "sql": "SELECT AVG(Value) FROM Agency INNER JOIN Project ON Agency.id \u003d Project.Agency_id INNER JOIN Outcome ON Project.id \u003d Outcome.Project_id WHERE Agency.Country \u003d \u0027USA\u0027 AND Metric \u003d \u0027HealthImprovement\u0027;", + "sql_explanation": "Retrieve the average outcome value for HealthImprovement in the USA." 
+}, { + "id": "87", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many hours have volunteers from Japan contributed to educational programs in the past 6 months?", + "sql_context": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, program_id INT, hours DECIMAL(10,2), contribution_date DATE); CREATE TABLE volunteers (id INT, country VARCHAR(50)); CREATE TABLE programs (id INT, focus_area VARCHAR(50)); INSERT INTO volunteer_hours (id, volunteer_id, program_id, hours, contribution_date) VALUES (1, 1, 1, 2.0, \u00272021-03-15\u0027); INSERT INTO volunteers (id, country) VALUES (1, \u0027Japan\u0027); INSERT INTO programs (id, focus_area) VALUES (1, \u0027Tutor Kids\u0027);", + "sql": "SELECT SUM(hours) FROM volunteer_hours JOIN volunteers ON volunteer_hours.volunteer_id \u003d volunteers.id JOIN programs ON volunteer_hours.program_id \u003d programs.id WHERE volunteers.country \u003d \u0027Japan\u0027 AND programs.focus_area \u003d \u0027educational programs\u0027 AND contribution_date \u003e\u003d DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", + "sql_explanation": "This query calculates the total hours contributed by volunteers from Japan to educational programs in the past 6 months by summing the hours field. It filters hours based on the country, focus area, and contribution date using a JOIN clause and date comparison, which is not shown here." 
+}, { + "id": "118", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs have the highest and lowest number of volunteers?", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR(255)); INSERT INTO programs (id, name) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027); CREATE TABLE volunteers (id INT, name VARCHAR(255), program_id INT);", + "sql": "SELECT p.name, COUNT(*) as num_volunteers FROM programs p JOIN volunteers v ON p.id \u003d v.program_id GROUP BY p.id ORDER BY num_volunteers DESC LIMIT 1; SELECT p.name, COUNT(*) as num_volunteers FROM programs p JOIN volunteers v ON p.id \u003d v.program_id GROUP BY p.id ORDER BY num_volunteers LIMIT 1;", + "sql_explanation": "These queries find the program with the highest and lowest number of volunteers, respectively. The first query orders the programs by the number of volunteers in descending order and selects the first one, while the second query orders the programs by the number of volunteers in ascending order and selects the first one." 
+}, { + "id": "198", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers have signed up for each program?", + "sql_context": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT); INSERT INTO Volunteers (VolunteerID, Name) VALUES (1, \u0027Alice\u0027), (2, \u0027Bob\u0027), (3, \u0027Charlie\u0027); CREATE TABLE VolunteerPrograms (VolunteerID INT, ProgramID INT); INSERT INTO VolunteerPrograms (VolunteerID, ProgramID) VALUES (1, 1), (2, 1), (3, 2);", + "sql": "SELECT Programs.ProgramName, COUNT(VolunteerPrograms.VolunteerID) FROM VolunteerPrograms INNER JOIN Volunteers ON VolunteerPrograms.VolunteerID \u003d Volunteers.VolunteerID INNER JOIN Programs ON VolunteerPrograms.ProgramID \u003d Programs.ProgramID GROUP BY Programs.ProgramName;", + "sql_explanation": "This query counts the number of volunteers for each program by using INNER JOIN to combine data from VolunteerPrograms, Volunteers, and Programs tables. The COUNT function is applied to the VolunteerID column, and the GROUP BY statement is used to group records based on the ProgramName column." 
+}, { + "id": "622", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the average donation amount for each program category?", + "sql_context": "CREATE TABLE ProgramCategories (CategoryID INT, Category VARCHAR(20)); INSERT INTO ProgramCategories (CategoryID, Category) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027); CREATE TABLE Programs (ProgramID INT, CategoryID INT, ProgramName VARCHAR(50)); INSERT INTO Programs (ProgramID, CategoryID, ProgramName) VALUES (100, 1, \u0027Youth Education\u0027), (200, 2, \u0027Health Awareness\u0027), (300, 3, \u0027Environmental Cleanup\u0027); CREATE TABLE Donations (DonationID INT, ProgramID INT, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, ProgramID, DonationAmount) VALUES (1, 100, 50.00), (2, 100, 100.00), (3, 200, 200.00), (4, 200, 300.00), (5, 300, 400.00), (6, 300, 500.00), (7, 300, 600.00), (8, 300, 700.00);", + "sql": "SELECT pc.Category, AVG(d.DonationAmount) AS AverageDonationAmount FROM ProgramCategories pc JOIN Programs p ON pc.CategoryID \u003d p.CategoryID JOIN Donations d ON p.ProgramID \u003d d.ProgramID GROUP BY pc.Category;", + "sql_explanation": "This query calculates the average donation amount for each program category. It starts by joining the ProgramCategories, Programs, and Donations tables based on the relationships between them. Then, it groups the results by Category and calculates the AVG of DonationAmount for each group." 
+}, { + "id": "686", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total number of volunteers for each program category?", + "sql_context": "CREATE TABLE ProgramCategories (CategoryID INT, Category VARCHAR(20)); INSERT INTO ProgramCategories (CategoryID, Category) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027); CREATE TABLE Programs (ProgramID INT, CategoryID INT, ProgramName VARCHAR(50)); INSERT INTO Programs (ProgramID, CategoryID, ProgramName) VALUES (100, 1, \u0027Youth Education\u0027), (200, 2, \u0027Health Awareness\u0027), (300, 3, \u0027Environmental Cleanup\u0027); CREATE TABLE Volunteers (VolunteerID INT, ProgramID INT, VolunteerName VARCHAR(50)); INSERT INTO Volunteers (VolunteerID, ProgramID, VolunteerName) VALUES (1001, 100, \u0027Alex\u0027), (1002, 100, \u0027Bella\u0027), (2001, 200, \u0027Charlie\u0027), (3001, 300, \u0027David\u0027), (3002, 300, \u0027Ella\u0027);", + "sql": "SELECT pc.Category, COUNT(v.VolunteerID) AS TotalVolunteers FROM ProgramCategories pc JOIN Programs p ON pc.CategoryID \u003d p.CategoryID JOIN Volunteers v ON p.ProgramID \u003d v.ProgramID GROUP BY pc.Category;", + "sql_explanation": "This query calculates the total number of volunteers for each program category. It starts by joining the ProgramCategories, Programs, and Volunteers tables based on the relationships between them. Then, it groups the results by Category and calculates the COUNT of VolunteerID for each group." 
+}, { + "id": "323", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding received by organizations focusing on technology for social good in Asia?", + "sql_context": "CREATE TABLE contributor (contributor_id INT, contributor_name VARCHAR(255), amount DECIMAL(10,2)); INSERT INTO contributor (contributor_id, contributor_name, amount) VALUES (1, \u0027Tech for Good Foundation\u0027, 600000), (2, \u0027AI Ethics Alliance\u0027, 450000), (3, \u0027Digital Responsibility Fund\u0027, 500000), (4, \u0027Inclusive AI Coalition\u0027, 300000), (5, \u0027Social Tech Fund Asia\u0027, 800000); CREATE TABLE contributor_project (contributor_id INT, project_id INT); INSERT INTO contributor_project (contributor_id, project_id) VALUES (1, 1), (2, 1), (3, 1), (4, 2), (5, 3); CREATE TABLE project (project_id INT, project_name VARCHAR(255), location VARCHAR(50)); INSERT INTO project (project_id, project_name, location) VALUES (1, \u0027Ethical AI Development\u0027, \u0027North America\u0027), (2, \u0027Accessible Tech Education\u0027, \u0027Asia\u0027), (3, \u0027Digital Inclusion Program\u0027, \u0027Africa\u0027);", + "sql": "SELECT SUM(amount) as total_funding FROM contributor JOIN contributor_project ON contributor.contributor_id \u003d contributor_project.contributor_id JOIN project ON contributor_project.project_id \u003d project.project_id WHERE project.location \u003d \u0027Asia\u0027;", + "sql_explanation": "The SQL query calculates the total funding received by organizations focusing on technology for social good in Asia. 
It joins the contributor, contributor_project, and project tables and filters the records based on the project location. It then calculates the sum of the amount for the filtered records." +}, { + "id": "268", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total crop yield for each crop grown in each province in Argentina?", + "sql_context": "CREATE TABLE provinces (id INT, name TEXT, country TEXT); INSERT INTO provinces (id, name, country) VALUES (1, \u0027Buenos Aires\u0027, \u0027Argentina\u0027), (2, \u0027Cordoba\u0027, \u0027Argentina\u0027);", + "sql": "SELECT crops.name, provinces.name as province_name, SUM(crop_yield.yield) FROM crop_yield JOIN farms ON crop_yield.farm_id \u003d farms.id JOIN crops ON crop_yield.crop_id \u003d crops.id JOIN provinces ON farms.id \u003d provinces.id GROUP BY crops.name, provinces.name;", + "sql_explanation": "This SQL query calculates the total crop yield for each crop grown in each province in Argentina by joining the crop_yield, farms, crops, and provinces tables on the farm_id and id columns. It then calculates the sum of the yield column for each name and provinces.name in the crops and provinces tables using the GROUP BY clause." 
+}, { + "id": "92", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the owners with more than one property in sustainable neighborhoods?", + "sql_context": "CREATE TABLE Neighborhood (id INT, name VARCHAR(255), city VARCHAR(255), country VARCHAR(255), sustainable_rating FLOAT); CREATE TABLE Property (id INT, neighborhood VARCHAR(255), price FLOAT); CREATE TABLE Owner (id INT, name VARCHAR(255), email VARCHAR(255), property_id INT);", + "sql": "SELECT Owner.name, COUNT(DISTINCT Property.id) as property_count FROM Owner LEFT JOIN Property ON Owner.id \u003d Property.property_id LEFT JOIN Neighborhood ON Property.neighborhood \u003d Neighborhood.name WHERE Neighborhood.sustainable_rating IS NOT NULL GROUP BY Owner.name HAVING COUNT(DISTINCT Property.id) \u003e 1", + "sql_explanation": "This query retrieves the names of owners who have more than one property in sustainable neighborhoods. It performs a LEFT JOIN between the Owner, Property, and Neighborhood tables, filters the results to only show properties located in sustainable neighborhoods, groups them by owner name, and uses the HAVING clause to only include owners with more than one distinct property in sustainable neighborhoods." 
+}, { + "id": "936", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sqft of co-owned properties in Queens?", + "sql_context": "CREATE TABLE borough (borough_id INT, name VARCHAR(255)); INSERT INTO borough (borough_id, name) VALUES (1, \u0027Manhattan\u0027), (2, \u0027Brooklyn\u0027), (3, \u0027Queens\u0027), (4, \u0027Bronx\u0027), (5, \u0027Staten Island\u0027); CREATE TABLE property (property_id INT, sqft INT, borough_id INT); INSERT INTO property (property_id, sqft, borough_id) VALUES (1, 1000, 1), (2, 1200, 2), (3, 1500, 3), (4, 800, 1), (5, 1100, 3); CREATE TABLE co_ownership (co_ownership_id INT, property_id INT); INSERT INTO co_ownership (co_ownership_id, property_id) VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);", + "sql": "SELECT AVG(p.sqft) as avg_sqft FROM property p INNER JOIN borough b ON p.borough_id \u003d b.borough_id INNER JOIN co_ownership co ON p.property_id \u003d co.property_id WHERE b.name \u003d \u0027Queens\u0027;", + "sql_explanation": "This SQL query joins the \u0027property\u0027, \u0027borough\u0027, and \u0027co_ownership\u0027 tables. It then filters the results to only include properties in Queens and calculates the average sqft for the co-owned properties in that borough." 
+}, { + "id": "376", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which artists have performed at Jazzville during 2020?", + "sql_context": "CREATE TABLE Artists (ArtistID int, ArtistName varchar(100)); INSERT INTO Artists (ArtistID, ArtistName) VALUES (1, \u0027John Coltrane\u0027), (2, \u0027Miles Davis\u0027); CREATE TABLE Venues (VenueID int, VenueName varchar(100)); INSERT INTO Venues (VenueID, VenueName) VALUES (1, \u0027Jazzville\u0027); CREATE TABLE Performances (PerformanceID int, ArtistID int, VenueID int, PerformanceDate date); INSERT INTO Performances (PerformanceID, ArtistID, VenueID, PerformanceDate) VALUES (1, 1, 1, \u00272020-01-01\u0027), (2, 2, 1, \u00272019-12-31\u0027);", + "sql": "SELECT Artists.ArtistName FROM Artists INNER JOIN Performances ON Artists.ArtistID \u003d Performances.ArtistID INNER JOIN Venues ON Performances.VenueID \u003d Venues.VenueID WHERE Venues.VenueName \u003d \u0027Jazzville\u0027 AND YEAR(PerformanceDate) \u003d 2020;", + "sql_explanation": "This query first joins the Artists, Performances, and Venues tables based on their respective IDs. Then, it filters the results by VenueName and the YEAR of PerformanceDate, returning the names of artists who performed at Jazzville in 2020." 
+}, { + "id": "709", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of artworks created by artists who have works in the \u0027MoMA\u0027 museum?", + "sql_context": "CREATE TABLE artist_museums (artist_id INT, museum_name TEXT); INSERT INTO artist_museums (artist_id, museum_name) VALUES (1, \u0027MoMA\u0027), (2, \u0027Met\u0027), (3, \u0027Tate\u0027), (4, \u0027MoMA\u0027), (5, \u0027Tate\u0027); CREATE TABLE artworks (id INT, artist_id INT, title TEXT, museum_id INT); INSERT INTO artworks (id, artist_id, title, museum_id) VALUES (1, 1, \u0027Dora Maar au Chat\u0027, 1), (2, 2, \u0027Red Painting\u0027, NULL), (3, 3, \u0027Untitled\u0027, 3), (4, 4, \u0027The Persistence of Memory\u0027, 1), (5, 5, \u0027Composition with Red Blue and Yellow\u0027, NULL); CREATE TABLE museums (id INT, name TEXT); INSERT INTO museums (id, name) VALUES (1, \u0027MoMA\u0027), (2, \u0027Met\u0027), (3, \u0027Tate\u0027); CREATE TABLE museum_artworks (museum_id INT, artwork_id INT); INSERT INTO museum_artworks (museum_id, artwork_id) VALUES (1, 1), (1, 4), (3, 3);", + "sql": "SELECT MIN(artworks.id) FROM artworks JOIN artist_museums ON artworks.artist_id \u003d artist_museums.artist_id JOIN museum_artworks ON artworks.id \u003d museum_artworks.artwork_id WHERE museums.name \u003d \u0027MoMA\u0027;", + "sql_explanation": "Find the minimum number of artworks created by artists who have works in the \u0027MoMA\u0027 museum using the MIN function, JOINs, and the WHERE clause." 
+}, { + "id": "863", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artworks were created by female artists in Spain?", + "sql_context": "CREATE TABLE Artists (id INT, name VARCHAR(50), gender VARCHAR(6)); INSERT INTO Artists (id, name, gender) VALUES (1, \u0027Picasso\u0027, \u0027male\u0027), (2, \u0027Dali\u0027, \u0027male\u0027), (3, \u0027MirÃŗ\u0027, \u0027male\u0027), (4, \u0027Gaudí\u0027, \u0027male\u0027), (5, \u0027Majano\u0027, \u0027female\u0027); CREATE TABLE Artworks (id INT, artist_id INT, title VARCHAR(50)); INSERT INTO Artworks (id, artist_id, title) VALUES (1, 1, \u0027Guernica\u0027), (2, 3, \u0027The Farm\u0027), (3, 5, \u0027The Garden\u0027); CREATE TABLE Countries (id INT, name VARCHAR(20)); INSERT INTO Countries (id, name) VALUES (1, \u0027Spain\u0027);", + "sql": "SELECT COUNT(*) FROM Artworks JOIN Artists ON Artworks.artist_id \u003d Artists.id JOIN Countries ON Artists.country \u003d Countries.name WHERE Artists.gender \u003d \u0027female\u0027 AND Countries.name \u003d \u0027Spain\u0027;", + "sql_explanation": "Count the number of artworks created by female artists in Spain by joining Artworks, Artists, and Countries tables, filtering rows with artist gender female and country Spain." 
+}, { + "id": "445", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average contract amount for plumbing projects managed by contractors from California?", + "sql_context": "CREATE TABLE Contractors (Id INT, Name VARCHAR(50), LicenseNumber VARCHAR(50), City VARCHAR(50), State VARCHAR(2), Specialty VARCHAR(50)); CREATE TABLE ContractorProjects (ContractorId INT, ProjectId INT, ContractStartDate DATE, ContractEndDate DATE, ContractAmount DECIMAL(10,2)); CREATE TABLE Projects (Id INT, Name VARCHAR(50), City VARCHAR(50), StartDate DATE, EndDate DATE, Sustainable BOOLEAN);", + "sql": "SELECT AVG(cp.ContractAmount) FROM ContractorProjects cp JOIN Contractors c ON cp.ContractorId \u003d c.Id JOIN Projects p ON cp.ProjectId \u003d p.Id WHERE c.State \u003d \u0027CA\u0027 AND c.LicenseNumber IS NOT NULL AND p.Specialty \u003d \u0027Plumbing\u0027;", + "sql_explanation": "This query calculates the average contract amount for plumbing projects managed by contractors from California." 
+}, { + "id": "212", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organic suppliers have never provided ingredients to restaurants located in a coastal city?", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, organic BOOLEAN); CREATE TABLE ingredients (ingredient_id INT, supplier_id INT, restaurant_id INT, is_organic BOOLEAN); CREATE TABLE restaurants (restaurant_id INT, city VARCHAR(255)); INSERT INTO suppliers VALUES (1, true); INSERT INTO suppliers VALUES (2, false); INSERT INTO ingredients VALUES (1, 1, 1, true); INSERT INTO ingredients VALUES (2, 1, 2, false); INSERT INTO ingredients VALUES (3, 2, 3, false); INSERT INTO restaurants VALUES (1, \u0027Miami\u0027); INSERT INTO restaurants VALUES (2, \u0027Atlanta\u0027); INSERT INTO restaurants VALUES (3, \u0027Phoenix\u0027);", + "sql": "SELECT s.supplier_id FROM suppliers s LEFT JOIN ingredients i ON s.supplier_id \u003d i.supplier_id RIGHT JOIN restaurants r ON i.restaurant_id \u003d r.restaurant_id WHERE s.organic \u003d true AND r.city NOT LIKE \u0027%coast%\u0027 GROUP BY s.supplier_id HAVING COUNT(i.ingredient_id) \u003d 0;", + "sql_explanation": "This query performs a left join between the suppliers and ingredients tables and a right join between the ingredients and restaurants tables, allowing it to identify organic suppliers who have not provided ingredients to coastal restaurants. It then filters the data to only include organic suppliers and restaurants that are not located in a coastal city. 
Finally, it groups the data by supplier and filters the results to only include suppliers who have not provided any ingredients to coastal restaurants." +}, { + "id": "359", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total revenue for \u0027Organic Vegan\u0027 menu category", + "sql_context": "CREATE TABLE restaurants (id INT, name VARCHAR(50), category VARCHAR(50)); INSERT INTO restaurants (id, name, category) VALUES (1, \u0027Vegan Haven\u0027, \u0027Organic Vegan\u0027); CREATE TABLE menu_items (id INT, name VARCHAR(50), category VARCHAR(50), price DECIMAL(5,2)); INSERT INTO menu_items (id, name, category, price) VALUES (101, \u0027Quinoa Salad\u0027, \u0027Organic Vegan\u0027, 12.99), (102, \u0027Tofu Stir Fry\u0027, \u0027Organic Vegan\u0027, 15.50); CREATE TABLE orders (id INT, menu_item_id INT, quantity INT, order_date DATE); INSERT INTO orders (id, menu_item_id, quantity, order_date) VALUES (1001, 101, 2, \u00272021-08-01\u0027), (1002, 102, 1, \u00272021-08-02\u0027), (1003, 101, 3, \u00272021-08-03\u0027);", + "sql": "SELECT SUM(menu_items.price * orders.quantity) AS total_revenue FROM orders JOIN menu_items ON orders.menu_item_id \u003d menu_items.id JOIN restaurants ON menu_items.category \u003d restaurants.category WHERE restaurants.category \u003d \u0027Organic Vegan\u0027;", + "sql_explanation": "The query calculates the total revenue generated by the \u0027Organic Vegan\u0027 menu category. It first joins the \u0027orders\u0027, \u0027menu_items\u0027, and \u0027restaurants\u0027 tables based on their relationships. 
It then filters the records where the category is \u0027Organic Vegan\u0027. Finally, it calculates the revenue by summing the product of menu_item prices and their corresponding order quantities." +}, { + "id": "683", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the vendors with a sustainability score above 80 that supply to restaurants in a specific city (e.g., \u0027New York\u0027).", + "sql_context": "CREATE TABLE Restaurants (id INT, name VARCHAR(255), city VARCHAR(255), revenue FLOAT); CREATE TABLE Menu (id INT, rest_id INT, item VARCHAR(255), price FLOAT); CREATE TABLE Vendors (id INT, name VARCHAR(255), sustainability_score INT); CREATE TABLE Menu_Vendors (menu_id INT, vendor_id INT);", + "sql": "SELECT V.name FROM Vendors V JOIN Menu_Vendors MV ON V.id \u003d MV.vendor_id JOIN Menu M ON MV.menu_id \u003d M.id JOIN Restaurants R ON M.rest_id \u003d R.id WHERE R.city \u003d \u0027New York\u0027 AND V.sustainability_score \u003e 80;", + "sql_explanation": "The SQL query identifies the vendors with a sustainability score above 80 that supply to restaurants in a specific city. It joins the Vendors, Menu_Vendors, Menu, and Restaurants tables on their respective id fields and filters the results by city and sustainability score." 
+}, { + "id": "155", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated by each donor to projects located in Africa, in descending order?", + "sql_context": "CREATE TABLE donors (donor_id INT, name TEXT, total_donation FLOAT);CREATE TABLE projects (project_id INT, name TEXT, location TEXT);CREATE TABLE donations (donation_id INT, donor_id INT, project_id INT, amount FLOAT);INSERT INTO donors VALUES (1, \u0027John Doe\u0027, 5000.00), (2, \u0027Jane Smith\u0027, 7000.00);INSERT INTO projects VALUES (1, \u0027Water Wells\u0027, \u0027Africa\u0027), (2, \u0027Education Center\u0027, \u0027Asia\u0027);INSERT INTO donations VALUES (1, 1, 1, 1000.00), (2, 1, 2, 2000.00), (3, 2, 1, 3000.00), (4, 2, 2, 4000.00);", + "sql": "SELECT d.name, SUM(donations.amount) as total_donated_africa FROM donors d INNER JOIN donations ON d.donor_id \u003d donations.donor_id INNER JOIN projects ON donations.project_id \u003d projects.project_id WHERE projects.location \u003d \u0027Africa\u0027 GROUP BY d.name ORDER BY total_donated_africa DESC;", + "sql_explanation": "The SQL query performs an inner join between the donors, donations, and projects tables, filtering records where the projects table\u0027s location is \u0027Africa\u0027. It then groups the records by the donors table\u0027s name and calculates the sum of the donations table\u0027s amount for each group. Finally, it orders the resulting table in descending order based on the total donated to Africa." 
+}, { + "id": "240", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount per organization located in South America for donors aged between 30 and 50?", + "sql_context": "CREATE TABLE Donors (DonorID INT PRIMARY KEY, Age INT, Gender VARCHAR(50), Location VARCHAR(50));", + "sql": "SELECT O.OrgName, SUM(D.DonationAmount) as TotalDonationAmount FROM Donors D INNER JOIN Donations DON ON D.DonorID \u003d DON.DonorID INNER JOIN Organizations O ON DON.OrgID \u003d O.OrgID WHERE D.Age BETWEEN 30 AND 50 AND O.Location \u003d \u0027South America\u0027 GROUP BY O.OrgName;", + "sql_explanation": "The SQL query calculates the total donation amount per organization located in South America for donors aged between 30 and 50 by joining Donors, Donations, and Organizations table, filtering on Age and Location column, and applying the SUM function on DonationAmount column, grouping by OrgName." 
+}, { + "id": "267", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many donations have been made to organizations focused on education by donors from the US and Canada in the last 12 months?", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(255), country VARCHAR(255));CREATE TABLE donations (id INT, donor_id INT, cause_id INT, amount DECIMAL(10, 2), donation_date DATE);CREATE TABLE causes (id INT, name VARCHAR(255), category VARCHAR(255));CREATE VIEW v_us_canada AS SELECT \u0027US\u0027 AS country UNION ALL SELECT \u0027Canada\u0027;", + "sql": "SELECT COUNT(*) FROM donations d INNER JOIN donors dn ON d.donor_id \u003d dn.id INNER JOIN causes c ON d.cause_id \u003d c.id INNER JOIN v_us_canada v ON dn.country \u003d v.country WHERE d.donation_date \u003e\u003d DATE_SUB(NOW(), INTERVAL 12 MONTH) AND c.category \u003d \u0027education\u0027;", + "sql_explanation": "Join the donors, donations, and causes tables, along with the v_us_canada view, filter the data for the last 12 months and organizations focused on education, and calculate the total number of donations made by donors from the US and Canada." 
+}, { + "id": "314", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount for donors from Nigeria, grouped by cause?", + "sql_context": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donor_country TEXT); INSERT INTO donors (donor_id, donor_name, donor_country) VALUES (1, \u0027John Doe\u0027, \u0027Nigeria\u0027), (2, \u0027Jane Smith\u0027, \u0027USA\u0027), (3, \u0027Alice Johnson\u0027, \u0027Canada\u0027); CREATE TABLE donations (donation_id INT, donor_id INT, cause TEXT, amount DECIMAL(10,2)); INSERT INTO donations (donation_id, donor_id, cause, amount) VALUES (1, 1, \u0027Education\u0027, 500.00), (2, 1, \u0027Health\u0027, 300.00), (3, 2, \u0027Environment\u0027, 750.00), (4, 2, \u0027Education\u0027, 250.00), (5, 3, \u0027Health\u0027, 600.00); CREATE TABLE causes (cause_id INT, cause TEXT); INSERT INTO causes (cause_id, cause) VALUES (1, \u0027Education\u0027), (2, \u0027Health\u0027), (3, \u0027Environment\u0027);", + "sql": "SELECT d.donor_country, c.cause, AVG(donations.amount) as avg_donation FROM donors d JOIN donations ON d.donor_id \u003d donations.donor_id JOIN causes c ON donations.cause \u003d c.cause WHERE d.donor_country \u003d \u0027Nigeria\u0027 GROUP BY d.donor_country, c.cause;", + "sql_explanation": "This query calculates the average donation amount for donors from Nigeria, grouped by cause. It joins the \u0027donors\u0027, \u0027donations\u0027, and \u0027causes\u0027 tables, then filters for donors from Nigeria and groups the results by the country, cause, and calculates the average donation amount for each group." 
+}, { + "id": "427", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total amount donated to organizations focused on climate change by donors from the US?", + "sql_context": "CREATE TABLE donors (id INT, country VARCHAR(255)); INSERT INTO donors (id, country) VALUES (1, \u0027United States\u0027); INSERT INTO donors (id, country) VALUES (2, \u0027Canada\u0027); CREATE TABLE donations (id INT, donor_id INT, organization_id INT, amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, donor_id, organization_id, amount, donation_date) VALUES (1, 1, 3, 5000, \u00272021-06-15\u0027); CREATE TABLE organizations (id INT, name VARCHAR(255), focus VARCHAR(255)); INSERT INTO organizations (id, name, focus) VALUES (3, \u0027Climate Foundation\u0027, \u0027Climate Change\u0027);", + "sql": "SELECT SUM(amount) FROM donations JOIN donors ON donations.donor_id \u003d donors.id JOIN organizations ON donations.organization_id \u003d organizations.id WHERE donors.country \u003d \u0027United States\u0027 AND organizations.focus \u003d \u0027Climate Change\u0027;", + "sql_explanation": "The SQL query first joins the donations table with the donors table based on the donor_id foreign key. Next, it joins the organizations table with the donations table based on the organization_id foreign key. This allows us to connect donors, donations, and organizations. The query then filters the results to only include donations made by donors from the United States to organizations focused on climate change. Finally, it calculates the sum of the amount values for these donations." 
+}, { + "id": "461", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount for donors aged 18-24 who have donated to organizations focused on global health?", + "sql_context": "CREATE TABLE donors (id INT, age INT, name VARCHAR(255)); INSERT INTO donors (id, age, name) VALUES (1, 19, \u0027Global Health Donor\u0027); CREATE TABLE donations (id INT, donor_id INT, organization_id INT, amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, donor_id, organization_id, amount, donation_date) VALUES (1, 1, 3, 5000, \u00272021-06-15\u0027); CREATE TABLE organizations (id INT, name VARCHAR(255), focus VARCHAR(255)); INSERT INTO organizations (id, name, focus) VALUES (3, \u0027Global Health Initiative\u0027, \u0027Global Health\u0027);", + "sql": "SELECT AVG(amount) FROM donations JOIN donors ON donations.donor_id \u003d donors.id JOIN organizations ON donations.organization_id \u003d organizations.id WHERE donors.age BETWEEN 18 AND 24 AND organizations.focus \u003d \u0027Global Health\u0027;", + "sql_explanation": "The SQL query first joins the donations table with the donors table based on the donor_id foreign key. Next, it joins the organizations table with the donations table based on the organization_id foreign key. This allows us to connect donors, donations, and organizations. The query then filters the results to only include donations made by donors aged between 18 and 24 to organizations focused on global health. Finally, it calculates the average of the amount values for these donations." 
+}, { + "id": "570", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the donors and their total donation amounts for the \u0027Climate Change\u0027 cause, joining the donors, donations, and causes tables.", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(255)); INSERT INTO donors (id, name) VALUES (1, \u0027John Doe\u0027), (2, \u0027Jane Smith\u0027), (3, \u0027Alice Johnson\u0027); CREATE TABLE donations (id INT, donor_id INT, cause_id INT, amount DECIMAL(10, 2)); INSERT INTO donations (id, donor_id, cause_id, amount) VALUES (1, 1, 1, 500), (2, 1, 2, 250), (3, 2, 2, 750), (4, 3, 1, 1000); CREATE TABLE causes (id INT, name VARCHAR(255)); INSERT INTO causes (id, name) VALUES (1, \u0027Climate Change\u0027), (2, \u0027Human Rights\u0027);", + "sql": "SELECT d.name, SUM(donations.amount) as total_donation FROM donors d JOIN donations ON d.id \u003d donations.donor_id JOIN causes ON donations.cause_id \u003d causes.id WHERE causes.name \u003d \u0027Climate Change\u0027 GROUP BY d.name;", + "sql_explanation": "Join the donors, donations, and causes tables, filter for the \u0027Climate Change\u0027 cause, and display the donors and their total donation amounts." 
+}, { + "id": "1393", + "domain": "philanthropy", + "domain_description": "Philanthropy data on effective altruism, impact investing, philanthropic trends, and donor demographics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the sum of donations for each cause?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName text); INSERT INTO Donors VALUES (1, \u0027John Doe\u0027); INSERT INTO Donors VALUES (2, \u0027Jane Smith\u0027);CREATE TABLE Donations (DonationID int, DonorID int, DonationAmount numeric, Cause text); INSERT INTO Donations VALUES (1, 1, 500, \u0027Education\u0027); INSERT INTO Donations VALUES (2, 2, 1000, \u0027Health\u0027);CREATE TABLE Causes (CauseID int, Cause text); INSERT INTO Causes VALUES (1, \u0027Education\u0027); INSERT INTO Causes VALUES (2, \u0027Health\u0027);", + "sql": "SELECT C.Cause, SUM(D.DonationAmount) FROM Donors D INNER JOIN Donations DD ON D.DonorID \u003d DD.DonorID INNER JOIN Causes C ON DD.Cause \u003d C.Cause GROUP BY C.Cause;", + "sql_explanation": "This query calculates the sum of donations for each cause by joining the Donors, Donations, and Causes tables on DonorID and Cause and grouping by cause." 
+}, { + "id": "127", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average distance from rural hospitals to the nearest clinic in each state?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, state TEXT); INSERT INTO hospitals (id, name, location, state) VALUES (1, \u0027Hospital A\u0027, \u0027Rural Texas\u0027, \u0027Texas\u0027), (2, \u0027Hospital B\u0027, \u0027Rural California\u0027, \u0027California\u0027); CREATE TABLE clinics (id INT, name TEXT, location TEXT, state TEXT); INSERT INTO clinics (id, name, location, state) VALUES (1, \u0027Clinic A\u0027, \u0027Rural Texas\u0027, \u0027Texas\u0027), (2, \u0027Clinic B\u0027, \u0027Rural California\u0027, \u0027California\u0027); CREATE TABLE distance (hospital_id INT, clinic_id INT, distance FLOAT); INSERT INTO distance (hospital_id, clinic_id, distance) VALUES (1, 1, 15.0), (1, 2, 20.0), (2, 1, 25.0), (2, 2, 30.0);", + "sql": "SELECT h.state, AVG(d.distance) AS avg_distance FROM hospitals h INNER JOIN distance d ON h.id \u003d d.hospital_id INNER JOIN (SELECT hospital_id, MIN(distance) AS min_distance FROM distance GROUP BY hospital_id) m ON d.hospital_id \u003d m.hospital_id AND d.distance \u003d m.min_distance GROUP BY h.state;", + "sql_explanation": "Joins hospitals, distance, and a subquery on the distance column to calculate the average distance from rural hospitals to the nearest clinic in each state." 
+}, { + "id": "395", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of rural hospitals and clinics in each region, and the number of clinics within a 30-km radius of each hospital?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, num_beds INT, region TEXT); INSERT INTO hospitals (id, name, location, num_beds, region) VALUES (1, \u0027Hospital A\u0027, \u0027Rural Andalusia\u0027, 200, \u0027Andalusia\u0027), (2, \u0027Hospital B\u0027, \u0027Rural Madrid\u0027, 250, \u0027Madrid\u0027); CREATE TABLE clinics (id INT, name TEXT, location TEXT, num_beds INT, region TEXT); INSERT INTO clinics (id, name, location, num_beds, region) VALUES (1, \u0027Clinic A\u0027, \u0027Rural Andalusia\u0027, 50, \u0027Andalusia\u0027), (2, \u0027Clinic B\u0027, \u0027Rural Madrid\u0027, 75, \u0027Madrid\u0027); CREATE TABLE distance (hospital_id INT, clinic_id INT, distance FLOAT); INSERT INTO distance (hospital_id, clinic_id, distance) VALUES (1, 1, 25.0), (1, 2, 30.0), (2, 1, 35.0), (2, 2, 40.0);", + "sql": "SELECT h.region, COUNT(h.id) AS num_hospitals, COUNT(c.id) AS num_clinics_within_30_km FROM hospitals h INNER JOIN distance d ON h.id \u003d d.hospital_id INNER JOIN clinics c ON d.clinic_id \u003d c.id WHERE d.distance \u003c\u003d 30 GROUP BY h.region;", + "sql_explanation": "Joins hospitals, distance, and clinics tables on the id column and calculates the number of rural hospitals and clinics within a 30-km radius for each region." 
+}, { + "id": "335", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List posts with hashtags related to \u0027food\u0027 and \u0027restaurants\u0027 in descending order of likes, excluding posts with less than 10 likes.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(255), page_name VARCHAR(255)); CREATE TABLE posts (id INT, user_id INT, page_name VARCHAR(255), content TEXT); CREATE TABLE likes (id INT, user_id INT, post_id INT); CREATE TABLE hashtags (id INT, post_id INT, tag VARCHAR(255));", + "sql": "SELECT DISTINCT posts.id, posts.content FROM posts JOIN hashtags ON posts.id \u003d hashtags.post_id JOIN likes ON posts.id \u003d likes.post_id WHERE hashtags.tag IN (\u0027food\u0027, \u0027restaurants\u0027) GROUP BY posts.id HAVING COUNT(*) \u003e 10 ORDER BY COUNT(*) DESC;", + "sql_explanation": "Join the posts, hashtags, and likes tables, filtering for hashtags \u0027food\u0027 and \u0027restaurants\u0027. Then, group by post ID and filter for posts with more than 10 likes. Finally, select distinct post IDs and content in descending order of likes." 
+}, { + "id": "347", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find users who have never posted about #food and have a private profile.", + "sql_context": "CREATE TABLE users (id INT, name VARCHAR(50), gender VARCHAR(10), public_profile BOOLEAN); INSERT INTO users (id, name, gender, public_profile) VALUES (1, \u0027Anna\u0027, \u0027Female\u0027, TRUE); INSERT INTO users (id, name, gender, public_profile) VALUES (2, \u0027Beto\u0027, \u0027Male\u0027, FALSE); INSERT INTO users (id, name, gender, public_profile) VALUES (3, \u0027Carla\u0027, \u0027Female\u0027, TRUE); CREATE TABLE posts (id INT, user_id INT, content TEXT, timestamp TIMESTAMP); INSERT INTO posts (id, user_id, content, timestamp) VALUES (1, 1, \u0027I love #food\u0027, \u00272021-06-15 10:00:00\u0027); INSERT INTO posts (id, user_id, content, timestamp) VALUES (2, 3, \u0027Reading a great book right now\u0027, \u00272021-06-20 15:30:00\u0027); CREATE TABLE post_topics (post_id INT, topic_id INT); INSERT INTO post_topics (post_id, topic_id) VALUES (1, 1); INSERT INTO post_topics (post_id, topic_id) VALUES (2, 3); CREATE TABLE topics (id INT, name VARCHAR(50)); INSERT INTO topics (id, name) VALUES (1, \u0027#food\u0027); INSERT INTO topics (id, name) VALUES (2, \u0027#books\u0027);", + "sql": "SELECT users.name FROM users LEFT JOIN posts ON users.id \u003d posts.user_id LEFT JOIN post_topics ON posts.id \u003d post_topics.post_id LEFT JOIN topics ON post_topics.topic_id \u003d topics.id WHERE users.public_profile \u003d FALSE AND topics.name IS NULL;", + "sql_explanation": "This SQL query joins the 
\u0027users\u0027, \u0027posts\u0027, \u0027post_topics\u0027, and \u0027topics\u0027 tables, filters the results to show only users with a private profile who have never posted about #food, and returns their names." +}, { + "id": "649", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of posts with the hashtag \"#music\" for users from the UK in the \"user_posts\" and \"post_hashtags\" tables?", + "sql_context": "CREATE TABLE user_posts (post_id INT, user_id INT, hashtags VARCHAR(255)); INSERT INTO user_posts (post_id, user_id) VALUES (1, 1), (2, 2), (3, 3); CREATE TABLE post_hashtags (post_id INT, hashtags VARCHAR(255)); INSERT INTO post_hashtags (post_id, hashtags) VALUES (1, \u0027#music\u0027), (1, \u0027#food\u0027), (2, \u0027#nature\u0027), (3, \u0027#music\u0027), (3, \u0027#travel\u0027); CREATE TABLE user_profiles (id INT, country VARCHAR(255)); INSERT INTO user_profiles (id, country) VALUES (1, \u0027UK\u0027), (2, \u0027USA\u0027), (3, \u0027Canada\u0027);", + "sql": "SELECT COUNT(DISTINCT up.post_id) FROM user_posts up JOIN post_hashtags ph ON up.post_id \u003d ph.post_id JOIN user_profiles upr ON up.user_id \u003d upr.id WHERE ph.hashtags LIKE \u0027%#music%\u0027 AND upr.country \u003d \u0027UK\u0027;", + "sql_explanation": "This query finds the total number of posts with the hashtag \"#music\" for users from the UK in the \"user_posts\" and \"post_hashtags\" tables. It does this by joining the three tables on the post_id and user_id and then filtering for posts with the hashtag \"#music\" and users from the UK. 
The query then counts the number of distinct post_ids that meet this criteria." +}, { + "id": "3", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify unions in New York with the highest increase in wage increases in collective bargaining contracts compared to the previous contract.", + "sql_context": " CREATE TABLE UnionNegotiations (id INT PRIMARY KEY, union_id INT, negotiation_date DATE); CREATE TABLE CollectiveBargaining (id INT PRIMARY KEY, union_id INT, contract_start DATE, contract_end DATE, wage_increase DECIMAL(5,2)); CREATE TABLE UnionMembers (id INT PRIMARY KEY, name VARCHAR(50), state VARCHAR(2), union_id INT, FOREIGN KEY (union_id) REFERENCES UnionNegotiations(union_id)); ", + "sql": " SELECT u.name, u.state, c.wage_increase, c.contract_end, (SELECT wage_increase FROM CollectiveBargaining cb WHERE cb.contract_end \u003c c.contract_end AND cb.union_id \u003d c.union_id ORDER BY contract_end DESC LIMIT 1) AS previous_wage_increase FROM UnionMembers u JOIN UnionNegotiations n ON u.union_id \u003d n.union_id JOIN CollectiveBargaining c ON u.union_id \u003d c.union_id WHERE u.state \u003d \u0027NY\u0027 ORDER BY c.wage_increase - (SELECT wage_increase FROM CollectiveBargaining cb WHERE cb.contract_end \u003c c.contract_end AND cb.union_id \u003d c.union_id ORDER BY contract_end DESC LIMIT 1) DESC LIMIT 10; ", + "sql_explanation": "This query joins the UnionMembers, UnionNegotiations, and CollectiveBargaining tables and uses correlated subqueries to calculate the wage increase between two consecutive collective bargaining contracts. 
It filters data based on union locations and orders the results in descending order of wage increases. The query limits the output to the top 10 results. " +}, { + "id": "724", + "domain": "food industry", + "domain_description": "Food safety records, nutrition data, supply chain transparency, and sustainable food trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total cost of fair trade coffee products in the last 6 months?", + "sql_context": "CREATE TABLE Dates (date DATE); CREATE TABLE Sales (sale_id INT, date DATE, product_id INT, quantity INT, cost INT); CREATE TABLE Products (product_id INT, product_name VARCHAR(255), is_fair_trade BOOLEAN);", + "sql": "SELECT SUM(s.cost) as total_cost FROM Sales s JOIN Dates d ON s.date \u003d d.date JOIN Products p ON s.product_id \u003d p.product_id WHERE d.date \u003e\u003d DATE(NOW()) - INTERVAL 6 MONTH AND p.is_fair_trade \u003d TRUE;", + "sql_explanation": "This query joins the Sales, Dates, and Products tables on their respective fields. It filters for sales in the last 6 months by subtracting 6 months from the current date. Then, it filters for fair trade products by using the is_fair_trade column in the Products table. Finally, it calculates the sum of costs for the matching rows." 
+}, { + "id": "142", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 most ordered vegetarian dishes in each country.", + "sql_context": "CREATE TABLE Customers (CustomerID INT, Name TEXT, Country TEXT, IsVegetarian BOOLEAN); CREATE TABLE Orders (OrderID INT, MenuItemID INT, CustomerID INT, Quantity INT); CREATE TABLE MenuItems (MenuItemID INT, Name TEXT, IsVegetarian BOOLEAN, Country TEXT);", + "sql": "SELECT Country, Name, SUM(Quantity) AS TotalOrdered FROM Customers INNER JOIN Orders ON Customers.CustomerID \u003d Orders.CustomerID INNER JOIN MenuItems ON Orders.MenuItemID \u003d MenuItems.MenuItemID WHERE IsVegetarian \u003d TRUE GROUP BY Country, Name ORDER BY Country, TotalOrdered DESC LIMIT 3;", + "sql_explanation": "This query identifies the top 3 most ordered vegetarian dishes in each country by joining the \u0027Customers\u0027, \u0027Orders\u0027, and \u0027MenuItems\u0027 tables, filtering for vegetarian dishes, grouping by \u0027Country\u0027 and \u0027Name\u0027, and ordering by \u0027Country\u0027 and the descending \u0027TotalOrdered\u0027 sum. The \u0027LIMIT 3\u0027 clause ensures only the top 3 results are returned for each \u0027Country\u0027." 
+}, { + "id": "185", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which city has the highest sales for item \u0027Burger\u0027?", + "sql_context": "CREATE TABLE sales_data (sale_id INT, location_id INT, item_id INT, quantity_sold INT, sale_date DATE); INSERT INTO sales_data (sale_id, location_id, item_id, quantity_sold, sale_date) VALUES (1, 1, 1, 50, \u00272021-05-01\u0027), (2, 2, 2, 90, \u00272021-05-02\u0027);", + "sql": "SELECT location_id, SUM(quantity_sold) as total_sold FROM sales_data JOIN locations ON sales_data.location_id \u003d locations.location_id JOIN menu_items ON sales_data.item_id \u003d menu_items.item_id WHERE item_name \u003d \u0027Burger\u0027 GROUP BY location_id ORDER BY total_sold DESC LIMIT 1;", + "sql_explanation": "Determines the city with the highest sales for item \u0027Burger\u0027 by joining the sales_data, locations, and menu_items tables, then filtering for the item_name \u0027Burger\u0027, and calculating the total quantity sold for each location. The result is then ordered in descending order by total_sold, and the location_id with the highest sales is returned." 
+}, { + "id": "195", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which ingredients are used in more than two vegan dishes?", + "sql_context": "CREATE TABLE ingredients (id INT, name VARCHAR(255)); INSERT INTO ingredients (id, name) VALUES (1, \u0027Tomatoes\u0027), (2, \u0027Onions\u0027), (3, \u0027Garlic\u0027), (4, \u0027Cheese\u0027), (5, \u0027Tofu\u0027); CREATE TABLE dishes (id INT, name VARCHAR(255), is_vegan BOOLEAN); INSERT INTO dishes (id, name, is_vegan) VALUES (1, \u0027Pizza Margherita\u0027, false), (2, \u0027Vegan Tacos\u0027, true), (3, \u0027Chana Masala\u0027, true); CREATE TABLE dish_ingredients (dish_id INT, ingredient_id INT); INSERT INTO dish_ingredients (dish_id, ingredient_id) VALUES (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (2, 5), (3, 2), (3, 3), (3, 5);", + "sql": "SELECT ingredient.name FROM ingredients INNER JOIN dish_ingredients ON ingredients.id \u003d dish_ingredients.ingredient_id INNER JOIN dishes ON dish_ingredients.dish_id \u003d dishes.id WHERE dishes.is_vegan \u003d true GROUP BY ingredient.id HAVING COUNT(dish_ingredients.dish_id) \u003e 2;", + "sql_explanation": "The SQL query joins the ingredients, dish_ingredients, and dishes tables based on their respective IDs, filters for vegan dishes only, and then groups the results by ingredient name. The HAVING clause is used to filter for ingredients that are used in more than two vegan dishes. The COUNT function is used to count the total number of dishes that use each ingredient." 
+}, { + "id": "425", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the revenue generated from the sale of organic dishes in the last month?", + "sql_context": "CREATE TABLE inventory (item_id INT, quantity INT, unit_price DECIMAL(5,2), is_organic BOOLEAN); INSERT INTO inventory (item_id, quantity, unit_price, is_organic) VALUES (1, 10, 12.99, true), (2, 20, 7.50, false), (3, 30, 9.99, true), (4, 40, 15.49, false), (5, 50, 8.99, true); CREATE TABLE orders (order_id INT, item_id INT, order_date DATE); INSERT INTO orders (order_id, item_id, order_date) VALUES (1, 1, \u00272022-01-01\u0027), (2, 3, \u00272022-01-02\u0027), (3, 2, \u00272022-01-03\u0027), (4, 4, \u00272022-01-04\u0027), (5, 5, \u00272022-01-05\u0027); CREATE TABLE menu_items (item_id INT, name TEXT, is_organic BOOLEAN); INSERT INTO menu_items (item_id, name, is_organic) VALUES (1, \u0027Quinoa Salad\u0027, true), (2, \u0027Beef Burger\u0027, false), (3, \u0027Chickpea Curry\u0027, true), (4, \u0027Cheesecake\u0027, false), (5, \u0027Veggie Pizza\u0027, true);", + "sql": "SELECT SUM(i.unit_price * o.quantity) as revenue FROM inventory i JOIN orders o ON i.item_id \u003d o.item_id JOIN menu_items m ON i.item_id \u003d m.item_id WHERE m.is_organic \u003d true AND o.order_date BETWEEN \u00272022-02-01\u0027 AND \u00272022-02-28\u0027;", + "sql_explanation": "This query joins the inventory, orders, and menu_items tables based on item_id. It then filters the records for organic dishes and orders in the last month. 
Lastly, it calculates the revenue generated from the sale of organic dishes in the last month by summing the product of unit_price and quantity columns." +}, { + "id": "465", + "domain": "food services", + "domain_description": "Menu engineering, customer preferences, inventory management, and sustainability initiatives.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all customers who have a history of purchasing vegetarian items and their total spending", + "sql_context": "CREATE TABLE customers (customer_id INT, is_vegetarian BOOLEAN); INSERT INTO customers (customer_id, is_vegetarian) VALUES (1, TRUE), (2, FALSE), (3, TRUE); CREATE TABLE orders (order_id INT, customer_id INT, order_total DECIMAL(10,2)); INSERT INTO orders (order_id, customer_id, order_total) VALUES (1, 1, 50.00), (2, 2, 30.00), (3, 1, 60.00); CREATE TABLE order_items (order_id INT, menu_item VARCHAR(255), is_vegetarian BOOLEAN); INSERT INTO order_items (order_id, menu_item, is_vegetarian) VALUES (1, \u0027Veggie Burger\u0027, TRUE), (2, \u0027Steak\u0027, FALSE), (3, \u0027Falafel Wrap\u0027, TRUE);", + "sql": "SELECT C.customer_id, SUM(O.order_total) as total_spending FROM customers C JOIN orders O ON C.customer_id \u003d O.customer_id JOIN order_items OI ON O.order_id \u003d OI.order_id WHERE OI.is_vegetarian \u003d TRUE GROUP BY C.customer_id;", + "sql_explanation": "This query joins the customers, orders, and order_items tables on the customer_id and order_id columns. It then filters for vegetarian orders and calculates the total spending for each customer by grouping and summing the order_total column." 
+}, { + "id": "31", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the success rate of cognitive behavioral therapy (CBT) for patients with depression in the African American community?", + "sql_context": "CREATE TABLE therapy_approaches (approach_id INT, name VARCHAR(255)); CREATE TABLE patients (patient_id INT, age INT, gender VARCHAR(10), condition VARCHAR(255), ethnicity VARCHAR(255)); CREATE TABLE therapy_sessions (session_id INT, patient_id INT, therapist_id INT, session_date DATE, success BOOLEAN, approach_id INT);", + "sql": "SELECT AVG(CASE WHEN therapy_sessions.success THEN 1 ELSE 0 END) AS success_rate FROM therapy_sessions JOIN patients ON therapy_sessions.patient_id \u003d patients.patient_id JOIN therapy_approaches ON therapy_sessions.approach_id \u003d therapy_approaches.approach_id WHERE patients.condition \u003d \u0027depression\u0027 AND patients.ethnicity \u003d \u0027African American\u0027 AND therapy_approaches.name \u003d \u0027cognitive behavioral therapy\u0027;", + "sql_explanation": "The SQL query calculates the success rate of cognitive behavioral therapy (CBT) for patients with depression in the African American community. It first calculates the number of successful therapy sessions for patients with depression and African American ethnicity who have undergone CBT using a subquery with the WHERE clause. Then, it performs a join between therapy_sessions, patients, and therapy_approaches tables to connect the required data. 
Finally, it calculates the success rate of CBT for patients with depression in the African American community using the AVG aggregate function." +}, { + "id": "305", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which mental health conditions have the highest success rates for treatment, and how many patients have been treated for each?", + "sql_context": "CREATE TABLE mental_health_conditions (id INT, name VARCHAR(50), prevalence FLOAT); CREATE TABLE treatments (id INT, condition_id INT, name VARCHAR(50), approach VARCHAR(50), success_rate FLOAT); CREATE TABLE patient_outcomes (id INT, treatment_id INT, patient_id INT);", + "sql": "SELECT mhc.name, t.name, COUNT(po.patient_id) as patient_count FROM mental_health_conditions mhc JOIN treatments t ON mhc.id \u003d t.condition_id JOIN patient_outcomes po ON t.id \u003d po.treatment_id GROUP BY mhc.name, t.name ORDER BY t.success_rate DESC;", + "sql_explanation": "We join the mental_health_conditions, treatments, and patient_outcomes tables on the appropriate IDs, and return the name of each mental health condition and treatment, as well as the number of patients treated for each. We order the results by treatment success rate in descending order." 
+}, { + "id": "279", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of hospitals and clinics in each city?", + "sql_context": "CREATE TABLE cities (city_id INT, city_name VARCHAR(255)); INSERT INTO cities VALUES (1, \u0027CityA\u0027), (2, \u0027CityB\u0027); CREATE TABLE hospitals (hospital_id INT, hospital_name VARCHAR(255), city_id INT); INSERT INTO hospitals VALUES (1, \u0027Hospital1\u0027, 1), (2, \u0027Hospital2\u0027, 2); CREATE TABLE clinics (clinic_id INT, clinic_name VARCHAR(255), city_id INT); INSERT INTO clinics VALUES (1, \u0027Clinic1\u0027, 1), (2, \u0027Clinic2\u0027, 2);", + "sql": "SELECT cities.city_name, COUNT(hospitals.hospital_id) AS hospitals, COUNT(clinics.clinic_id) AS clinics FROM cities LEFT JOIN hospitals ON cities.city_id \u003d hospitals.city_id LEFT JOIN clinics ON cities.city_id \u003d clinics.city_id GROUP BY cities.city_name;", + "sql_explanation": "This query joins the cities, hospitals, and clinics tables on the city_id field. It then groups the results by city name and calculates the number of hospitals and clinics for each city." 
+}, { + "id": "17", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the response time for the top 5 most common crime types in each community district in the past month?", + "sql_context": "CREATE TABLE community_districts (cd_number INT, community_name VARCHAR(255)); INSERT INTO community_districts (cd_number, community_name) VALUES (1, \u0027Manhattan 1\u0027), (2, \u0027Manhattan 2\u0027), (3, \u0027Manhattan 3\u0027); CREATE TABLE crime_data (crime_date DATE, cd_number INT, crime_type VARCHAR(255), response_time INT);", + "sql": "SELECT cd.community_name, ct.crime_type, AVG(cd.response_time) as avg_response_time FROM community_districts cd JOIN crime_data cd ON cd.cd_number \u003d cd.cd_number JOIN (SELECT crime_type, cd_number, COUNT(*) as count FROM crime_data WHERE crime_date \u003e\u003d CURDATE() - INTERVAL 1 MONTH GROUP BY crime_type, cd_number ORDER BY count DESC LIMIT 5) ct ON cd.cd_number \u003d ct.cd_number WHERE cd.crime_date \u003e\u003d CURDATE() - INTERVAL 1 MONTH GROUP BY cd.community_name, ct.crime_type;", + "sql_explanation": "This query calculates the response time for the top 5 most common crime types in each community district in the past month by joining the community_districts table with the crime_data table based on the cd_number. It then filters the crimes to only those committed in the past month, groups the crimes by crime type and community district, and selects the top 5 crime types based on their frequency. Finally, it calculates the average response time for each crime type in each community district." 
+}, { + "id": "227", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average response time for emergency calls and crime reports in each district?", + "sql_context": "CREATE TABLE districts (did INT, name VARCHAR(50), PRIMARY KEY(did)); CREATE TABLE emergencies (eid INT, did INT, responded_at TIMESTAMP, PRIMARY KEY(eid), FOREIGN KEY (did) REFERENCES districts(did)); CREATE TABLE crimes (cid INT, did INT, committed_at TIMESTAMP, PRIMARY KEY(cid), FOREIGN KEY (did) REFERENCES districts(did));", + "sql": "SELECT d.name, AVG(IF(e.eid IS NOT NULL, TIMESTAMPDIFF(MINUTE, e.responded_at, NOW()), TIMESTAMPDIFF(MINUTE, c.committed_at, NOW()))) as avg_response_time FROM districts d LEFT JOIN emergencies e ON d.did \u003d e.did LEFT JOIN crimes c ON d.did \u003d c.did GROUP BY d.name;", + "sql_explanation": "We join the \u0027districts\u0027, \u0027emergencies\u0027, and \u0027crimes\u0027 tables based on the district ID using a LEFT JOIN. Then, we calculate the average response time for each district by finding the difference between the response time or committed time and the current time, converting it to minutes using TIMESTAMPDIFF, and averaging the results for each district." 
+}, { + "id": "375", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of unique customers who purchased size 18 garments in Brazil?", + "sql_context": "CREATE TABLE Customers (id INT, customerID INT, country VARCHAR(50)); INSERT INTO Customers (id, customerID, country) VALUES (1, 4001, \u0027Brazil\u0027), (2, 4002, \u0027Argentina\u0027), (3, 4003, \u0027Brazil\u0027), (4, 4004, \u0027Chile\u0027); CREATE TABLE Sales (id INT, customerID INT, garmentID INT, quantity INT, saleDate DATE); INSERT INTO Sales (id, customerID, garmentID, quantity, saleDate) VALUES (1, 4001, 405, 1, \u00272021-01-10\u0027), (2, 4002, 406, 2, \u00272021-02-15\u0027), (3, 4003, 407, 1, \u00272021-03-20\u0027), (4, 4004, 408, 3, \u00272021-04-12\u0027); CREATE TABLE Garments (id INT, garmentID INT, size INT, country VARCHAR(50)); INSERT INTO Garments (id, garmentID, size, country) VALUES (1, 405, 18, \u0027Brazil\u0027), (2, 406, 12, \u0027Argentina\u0027), (3, 407, 10, \u0027Chile\u0027), (4, 408, 16, \u0027Brazil\u0027);", + "sql": "SELECT COUNT(DISTINCT Customers.customerID) FROM Customers INNER JOIN Sales ON Customers.customerID \u003d Sales.customerID INNER JOIN Garments ON Sales.garmentID \u003d Garments.garmentID WHERE Garments.size \u003d 18 AND Customers.country \u003d \u0027Brazil\u0027;", + "sql_explanation": "This query calculates the number of unique customers who purchased size 18 garments in Brazil by selecting the customerID from the Customers table and joining it with the Sales and Garments tables on the customerID and garmentID columns, respectively. 
It then filters the results to include only size 18 garments purchased in Brazil and counts the number of distinct customerIDs." +}, { + "id": "544", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average rating for sustainable garments in each customer segment?", + "sql_context": "CREATE TABLE Customers (CustomerID INT, CustomerSegment TEXT); CREATE TABLE GarmentRatings (RatingID INT, CustomerID INT, GarmentID INT, Rating DECIMAL); CREATE TABLE Garments (GarmentID INT, GarmentName TEXT, IsSustainable BOOLEAN); INSERT INTO Customers VALUES (1, \u0027Plus Size\u0027), (2, \u0027Petite\u0027), (3, \u0027Standard\u0027); INSERT INTO GarmentRatings VALUES (1, 1, 1, 4.5), (2, 2, 2, 3.5), (3, 3, 3, 4.0); INSERT INTO Garments VALUES (1, \u0027Garment1\u0027, TRUE), (2, \u0027Garment2\u0027, FALSE), (3, \u0027Garment3\u0027, TRUE);", + "sql": "SELECT c.CustomerSegment, AVG(gr.Rating) FROM Customers c JOIN GarmentRatings gr ON c.CustomerID \u003d gr.CustomerID JOIN Garments g ON gr.GarmentID \u003d g.GarmentID WHERE g.IsSustainable \u003d TRUE GROUP BY c.CustomerSegment;", + "sql_explanation": "The SQL query calculates the average rating for sustainable garments in each customer segment by joining Customers, GarmentRatings, and Garments tables based on their relationships and filtering records based on the IsSustainable column. The query then groups the records based on the CustomerSegment column and computes the average rating for sustainable garments for each customer segment." 
+}, { + "id": "603", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of recycled polyester clothing manufactured in size S?", + "sql_context": "CREATE TABLE textile_sources (source_id INT, material VARCHAR(50)); INSERT INTO textile_sources (source_id, material) VALUES (1, \u0027Recycled Polyester\u0027); CREATE TABLE garment_sizes (size_id INT, size VARCHAR(10)); INSERT INTO garment_sizes (size_id, size) VALUES (1, \u0027S\u0027); CREATE TABLE products (product_id INT, name VARCHAR(50), price DECIMAL(5, 2), source_id INT, size_id INT); INSERT INTO products (product_id, name, price, source_id, size_id) VALUES (1, \u0027Recycled Polyester Top\u0027, 30.99, 1, 1);", + "sql": "SELECT AVG(p.price) FROM products p INNER JOIN textile_sources ts ON p.source_id \u003d ts.source_id INNER JOIN garment_sizes gs ON p.size_id \u003d gs.size_id WHERE ts.material \u003d \u0027Recycled Polyester\u0027 AND gs.size \u003d \u0027S\u0027;", + "sql_explanation": "The query first joins the products, textile_sources, and garment_sizes tables based on their respective IDs. It then filters the results for recycled polyester clothing manufactured in size S. Lastly, it calculates the average price of these products." 
+}, { + "id": "1355", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many customers have made purchases for both regular and sustainable fashion items?", + "sql_context": "CREATE TABLE customers (id INT, name TEXT); CREATE TABLE orders (id INT, customer_id INT, item_id INT, order_value DECIMAL, is_sustainable BOOLEAN); CREATE TABLE items (id INT, name TEXT, category TEXT);", + "sql": "SELECT COUNT(DISTINCT c.id) FROM customers c JOIN orders o ON c.id \u003d o.customer_id JOIN items i ON o.item_id \u003d i.id WHERE i.category IN (\u0027regular\u0027, \u0027sustainable\u0027);", + "sql_explanation": "This SQL query counts the number of distinct customers who have made purchases for both regular and sustainable fashion items. It does this by joining the customers, orders, and items tables, and filtering for only rows where the item category is either \u0027regular\u0027 or \u0027sustainable\u0027. It then uses the COUNT function with the DISTINCT keyword to count the unique customer IDs." 
+}, { + "id": "182", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of cases won by attorneys in the \u0027Boston\u0027 office?", + "sql_context": "CREATE TABLE attorneys (attorney_id INT, office VARCHAR(50)); INSERT INTO attorneys VALUES (1, \u0027Boston\u0027); CREATE TABLE cases (case_id INT, attorney_id INT, case_outcome VARCHAR(10));", + "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM cases INNER JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id) AS percentage_won FROM cases INNER JOIN attorneys ON cases.attorney_id \u003d attorneys.attorney_id WHERE attorneys.office \u003d \u0027Boston\u0027 AND case_outcome \u003d \u0027won\u0027;", + "sql_explanation": "This query calculates the percentage of cases won by attorneys in the \u0027Boston\u0027 office. It first performs an inner join between the cases and attorneys tables on the attorney_id column, then filters the results to only include rows where the attorney\u0027s office is \u0027Boston\u0027 and the case was won. It then calculates the percentage of these cases by dividing the count of won cases by the total number of cases." 
+}, { + "id": "762", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total billing amount for each attorney, grouped by their respective offices?", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, OfficeID INT, Name VARCHAR(50), AverageBilling FLOAT); CREATE TABLE Billing (BillingID INT, CaseID INT, Amount FLOAT); CREATE TABLE Cases (CaseID INT, AttorneyID INT);", + "sql": "SELECT A.OfficeID, A.Name, SUM(B.Amount) AS TotalBilling FROM Attorneys A INNER JOIN Cases C ON A.AttorneyID \u003d C.AttorneyID INNER JOIN Billing B ON C.CaseID \u003d B.CaseID GROUP BY A.OfficeID, A.Name;", + "sql_explanation": "This query performs an inner join on the Attorneys, Cases, and Billing tables, based on the AttorneyID and CaseID. It groups the results by the office ID and name, then calculates the total billing amount for each group." 
+}, { + "id": "230", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Percentage of users listening to hip-hop streams in the US", + "sql_context": "CREATE TABLE genres (id INT, name TEXT); INSERT INTO genres (id, name) VALUES (1, \u0027Hip-hop\u0027);", + "sql": "SELECT ROUND(100 * SUM(CASE WHEN users.country \u003d \u0027United States\u0027 AND genre_id \u003d 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT users.id), 2) AS hiphop_percentage FROM users JOIN user_genres ON users.id \u003d user_genres.user_id JOIN genres ON user_genres.genre_id \u003d genres.id;", + "sql_explanation": "This query calculates the percentage of users in the US who listen to hip-hop streams by counting the number of users listening to hip-hop, dividing it by the total number of users in the US, and multiplying by 100." 
+}, { + "id": "61", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of vessels that visited \u0027Port of Mumbai\u0027 in June 2022 but did not carry any perishable cargo.", + "sql_context": "CREATE TABLE vessels (id INT, name TEXT); CREATE TABLE cargo (id INT, perishable BOOLEAN, vessel_id INT, port_id INT, loaded_date DATE); CREATE TABLE ports (id INT, name TEXT); INSERT INTO vessels (id, name) VALUES (1, \u0027Vessel C\u0027), (2, \u0027Vessel D\u0027); INSERT INTO ports (id, name) VALUES (4, \u0027Port of Mumbai\u0027); INSERT INTO cargo (id, perishable, vessel_id, port_id, loaded_date) VALUES (1, true, 1, 4, \u00272022-06-20\u0027), (2, false, 2, 4, \u00272022-06-25\u0027), (3, false, 1, 4, \u00272022-06-01\u0027);", + "sql": "SELECT COUNT(DISTINCT vessels.id) FROM vessels LEFT JOIN cargo ON vessels.id \u003d cargo.vessel_id LEFT JOIN ports ON cargo.port_id \u003d ports.id WHERE ports.name \u003d \u0027Port of Mumbai\u0027 AND cargo.loaded_date \u003e\u003d DATE(\u00272022-06-01\u0027) AND cargo.loaded_date \u003c\u003d DATE(\u00272022-06-30\u0027) AND cargo.perishable \u003d false GROUP BY vessels.id HAVING COUNT(cargo.id) \u003d 0;", + "sql_explanation": "This SQL query identifies the number of vessels that visited \u0027Port of Mumbai\u0027 in June 2022 but did not carry any perishable cargo by joining \u0027vessels\u0027, \u0027cargo\u0027, and \u0027ports\u0027 tables on their respective foreign keys, filtering the results based on the port name, loaded date, and perishable flag, and counting the distinct vessel IDs that have no matching cargo 
records." +}, { + "id": "181", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of containers handled daily by each port agent in \u0027Casablanca\u0027?", + "sql_context": "CREATE TABLE port (port_id INT, name TEXT);CREATE TABLE port_agent (port_agent_id INT, port_id INT, name TEXT);CREATE TABLE container (container_id INT, port_agent_id INT, handled_at DATETIME);INSERT INTO port VALUES (12, \u0027Casablanca\u0027);", + "sql": "SELECT port_agent.name, AVG(COUNT(container.container_id)) FROM port_agent JOIN port ON port_agent.port_id \u003d port.port_id JOIN container ON port_agent.port_agent_id \u003d container.port_agent_id WHERE port.name \u003d \u0027Casablanca\u0027 GROUP BY port_agent.name, DATE(container.handled_at);", + "sql_explanation": "The SQL query joins the \u0027port_agent\u0027 and \u0027container\u0027 tables using the \u0027port_agent_id\u0027 and then filters the results by the \u0027port\u0027 table to only include records from the \u0027Casablanca\u0027 port. It then groups the results by port agent name and the date the container was handled, and calculates the average number of containers handled per day by each port agent." 
+}, { + "id": "282", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the number of unique ports visited by vessels, grouped by vessel type.", + "sql_context": "CREATE TABLE VESSEL_TYPES (ID INT, NAME VARCHAR(50)); INSERT INTO VESSEL_TYPES VALUES (1, \u0027Container Ship\u0027); INSERT INTO VESSEL_TYPES VALUES (2, \u0027Bulk Carrier\u0027);", + "sql": "SELECT VT.NAME AS VESSEL_TYPE, COUNT(DISTINCT P.PORT) AS PORT_COUNT, RANK() OVER(ORDER BY COUNT(DISTINCT P.PORT) DESC) AS RANK FROM PORT_CALLS PC JOIN VESSELS V ON PC.VESSEL_ID \u003d V.ID JOIN VESSEL_TYPES VT ON V.VESSEL_TYPE \u003d VT.ID GROUP BY VT.ID, VT.NAME", + "sql_explanation": "This query calculates the number of unique ports visited by vessels grouped by their vessel type, assigns a rank based on the number of unique ports visited in descending order, and displays the vessel type, port count, and rank." 
+}, { + "id": "401", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many unique vessels visited each country?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name TEXT, country TEXT); INSERT INTO ports (port_id, port_name, country) VALUES (1, \u0027Port A\u0027, \u0027USA\u0027), (2, \u0027Port B\u0027, \u0027Canada\u0027), (3, \u0027Port C\u0027, \u0027USA\u0027); CREATE TABLE visits (visit_id INT, vessel_id INT, port_id INT); INSERT INTO visits (visit_id, vessel_id, port_id) VALUES (1, 1, 1), (2, 2, 1), (3, 3, 2), (4, 1, 3);", + "sql": "SELECT countries, COUNT(DISTINCT vessels.vessel_id) FROM ports JOIN visits ON ports.port_id \u003d visits.port_id JOIN (SELECT DISTINCT vessel_id, port_id FROM visits) AS vessels ON visits.vessel_id \u003d vessels.vessel_id GROUP BY countries;", + "sql_explanation": "This query calculates the number of unique vessels that visited each country. It performs a join between the ports and visits tables and performs another join with a subquery that identifies unique vessel_id and port_id pairs. It then groups the results by country and calculates the count of distinct vessel_id for each country." 
+}, { + "id": "506", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average cargo weight per vessel for vessels that have visited ports in Africa?", + "sql_context": "CREATE TABLE Vessels (VesselID INT, Name VARCHAR(255), Type VARCHAR(255), Flag VARCHAR(255)); CREATE TABLE PortVisits (VisitID INT, VesselID INT, Port VARCHAR(255), VisitDate DATE); CREATE TABLE Cargo (CargoID INT, VesselID INT, Weight INT); INSERT INTO Vessels (VesselID, Name, Type, Flag) VALUES (1, \u0027Sea Serpent\u0027, \u0027Cargo\u0027, \u0027Panama\u0027); INSERT INTO PortVisits (VisitID, VesselID, Port, VisitDate) VALUES (1, 1, \u0027Cape Town\u0027, \u00272022-01-02\u0027); INSERT INTO Cargo (CargoID, VesselID, Weight) VALUES (1, 1, 5000), (2, 1, 3000);", + "sql": "SELECT AVG(Cargo.Weight) FROM Vessels INNER JOIN PortVisits ON Vessels.VesselID \u003d PortVisits.VesselID INNER JOIN Cargo ON Vessels.VesselID \u003d Cargo.VesselID WHERE PortVisits.Port LIKE \u0027Africa%\u0027 GROUP BY Vessels.VesselID;", + "sql_explanation": "This query uses INNER JOINs to combine the Vessels, PortVisits, and Cargo tables, selecting the average weight from the Cargo table where the Port in the PortVisits table starts with \u0027Africa\u0027. The query groups the result by the VesselID column in the Vessels table." 
+}, { + "id": "722", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of cargo handled by vessels in the \u0027Bulk Carrier\u0027 type at each port?", + "sql_context": "CREATE TABLE ports (id INT, name VARCHAR(50), location VARCHAR(50), un_code VARCHAR(10)); CREATE TABLE vessels (id INT, name VARCHAR(50), type VARCHAR(50), year_built INT, port_id INT); CREATE TABLE cargo (id INT, description VARCHAR(50), weight FLOAT, port_id INT, vessel_id INT); CREATE VIEW vessel_cargo AS SELECT v.name AS vessel_name, c.description AS cargo_description, c.weight FROM vessels v JOIN cargo c ON v.id \u003d c.vessel_id;", + "sql": "SELECT p.name AS port_name, AVG(vc.weight) AS avg_weight FROM ports p JOIN vessels v ON p.id \u003d v.port_id JOIN vessel_cargo vc ON v.name \u003d vc.vessel_name WHERE v.type \u003d \u0027Bulk Carrier\u0027 GROUP BY p.name;", + "sql_explanation": "This query calculates the average weight of cargo handled by vessels in the \u0027Bulk Carrier\u0027 type at each port. It does this by using a subquery to find the weight of cargo handled by vessels in the \u0027Bulk Carrier\u0027 type and joins the ports, vessels, and vessel_cargo views on their respective IDs and names. Then, it groups the data by port name and calculates the average weight of cargo for each group." 
+}, { + "id": "761", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average weight of containers handled by each crane in port \u0027Seattle\u0027?", + "sql_context": "CREATE TABLE port (port_id INT, name TEXT);CREATE TABLE crane (crane_id INT, port_id INT, name TEXT);CREATE TABLE container (container_id INT, crane_id INT, weight INT);INSERT INTO port VALUES (1, \u0027Seattle\u0027);", + "sql": "SELECT crane.name, AVG(container.weight) FROM crane JOIN port ON crane.port_id \u003d port.port_id JOIN container ON crane.crane_id \u003d container.crane_id WHERE port.name \u003d \u0027Seattle\u0027 GROUP BY crane.name;", + "sql_explanation": "The SQL query joins the \u0027crane\u0027 and \u0027container\u0027 tables using the \u0027crane_id\u0027 and then filters the results by the \u0027port\u0027 table to only include records from the \u0027Seattle\u0027 port. It then groups the results by crane name and calculates the average weight of the containers handled by each crane." 
+}, { + "id": "221", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Get the name and country of the top 2 customers for cruelty-free cosmetics", + "sql_context": "CREATE TABLE customers (id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE purchases (id INT, customer_id INT, product_id INT, quantity INT); CREATE TABLE products (id INT, name VARCHAR(50), cruelty_free BOOLEAN);", + "sql": "SELECT customers.name, customers.country FROM customers JOIN purchases ON customers.id \u003d purchases.customer_id JOIN products ON purchases.product_id \u003d products.id WHERE products.cruelty_free \u003d TRUE GROUP BY customers.id ORDER BY SUM(purchases.quantity) DESC LIMIT 2;", + "sql_explanation": "This SQL query gets the name and country of the top 2 customers for cruelty-free cosmetics by joining the \u0027customers\u0027, \u0027purchases\u0027, and \u0027products\u0027 table on the appropriate columns, filtering for cruelty-free products, grouping the results by customer, calculating the total quantity of purchases for each customer, ordering the results by the total quantity in descending order, and limiting the results to the top 2 customers." 
+}, { + "id": "959", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the cosmetic products that have a sustainability score of 80 or higher and are preferred by customers who are 30 or under?", + "sql_context": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(100), product VARCHAR(100), country VARCHAR(100), sustainability_score INT); CREATE TABLE cosmetics_sales (id INT PRIMARY KEY, product VARCHAR(100), quantity INT, revenue FLOAT, supplier_id INT, FOREIGN KEY (supplier_id) REFERENCES suppliers(id)); CREATE TABLE customers (id INT PRIMARY KEY, name VARCHAR(100), age INT, location VARCHAR(100)); CREATE TABLE consumer_preferences (id INT PRIMARY KEY, customer_id INT, product VARCHAR(100), preference INT, FOREIGN KEY (customer_id) REFERENCES customers(id));", + "sql": "SELECT cp.product FROM consumer_preferences cp JOIN customers c ON cp.customer_id \u003d c.id JOIN suppliers s ON cp.product \u003d s.product WHERE c.age \u003c\u003d 30 AND s.sustainability_score \u003e\u003d 80;", + "sql_explanation": "This SQL query finds the cosmetic products that have a sustainability score of 80 or higher and are preferred by customers who are 30 or under. It does this by joining the consumer_preferences, customers and suppliers tables on the customer_id and product columns and filtering for rows where the age is less than or equal to 30 and the sustainability_score is greater than or equal to 80." 
+}, { + "id": "95", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total quantity of sustainable materials in inventory for factories located in \u0027Asia\u0027?", + "sql_context": "CREATE TABLE countries (country_id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO countries VALUES (1, \u0027India\u0027, \u0027Asia\u0027); INSERT INTO countries VALUES (2, \u0027USA\u0027, \u0027North America\u0027); CREATE TABLE factories (factory_id INT, name VARCHAR(255), location VARCHAR(255), country_id INT); INSERT INTO factories VALUES (1, \u0027Eco-Friendly Factory A\u0027, \u0027New York, NY\u0027, 1); INSERT INTO factories VALUES (2, \u0027Fairtrade Factory B\u0027, \u0027Delhi, India\u0027, 2); CREATE TABLE inventory (inventory_id INT, material_id INT, factory_id INT, quantity INT); INSERT INTO inventory VALUES (1, 1, 1, 2000); INSERT INTO inventory VALUES (2, 2, 2, 3000); INSERT INTO inventory VALUES (3, 3, 1, 1500); CREATE TABLE materials (material_id INT, name VARCHAR(255), is_sustainable BOOLEAN); INSERT INTO materials VALUES (1, \u0027Organic Cotton\u0027, true); INSERT INTO materials VALUES (2, \u0027Recycled Polyester\u0027, true); INSERT INTO materials VALUES (3, \u0027Conventional Cotton\u0027, false);", + "sql": "SELECT SUM(inventory.quantity) FROM inventory JOIN factories ON inventory.factory_id \u003d factories.factory_id JOIN materials ON inventory.material_id \u003d materials.material_id WHERE materials.is_sustainable \u003d true AND factories.country_id IN (SELECT country_id FROM countries WHERE countries.region \u003d 
\u0027Asia\u0027);", + "sql_explanation": "Join the inventory, factories, and materials tables on their common columns, filter for sustainable materials and factories located in the \u0027Asia\u0027 region, then calculate the total quantity of sustainable materials in inventory for those factories." +}, { + "id": "138", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which fair labor practice certifications do factories in Vietnam have?", + "sql_context": "CREATE TABLE Factories (FactoryID INT, FactoryName VARCHAR(50), Location VARCHAR(50)); CREATE TABLE Certifications (Certification VARCHAR(50)); CREATE TABLE FactoryCertifications (FactoryID INT, CertificationID INT); INSERT INTO FactoryCertifications (FactoryID, CertificationID) VALUES (1, 1), (1, 2), (2, 1), (3, 2); INSERT INTO Certifications (CertificationID, Certification) VALUES (1, \u0027Fair Trade\u0027), (2, \u0027SA8000\u0027); INSERT INTO Factories (FactoryID, FactoryName, Location) VALUES (1, \u0027Factory A\u0027, \u0027Vietnam\u0027), (2, \u0027Factory B\u0027, \u0027Indonesia\u0027), (3, \u0027Factory C\u0027, \u0027Vietnam\u0027);", + "sql": "SELECT Factories.FactoryName, Certifications.Certification FROM Factories JOIN FactoryCertifications ON Factories.FactoryID \u003d FactoryCertifications.FactoryID JOIN Certifications ON FactoryCertifications.CertificationID \u003d Certifications.CertificationID WHERE Factories.Location \u003d \u0027Vietnam\u0027;", + "sql_explanation": "We join the Factories, FactoryCertifications, and Certifications tables based on their respective IDs and filter the data 
where Factories.Location is \u0027Vietnam\u0027. Then, we return the FactoryName and Certification for the matching records." +}, { + "id": "273", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total sales volume for manufacturers in India who use sustainable materials?", + "sql_context": "CREATE TABLE manufacturers (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO manufacturers (id, name, country) VALUES (1, \u0027Manufacturer A\u0027, \u0027India\u0027), (2, \u0027Manufacturer B\u0027, \u0027India\u0027), (3, \u0027Manufacturer C\u0027, \u0027USA\u0027); CREATE TABLE material_sourcing (id INT, manufacturer_id INT, sustainable_sourcing BOOLEAN); INSERT INTO material_sourcing (id, manufacturer_id, sustainable_sourcing) VALUES (1, 1, true), (2, 2, true), (3, 3, false); CREATE TABLE sales_volume (id INT, manufacturer_id INT, volume INT); INSERT INTO sales_volume (id, manufacturer_id, volume) VALUES (1, 1, 500), (2, 2, 250), (3, 3, 750);", + "sql": "SELECT m.name, SUM(SV.volume) as total_sales_volume FROM sales_volume SV JOIN manufacturers m ON SV.manufacturer_id \u003d m.id JOIN material_sourcing MS ON m.id \u003d MS.manufacturer_id WHERE m.country \u003d \u0027India\u0027 AND MS.sustainable_sourcing \u003d true GROUP BY m.name;", + "sql_explanation": "The SQL query performs an inner join between the sales_volume, manufacturers, and material_sourcing tables on the manufacturer_id column. 
It then selects the name column from the manufacturers table and the sum of the volume column from the sales_volume table, filtering the results to only include rows where the country column in the manufacturers table is India and the sustainable_sourcing column in the material_sourcing table is true. The results are grouped by the name column in the manufacturers table. This returns the total sales volume for manufacturers in India who use sustainable materials." +}, { + "id": "815", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average price of eco-friendly materials for brands located in Africa?", + "sql_context": "CREATE TABLE brands (brand_id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO brands (brand_id, name, region) VALUES (1, \u0027EcoAfrica\u0027, \u0027Africa\u0027), (2, \u0027GreenGlobe\u0027, \u0027Europe\u0027); CREATE TABLE materials (material_id INT, name VARCHAR(255), is_eco_friendly BOOLEAN, price DECIMAL(10,2)); INSERT INTO materials (material_id, name, is_eco_friendly, price) VALUES (1, \u0027Bamboo Cloth\u0027, TRUE, 10.50), (2, \u0027Regular Polyester\u0027, FALSE, 8.25); CREATE TABLE brand_materials (brand_id INT, material_id INT, price DECIMAL(10,2)); INSERT INTO brand_materials (brand_id, material_id, price) VALUES (1, 1, 11.00), (1, 2, 8.50), (2, 1, 9.50), (2, 2, 7.75);", + "sql": "SELECT AVG(bm.price) FROM brand_materials bm JOIN brands b ON bm.brand_id \u003d b.brand_id JOIN materials m ON bm.material_id \u003d m.material_id WHERE b.region \u003d \u0027Africa\u0027 AND m.is_eco_friendly \u003d TRUE;", + 
"sql_explanation": "This query calculates the average price of eco-friendly materials for brands located in Africa. It first joins the brand_materials table with the brands and materials tables based on brand_id and material_id. Then, it filters the records where region is \u0027Africa\u0027 and is_eco_friendly is TRUE. Lastly, it calculates the average price of those records." +}, { + "id": "899", + "domain": "ethical fashion", + "domain_description": "Extensive data on sustainable materials, fair labor practices, circular economy, and consumer awareness in ethical fashion.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which companies use a specific sustainable material (e.g., organic cotton) and have fair labor practices?", + "sql_context": "CREATE TABLE Companies (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO Companies (id, name, region) VALUES (1, \u0027CompanyA\u0027, \u0027Asia-Pacific\u0027), (2, \u0027CompanyB\u0027, \u0027Europe\u0027), (3, \u0027CompanyC\u0027, \u0027Asia-Pacific\u0027); CREATE TABLE Materials (id INT, company_id INT, material VARCHAR(255), quantity INT); INSERT INTO Materials (id, company_id, material, quantity) VALUES (1, 1, \u0027Organic cotton\u0027, 500), (2, 1, \u0027Recycled polyester\u0027, 300), (3, 2, \u0027Organic linen\u0027, 400), (4, 3, \u0027Organic cotton\u0027, 600), (5, 3, \u0027Tencel\u0027, 700); CREATE TABLE Labor (id INT, company_id INT, fair BOOLEAN); INSERT INTO Labor (id, company_id, fair) VALUES (1, 1, FALSE), (2, 2, TRUE), (3, 3, TRUE);", + "sql": "SELECT Companies.name FROM Companies JOIN Materials ON Companies.id \u003d Materials.company_id JOIN Labor ON Companies.id \u003d Labor.company_id WHERE material \u003d \u0027Organic cotton\u0027 AND fair \u003d TRUE;", + 
"sql_explanation": "Join the Companies, Materials, and Labor tables based on company_id, filter the results for companies that use organic cotton and have fair labor practices, and return the company names." +}, { + "id": "19", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 players with the highest scores in the \u0027historical_tournaments\u0027 view, including their scores and the names of the tournaments they participated in.", + "sql_context": "CREATE VIEW historical_tournaments AS SELECT tournaments.tournament_name, players.player_name, players.score FROM tournaments JOIN players_scores ON tournaments.tournament_id \u003d players_scores.tournament_id JOIN players ON players_scores.player_id \u003d players.player_id; CREATE TABLE tournaments (tournament_id INT, tournament_name TEXT); CREATE TABLE players_scores (player_id INT, tournament_id INT, score INT); CREATE TABLE players (player_id INT, player_name TEXT);", + "sql": "SELECT players.player_name, MAX(players_scores.score) as high_score, tournaments.tournament_name FROM historical_tournaments JOIN players ON historical_tournaments.player_id \u003d players.player_id JOIN players_scores ON historical_tournaments.player_id \u003d players_scores.player_id JOIN tournaments ON players_scores.tournament_id \u003d tournaments.tournament_id GROUP BY players.player_id, tournaments.tournament_name ORDER BY high_score DESC LIMIT 5;", + "sql_explanation": "This query finds the top 5 players with the highest scores in the \u0027historical_tournaments\u0027 view. 
It does this by joining the \u0027historical_tournaments\u0027, \u0027players\u0027, \u0027players_scores\u0027, and \u0027tournaments\u0027 tables, grouping the results by \u0027player_id\u0027 and \u0027tournament_name\u0027, and ordering them by the maximum \u0027score\u0027 column value in descending order. The \u0027LIMIT 5\u0027 clause limits the results to 5 rows." +}, { + "id": "284", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total prize pool for tournaments in which a player with the name \u0027John Doe\u0027 has participated?", + "sql_context": "CREATE TABLE tournaments (id INT, name VARCHAR(50), prize_pool INT); CREATE TABLE tournament_participation (id INT, tournament_id INT, player_id INT); INSERT INTO tournaments VALUES (1, \u0027Tournament1\u0027, 70000); INSERT INTO tournaments VALUES (2, \u0027Tournament2\u0027, 30000); INSERT INTO tournament_participation VALUES (1, 1, 1); INSERT INTO tournament_participation VALUES (2, 2, 2); INSERT INTO tournament_participation VALUES (3, 1, 3);", + "sql": "SELECT SUM(tournaments.prize_pool) FROM tournaments INNER JOIN tournament_participation ON tournaments.id \u003d tournament_participation.tournament_id INNER JOIN players ON tournament_participation.player_id \u003d players.id WHERE players.name \u003d \u0027John Doe\u0027;", + "sql_explanation": "This query first joins the \u0027tournaments\u0027 table with the \u0027tournament_participation\u0027 table on the \u0027tournament_id\u0027 column, and then joins the result with the \u0027players\u0027 table on the \u0027player_id\u0027 column. 
It then filters the results to only include rows where the player\u0027s name is \u0027John Doe\u0027, and calculates the total prize pool for the tournaments in which he has participated." +}, { + "id": "295", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total prize pool for tournaments in which a player with the name \u0027Chloe Lee\u0027 has participated?", + "sql_context": "CREATE TABLE tournaments (id INT, name VARCHAR(50), prize_pool INT); CREATE TABLE tournament_participation (id INT, tournament_id INT, player_id INT); INSERT INTO tournaments VALUES (1, \u0027Tournament1\u0027, 70000); INSERT INTO tournaments VALUES (2, \u0027Tournament2\u0027, 30000); INSERT INTO tournament_participation VALUES (1, 1, 1); INSERT INTO tournament_participation VALUES (2, 2, 2); INSERT INTO tournament_participation VALUES (3, 1, 3); INSERT INTO tournament_participation VALUES (4, 2, 3);", + "sql": "SELECT SUM(tournaments.prize_pool) FROM tournaments INNER JOIN tournament_participation ON tournaments.id \u003d tournament_participation.tournament_id INNER JOIN players ON tournament_participation.player_id \u003d players.id WHERE players.name \u003d \u0027Chloe Lee\u0027;", + "sql_explanation": "This query first joins the \u0027tournaments\u0027 table with the \u0027tournament_participation\u0027 table on the \u0027tournament_id\u0027 column, and then joins the result with the \u0027players\u0027 table on the \u0027player_id\u0027 column. 
It then filters the results to only include rows where the player\u0027s name is \u0027Chloe Lee\u0027, and calculates the total prize pool for the tournaments in which she has participated." +}, { + "id": "69", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many job applications were received for each open position?", + "sql_context": "CREATE TABLE job_openings (id INT, position_id INT, department_id INT, open_date DATE); CREATE TABLE applications (id INT, application_date DATE, position_id INT, applicant_id INT); INSERT INTO job_openings (id, position_id, department_id, open_date) VALUES (1, 101, 1, \u00272022-01-05\u0027), (2, 102, 2, \u00272022-01-07\u0027), (3, 103, 3, \u00272022-01-10\u0027); INSERT INTO applications (id, application_date, position_id, applicant_id) VALUES (1, \u00272022-01-06\u0027, 101, 1001), (2, \u00272022-01-08\u0027, 101, 1002), (3, \u00272022-01-09\u0027, 102, 1003), (4, \u00272022-01-12\u0027, 103, 1004), (5, \u00272022-01-15\u0027, 103, 1005);", + "sql": "SELECT positions.position_id, departments.name as department_name, COUNT(applications.id) as applications_count FROM applications JOIN job_openings as positions ON applications.position_id \u003d positions.position_id JOIN departments ON positions.department_id \u003d departments.id GROUP BY positions.position_id, departments.name;", + "sql_explanation": "The query joins job openings and applications tables, counting the number of applications for each open position and providing the number of job applications received for each open position." 
+}, { + "id": "157", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of employees who are male and female in each department in the \"employee\", \"department\", and \"gender\" tables", + "sql_context": "CREATE TABLE employee (id INT, department_id INT, gender_id INT); CREATE TABLE department (id INT, name TEXT); CREATE TABLE gender (id INT, name TEXT);", + "sql": "SELECT d.name, (COUNT(CASE WHEN g.name \u003d \u0027male\u0027 THEN 1 END) / COUNT(*)) * 100 AS pct_male, (COUNT(CASE WHEN g.name \u003d \u0027female\u0027 THEN 1 END) / COUNT(*)) * 100 AS pct_female FROM department d JOIN employee e ON d.id \u003d e.department_id JOIN gender g ON e.gender_id \u003d g.id GROUP BY d.name;", + "sql_explanation": "The query joins the \"employee\", \"department\", and \"gender\" tables on the department_id and gender_id columns. It then groups the results by department name and calculates the percentage of male and female employees in each department." 
+}, { + "id": "443", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the names and job titles of employees who have been with the company for more than 10 years in the \u0027hr\u0027 schema\u0027s \u0027employee_details\u0027 and \u0027employee_hires\u0027 tables", + "sql_context": "CREATE TABLE hr.employee_details (id INT, employee_id INT, first_name VARCHAR(50), last_name VARCHAR(50), department VARCHAR(50), birth_date DATE); CREATE TABLE hr.employee_hires (id INT, employee_id INT, hire_date DATE, job_id VARCHAR(20)); CREATE TABLE hr.jobs (id INT, job_id VARCHAR(20), job_title VARCHAR(50));", + "sql": "SELECT e.first_name, e.last_name, j.job_title FROM hr.employee_details e INNER JOIN hr.employee_hires h ON e.employee_id \u003d h.employee_id INNER JOIN hr.jobs j ON h.job_id \u003d j.job_id WHERE DATEDIFF(CURDATE(), h.hire_date) \u003e 3650;", + "sql_explanation": "The SQL query performs inner joins between the \u0027employee_details\u0027, \u0027employee_hires\u0027, and \u0027jobs\u0027 tables on the \u0027employee_id\u0027 and \u0027job_id\u0027 columns, respectively. It then filters the results to only include rows where the difference between the current date and the \u0027hire_date\u0027 column is more than 3650 days (i.e., employees who have been with the company for more than 10 years)." 
+}, { + "id": "52", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the total amount of resources (coal, iron, gold, silver) mined by each mine.", + "sql_context": "CREATE TABLE mine (id INT, name VARCHAR(50), location VARCHAR(50));CREATE TABLE coal_mine (mine_id INT, amount INT);CREATE TABLE iron_mine (mine_id INT, amount INT);CREATE TABLE gold_mine (mine_id INT, amount INT);CREATE TABLE silver_mine (mine_id INT, amount INT);", + "sql": "SELECT m.name, m.location, SUM(COALESCE(c.amount, 0) + COALESCE(i.amount, 0) + COALESCE(g.amount, 0) + COALESCE(s.amount, 0)) AS total_amount FROM mine m LEFT JOIN coal_mine c ON m.id \u003d c.mine_id LEFT JOIN iron_mine i ON m.id \u003d i.mine_id LEFT JOIN gold_mine g ON m.id \u003d g.mine_id LEFT JOIN silver_mine s ON m.id \u003d s.mine_id GROUP BY m.id, m.name, m.location;", + "sql_explanation": "We join the mine, coal_mine, iron_mine, gold_mine, and silver_mine tables based on the mine_id. Then, we calculate the total amount of resources mined by each mine, grouped by the mine table\u0027s id, name, and location. We use the COALESCE function to replace NULL values with 0." 
+}, { + "id": "115", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the drugs and their respective total sales for rare diseases indication with sales greater than the average sales for infectious diseases.", + "sql_context": "CREATE TABLE sales (id INT, drug_id INT, quarter INT, year INT, revenue FLOAT); INSERT INTO sales (id, drug_id, quarter, year, revenue) VALUES (1, 1, 1, 2022, 1500000); CREATE TABLE drugs (id INT, name VARCHAR(50), company VARCHAR(50), indication VARCHAR(50)); INSERT INTO drugs (id, name, company, indication) VALUES (1, \u0027DrugA\u0027, \u0027ABC Corp\u0027, \u0027Rare_Diseases\u0027);", + "sql": "SELECT s.drug_id, d.name, SUM(s.revenue) as total_sales FROM sales s JOIN drugs d ON s.drug_id \u003d d.id WHERE d.indication \u003d \u0027Rare_Diseases\u0027 GROUP BY s.drug_id HAVING total_sales \u003e (SELECT AVG(s2.revenue) FROM sales s2 JOIN drugs d2 ON s2.drug_id \u003d d2.id WHERE d2.indication \u003d \u0027Infectious_Diseases\u0027)", + "sql_explanation": "This query calculates the total sales of rare diseases drugs with sales greater than the average sales for infectious diseases drugs." 
+}, { + "id": "26", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of repeat attendees for each event type in 2021?", + "sql_context": "CREATE TABLE Events (EventID INT, EventTypeID INT, EventDate DATE); CREATE TABLE EventAttendance (EventID INT, AudienceID INT); CREATE TABLE Audience (AudienceID INT, AudienceName VARCHAR(50)); INSERT INTO Events (EventID, EventTypeID, EventDate) VALUES (1, 1, \u00272021-01-01\u0027), (2, 1, \u00272021-02-01\u0027), (3, 2, \u00272021-03-01\u0027); INSERT INTO EventAttendance (EventID, AudienceID) VALUES (1, 1), (1, 2), (2, 1), (2, 3), (3, 1), (3, 2); INSERT INTO Audience (AudienceID, AudienceName) VALUES (1, \u0027Alice\u0027), (2, \u0027Bob\u0027), (3, \u0027Charlie\u0027);", + "sql": "SELECT et.EventTypeName, COUNT(DISTINCT ea.AudienceID) as NumRepeatAttendees FROM EventAttendance ea INNER JOIN Events e ON ea.EventID \u003d e.EventID INNER JOIN EventTypes et ON e.EventTypeID \u003d et.EventTypeID INNER JOIN (SELECT AudienceID, COUNT(EventID) as NumEvents FROM EventAttendance GROUP BY AudienceID HAVING COUNT(EventID) \u003e 1) repeat_attendees ON ea.AudienceID \u003d repeat_attendees.AudienceID GROUP BY et.EventTypeName;", + "sql_explanation": "This query joins the EventAttendance, Events, and EventTypes tables and filters the Events table to only include events that occurred in 2021. It then joins the result with a subquery that identifies repeat attendees and calculates the number of repeat attendees for each event type." 
+}, { + "id": "604", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of military satellites owned by countries in the \u0027Africa\u0027 region?", + "sql_context": "CREATE TABLE regions (id INT, name VARCHAR(255)); CREATE TABLE countries (id INT, name VARCHAR(255), region_id INT); CREATE TABLE military_satellites (id INT, country_id INT, number INT);", + "sql": "SELECT c.name as country, MIN(ms.number) as min_satellites_owned FROM regions r JOIN countries c ON r.id \u003d c.region_id JOIN military_satellites ms ON c.id \u003d ms.country_id WHERE r.name \u003d \u0027Africa\u0027 GROUP BY c.id;", + "sql_explanation": "Join the regions, countries, and military_satellites tables, filter the results based on the name column of the regions table, group the results by country_id, and calculate the minimum number of military satellites owned by countries in the \u0027Africa\u0027 region." 
+}, { + "id": "1089", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify all cybersecurity incidents and the corresponding national security agencies involved, if any.", + "sql_context": "CREATE TABLE CybersecurityIncidents (IncidentID INT, IncidentName VARCHAR(255), IncidentDate DATE);CREATE TABLE AgencyIncidents (AgencyID INT, IncidentID INT, AgencyName VARCHAR(255));", + "sql": "SELECT I.IncidentName, A.AgencyName FROM CybersecurityIncidents I LEFT JOIN AgencyIncidents AI ON I.IncidentID \u003d AI.IncidentID LEFT JOIN Agencies A ON AI.AgencyID \u003d A.AgencyID;", + "sql_explanation": "This query performs a left join between CybersecurityIncidents and AgencyIncidents, and then another left join between AgencyIncidents and Agencies. This allows for identifying all cybersecurity incidents and the corresponding national security agencies involved, if any." 
+}, { + "id": "49", + "domain": "fitness industry", + "domain_description": "Workout data, membership demographics, wearable technology metrics, and wellness trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue generated from male members in the Midwest who used the cycling classes in the past 3 months?", + "sql_context": "CREATE TABLE Members (MemberID INT, Gender VARCHAR(10), Region VARCHAR(20), MembershipDate DATE); INSERT INTO Members (MemberID, Gender, Region, MembershipDate) VALUES (5, \u0027Male\u0027, \u0027Midwest\u0027, \u00272021-02-01\u0027); CREATE TABLE Classes (ClassID INT, ClassType VARCHAR(20), Duration INT, MemberID INT); INSERT INTO Classes (ClassID, ClassType, Duration, MemberID) VALUES (50, \u0027Cycling\u0027, 60, 5); CREATE TABLE Transactions (TransactionID INT, MemberID INT, Service VARCHAR(20), Amount DECIMAL(5,2)); INSERT INTO Transactions (TransactionID, MemberID, Service, Amount) VALUES (500, 5, \u0027Cycling\u0027, 100.00);", + "sql": "SELECT SUM(Transactions.Amount) FROM Members INNER JOIN Classes ON Members.MemberID \u003d Classes.MemberID INNER JOIN Transactions ON Members.MemberID \u003d Transactions.MemberID WHERE Members.Gender \u003d \u0027Male\u0027 AND Members.Region \u003d \u0027Midwest\u0027 AND Classes.ClassType \u003d \u0027Cycling\u0027 AND Transactions.TransactionDate BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) AND CURRENT_DATE;", + "sql_explanation": "The SQL query calculates the total revenue generated from male members in the Midwest who used the cycling classes in the past 3 months. It joins the \u0027Members\u0027, \u0027Classes\u0027, and \u0027Transactions\u0027 tables based on the \u0027MemberID\u0027 column. 
The WHERE clause filters the male members from the Midwest, the cycling classes, and the transactions within the past 3 months." +}, { + "id": "193", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many projects were completed in each sector by year?", + "sql_context": "CREATE TABLE Projects (ProjectID int, Sector varchar(50), Year int, Completed int); INSERT INTO Projects (ProjectID, Sector, Year, Completed) VALUES (1, \u0027Health\u0027, 2018, 1), (2, \u0027Education\u0027, 2017, 0), (3, \u0027Health\u0027, 2019, 1), (4, \u0027Infrastructure\u0027, 2018, 1);", + "sql": "SELECT y.Year, s.Sector, COUNT(p.ProjectID) AS CompletedProjects FROM (SELECT DISTINCT Year FROM Projects) y CROSS JOIN (SELECT DISTINCT Sector FROM Projects) s LEFT JOIN Projects p ON y.Year \u003d p.Year AND s.Sector \u003d p.Sector WHERE p.Completed \u003d 1 GROUP BY y.Year, s.Sector;", + "sql_explanation": "The query creates a cartesian product between Years and Sectors, and then joins the Projects table to count the number of completed projects in each sector by year." 
+}, { + "id": "88", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the average price per gram of cannabis concentrate at each dispensary in Q2 2022?", + "sql_context": "CREATE TABLE concentrate_prices (dispensary_id INT, sale_date DATE, price DECIMAL(10, 2)); INSERT INTO concentrate_prices (dispensary_id, sale_date, price) VALUES (1, \u00272022-04-01\u0027, 15), (1, \u00272022-04-15\u0027, 14), (1, \u00272022-05-05\u0027, 16), (2, \u00272022-04-03\u0027, 18), (2, \u00272022-04-30\u0027, 17), (2, \u00272022-05-20\u0027, 19);", + "sql": "SELECT d.name, AVG(cp.price / cs.weight) as avg_price_per_gram FROM concentrate_sales cs JOIN concentrate_prices cp ON cs.dispensary_id \u003d cp.dispensary_id AND cs.sale_date \u003d cp.sale_date JOIN dispensaries d ON cs.dispensary_id \u003d d.id WHERE cs.sale_date BETWEEN \u00272022-04-01\u0027 AND \u00272022-06-30\u0027 GROUP BY d.name;", + "sql_explanation": "This query joins the concentrate_sales and concentrate_prices tables on the dispensary_id and sale_date foreign keys. It then filters the data for Q2 2022, calculates the average price per gram of cannabis concentrate for each dispensary using the AVG() function, and divides the price by the weight. The result is grouped by dispensary." 
+}, { + "id": "94", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List dispensaries that have sold more than 100 units of a specific strain, \u0027Blue Dream\u0027, in the last month.", + "sql_context": "CREATE TABLE Dispensaries (DispensaryID INT, DispensaryName VARCHAR(50)); CREATE TABLE Strains (StrainID INT, StrainName VARCHAR(50)); CREATE TABLE Sales (SaleID INT, DispensaryID INT, StrainID INT, QuantitySold INT, SaleDate DATE);", + "sql": "SELECT D.DispensaryID, D.DispensaryName FROM Dispensaries D JOIN Sales S ON D.DispensaryID \u003d S.DispensaryID JOIN Strains ST ON S.StrainID \u003d ST.StrainID WHERE StrainName \u003d \u0027Blue Dream\u0027 AND S.SaleDate \u003e\u003d DATEADD(month, -1, GETDATE()) GROUP BY D.DispensaryID, D.DispensaryName HAVING SUM(QuantitySold) \u003e 100;", + "sql_explanation": "Identify dispensaries that have sold more than 100 units of the specific strain, \u0027Blue Dream\u0027, in the last month." 
+}, { + "id": "208", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all dispensaries in Colorado with a retail price per gram above the state average for Sativa strains.", + "sql_context": "CREATE TABLE dispensaries (id INT, name VARCHAR(50), state VARCHAR(20)); CREATE TABLE strains (id INT, name VARCHAR(50), type VARCHAR(20), price DECIMAL(5,2)); INSERT INTO dispensaries (id, name, state) VALUES (1, \u0027Rocky Mountain\u0027, \u0027Colorado\u0027), (2, \u0027Mile High\u0027, \u0027Colorado\u0027); INSERT INTO strains (id, name, type, price) VALUES (1, \u0027Jack Herer\u0027, \u0027Sativa\u0027, 10.00), (2, \u0027Durban Poison\u0027, \u0027Sativa\u0027, 12.00);", + "sql": "SELECT d.name FROM dispensaries d JOIN (SELECT dispensary_id, AVG(price) as avg_price FROM strains WHERE type \u003d \u0027Sativa\u0027 GROUP BY dispensary_id) s ON d.id \u003d s.dispensary_id JOIN strains st ON s.dispensary_id \u003d st.id WHERE st.type \u003d \u0027Sativa\u0027 AND st.price \u003e s.avg_price;", + "sql_explanation": "Join the dispensaries and strains table on dispensary_id and filter for Sativa strains. Calculate the average retail price per gram for Sativa strains for each dispensary and join back to the strains table to return all dispensaries in Colorado with a retail price per gram above the state average." 
+}, { + "id": "72", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the customers with the highest and lowest balances in the \u0027Investment\u0027 division?", + "sql_context": "CREATE TABLE Customers (CustomerID int, Name varchar(50), Division varchar(50)); INSERT INTO Customers (CustomerID, Name, Division) VALUES (1, \u0027John Doe\u0027, \u0027Banking\u0027), (2, \u0027Jane Smith\u0027, \u0027Investment\u0027), (3, \u0027Mike Johnson\u0027, \u0027Banking\u0027); CREATE TABLE Accounts (AccountID int, CustomerID int, Balance decimal(10,2)); INSERT INTO Accounts (AccountID, CustomerID, Balance) VALUES (101, 1, 5000), (102, 1, 7000), (103, 2, 12000), (104, 3, 3000), (105, 2, 8000);", + "sql": "SELECT c.Name, a.Balance FROM Customers c INNER JOIN Accounts a ON c.CustomerID \u003d a.CustomerID WHERE c.Division \u003d \u0027Investment\u0027 ORDER BY a.Balance DESC LIMIT 1; SELECT c.Name, a.Balance FROM Customers c INNER JOIN Accounts a ON c.CustomerID \u003d a.CustomerID WHERE c.Division \u003d \u0027Investment\u0027 ORDER BY a.Balance ASC LIMIT 1;", + "sql_explanation": "The SQL query performs an inner join between the Customers and Accounts tables based on the CustomerID. It then filters for customers in the \u0027Investment\u0027 division and orders the results by balance in descending order (for the highest balance) and ascending order (for the lowest balance). The LIMIT clause is used to return only the top result for each query." 
+}, { + "id": "76", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average transaction value in the last week, split by product category and customer demographics?", + "sql_context": "CREATE TABLE transactions (transaction_id INT, customer_id INT, product_id INT, category_id INT, transaction_date DATE, amount DECIMAL(10,2)); CREATE TABLE customers (customer_id INT, age INT, gender VARCHAR(10), location VARCHAR(255)); CREATE TABLE products (product_id INT, name VARCHAR(255), category_id INT);", + "sql": "SELECT c.age, c.gender, p.category_id, AVG(t.amount) as avg_transaction_value FROM transactions t INNER JOIN customers c ON t.customer_id \u003d c.customer_id INNER JOIN products p ON t.product_id \u003d p.product_id WHERE t.transaction_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY c.age, c.gender, p.category_id;", + "sql_explanation": "This query starts by selecting age, gender, category ID, and the average transaction amount. It then joins transactions, customers, and products tables. The WHERE clause filters transactions within the last week. The GROUP BY clause groups the result by customer age, gender, and product category." 
+}, { + "id": "6", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the number of mobile and broadband subscribers per region, and their respective percentage contributions to total subscribers in each service category.", + "sql_context": "CREATE TABLE MobileSubscribers (SubscriberID int, Region varchar(10), Service varchar(10)); CREATE TABLE BroadbandSubscribers (SubscriberID int, Region varchar(10), Service varchar(10)); INSERT INTO MobileSubscribers (SubscriberID, Region, Service) VALUES (1, \u0027North\u0027, \u0027mobile\u0027), (2, \u0027North\u0027, \u0027mobile\u0027), (3, \u0027South\u0027, \u0027mobile\u0027), (4, \u0027East\u0027, \u0027mobile\u0027), (5, \u0027West\u0027, \u0027broadband\u0027); INSERT INTO BroadbandSubscribers (SubscriberID, Region, Service) VALUES (1, \u0027North\u0027, \u0027broadband\u0027), (2, \u0027South\u0027, \u0027broadband\u0027), (3, \u0027East\u0027, \u0027broadband\u0027), (4, \u0027West\u0027, \u0027broadband\u0027), (5, \u0027North\u0027, \u0027broadband\u0027);", + "sql": "SELECT R.Region, S.Service, COUNT(M.SubscriberID) AS MobileCount, COUNT(B.SubscriberID) AS BroadbandCount, (COUNT(M.SubscriberID)::float / (COUNT(M.SubscriberID) + COUNT(B.SubscriberID))) * 100 AS MobilePercent, (COUNT(B.SubscriberID)::float / (COUNT(M.SubscriberID) + COUNT(B.SubscriberID))) * 100 AS BroadbandPercent FROM MobileSubscribers M FULL OUTER JOIN BroadbandSubscribers B ON M.Region \u003d B.Region AND M.Service \u003d B.Service JOIN Regions R ON M.Region \u003d R.Region JOIN Services S ON M.Service \u003d S.Service 
GROUP BY R.Region, S.Service;", + "sql_explanation": "Performs a FULL OUTER JOIN on MobileSubscribers and BroadbandSubscribers tables, grouped by Region and Service. Calculates the count of subscribers for mobile and broadband and their respective percentage contributions to total subscribers in each service category." +}, { + "id": "301", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average usage time for mobile plans in each country?", + "sql_context": "CREATE TABLE usage (id INT, subscriber_id INT, plan_id INT, usage_time DECIMAL(10,2)); CREATE TABLE mobile_plans (id INT, name VARCHAR(255), type VARCHAR(255), price DECIMAL(10,2)); CREATE TABLE subscribers (id INT, name VARCHAR(255), plan_id INT, country VARCHAR(255)); CREATE TABLE countries (id INT, name VARCHAR(255));", + "sql": "SELECT countries.name AS country, AVG(usage_time) FROM usage JOIN mobile_plans ON usage.plan_id \u003d mobile_plans.id JOIN subscribers ON usage.subscriber_id \u003d subscribers.id JOIN countries ON subscribers.country \u003d countries.id GROUP BY countries.name;", + "sql_explanation": "This query retrieves the average usage time for mobile plans in each country by performing a join on the \"usage\" table, \"mobile_plans\" table, \"subscribers\" table, and \"countries\" table. It then groups the results by the \"country\" column and calculates the average usage time." 
+}, { + "id": "214", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total value of military equipment sales for each defense contractor, grouped by region?", + "sql_context": "CREATE TABLE contractor_regions (contractor_id INT, region VARCHAR(255)); INSERT INTO contractor_regions (contractor_id, region) VALUES (1, \u0027North America\u0027), (2, \u0027North America\u0027), (3, \u0027Europe\u0027);", + "sql": "SELECT r.region, d.contractor_name, SUM(s.sale_value) as total_sales FROM defense_contractors d INNER JOIN contractor_regions r ON d.contractor_id \u003d r.contractor_id INNER JOIN military_sales s ON d.contractor_id \u003d s.contractor_id GROUP BY r.region, d.contractor_name;", + "sql_explanation": "This query joins the defense_contractors, contractor_regions, and military_sales tables on the contractor_id field. It then calculates the total sales for each defense contractor by summing the sale_value field and grouping by region and contractor_name." 
+}, { + "id": "424", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total budget allocated for auditory assistance support programs in the \u0027West\u0027 region?", + "sql_context": "CREATE TABLE Regions (RegionID INT, RegionName VARCHAR(50)); CREATE TABLE SupportServices (ServiceID INT, ServiceName VARCHAR(50), ServiceType VARCHAR(50), Budget DECIMAL(10,2)); CREATE TABLE SupportPrograms (ProgramID INT, ProgramName VARCHAR(50), Budget DECIMAL(10,2), RegionID INT, ServiceID INT); INSERT INTO Regions (RegionID, RegionName) VALUES (1, \u0027Northeast\u0027), (2, \u0027Southeast\u0027), (3, \u0027Midwest\u0027), (4, \u0027South\u0027), (5, \u0027West\u0027); INSERT INTO SupportServices (ServiceID, ServiceName, ServiceType, Budget) VALUES (1, \u0027ASL Interpreter\u0027, \u0027SignLanguage\u0027, 15000), (2, \u0027Wheelchair Ramp\u0027, \u0027PhysicalAccess\u0027, 8000), (3, \u0027Braille Materials\u0027, \u0027VisualAssistance\u0027, 12000), (4, \u0027Assistive Listening Devices\u0027, \u0027AuditoryAssistance\u0027, 10000); INSERT INTO SupportPrograms (ProgramID, ProgramName, Budget, RegionID, ServiceID) VALUES (1, \u0027Braille Materials\u0027, 12000, 1, 3), (2, \u0027Low Vision Aids\u0027, 15000, 1, 4), (3, \u0027Color Contrast Software\u0027, 9000, 2, 4), (4, \u0027Screen Magnifiers\u0027, 11000, 2, 4);", + "sql": "SELECT SUM(sp.Budget) as TotalBudget FROM SupportPrograms sp JOIN Regions r ON sp.RegionID \u003d r.RegionID JOIN SupportServices ss ON sp.ServiceID \u003d ss.ServiceID WHERE ss.ServiceType \u003d \u0027AuditoryAssistance\u0027 
AND r.RegionName \u003d \u0027West\u0027;", + "sql_explanation": "This query calculates the total budget for auditory assistance support programs in the \u0027West\u0027 region. It performs an inner join between the SupportPrograms, Regions, and SupportServices tables on the RegionID and ServiceID columns. Then, it filters the results based on the ServiceType column being \u0027AuditoryAssistance\u0027 and the RegionName column being \u0027West\u0027. Lastly, it calculates the total budget for the filtered results using the SUM function." +}, { + "id": "508", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of students with mobility impairments enrolled in support programs in the Northeast?", + "sql_context": "CREATE TABLE Students (student_id INT, name VARCHAR(50), mobility_impairment BOOLEAN); CREATE TABLE Support_Programs (program_id INT, state VARCHAR(50), student_id INT); CREATE TABLE Regions (region VARCHAR(50), state VARCHAR(50));", + "sql": "SELECT COUNT(DISTINCT S.student_id) FROM Students S INNER JOIN Support_Programs SP ON S.student_id \u003d SP.student_id INNER JOIN Regions R ON S.state \u003d R.state WHERE S.mobility_impairment \u003d TRUE AND R.region \u003d \u0027Northeast\u0027;", + "sql_explanation": "This query first creates three tables named \u0027Students\u0027, \u0027Support_Programs\u0027, and \u0027Regions\u0027 with 3, 3, and 2 columns, respectively. Then it inserts a sample record for a student with mobility impairment in the \u0027Students\u0027 table. 
The query then counts the total number of students with mobility impairments who are enrolled in support programs in the Northeast by performing an inner join on the \u0027Students\u0027, \u0027Support_Programs\u0027, and \u0027Regions\u0027 tables and filtering the data based on the given conditions." +}, { + "id": "571", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all students without accommodations who are enrolled in specific programs.", + "sql_context": "CREATE TABLE Student_Program_Mapping (student_id INT, program_id INT);", + "sql": "SELECT s.name as student_name FROM Students s LEFT JOIN Student_Program_Mapping spm ON s.id \u003d spm.student_id LEFT JOIN Support_Programs sp ON spm.program_id \u003d sp.id WHERE sp.name \u003d \u0027Special Ed\u0027 AND sp.id IS NULL;", + "sql_explanation": "This query joins the Students, Student_Program_Mapping, and Support_Programs tables using student_id and program_id, and lists all students without accommodations who are enrolled in specific programs." 
+}, { + "id": "584", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find total cost of accommodations per disability type for students in Texas.", + "sql_context": "CREATE TABLE Accommodations (id INT, student_id INT, disability_type VARCHAR(50), cost FLOAT);", + "sql": "SELECT d.disability_type, SUM(a.cost) as total_cost FROM Accommodations a JOIN Students s ON a.student_id \u003d s.id JOIN Disabilities d ON a.disability_type \u003d d.type WHERE s.state \u003d \u0027TX\u0027 GROUP BY d.disability_type;", + "sql_explanation": "This query joins the Accommodations, Students, and Disabilities tables using student_id and disability_type. It filters students from Texas and calculates the total cost per disability type." 
+}, { + "id": "156", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total budget allocated to schools and hospitals in each city, ranked from highest to lowest.", + "sql_context": "CREATE TABLE cities (city_id INT, city_name VARCHAR(255)); CREATE TABLE schools (school_id INT, school_name VARCHAR(255), city_id INT, budget INT); CREATE TABLE hospitals (hospital_id INT, hospital_name VARCHAR(255), city_id INT, budget INT);", + "sql": "SELECT c.city_name, SUM(s.budget) as total_school_budget, SUM(h.budget) as total_hospital_budget FROM cities c LEFT JOIN schools s ON c.city_id \u003d s.city_id LEFT JOIN hospitals h ON c.city_id \u003d h.city_id GROUP BY c.city_name ORDER BY total_school_budget + total_hospital_budget DESC;", + "sql_explanation": "Join the cities, schools, and hospitals tables, and group by city to show the total budget allocated to schools and hospitals in each city. Use left joins to include cities with no schools or hospitals, and rank the results from highest to lowest total budget." 
+}, { + "id": "20", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the music genres with the lowest number of unique artists and their total album sales.", + "sql_context": "CREATE TABLE Artists (ArtistID INT, ArtistName VARCHAR(255), Genre VARCHAR(50)); CREATE TABLE Albums (AlbumID INT, AlbumName VARCHAR(255), Sales INT, ArtistID INT);", + "sql": "SELECT A.Genre, COUNT(DISTINCT A.ArtistName) AS Artists_Count, SUM(Albums.Sales) AS Total_Sales FROM Artists A INNER JOIN (SELECT ArtistID, COUNT(DISTINCT ArtistID) FROM Artists GROUP BY Genre HAVING COUNT(DISTINCT ArtistID) \u003d (SELECT MIN(ArtistCount) FROM (SELECT COUNT(DISTINCT ArtistID) AS ArtistCount FROM Artists GROUP BY Genre) T1)) B ON A.ArtistID \u003d B.ArtistID INNER JOIN Albums ON A.ArtistID \u003d Albums.ArtistID GROUP BY A.Genre;", + "sql_explanation": "This query identifies the music genres with the lowest number of unique artists and their total album sales. It first counts the number of unique artists for each genre and identifies the genre(s) with the lowest number of unique artists using a subquery. Then, it performs an inner join with the Artists table to get the genre information. Finally, it performs another inner join with the Albums table to calculate the total album sales for the identified genre(s)." 
+}, { + "id": "2191", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total revenue from ticket sales for each city?", + "sql_context": "CREATE TABLE cities (city_id INT, city VARCHAR(50));CREATE TABLE teams (team_id INT, team_name VARCHAR(50), city VARCHAR(50));CREATE TABLE tickets (ticket_id INT, team_id INT, price DECIMAL(5,2)); INSERT INTO cities (city_id, city) VALUES (1, \u0027Atlanta\u0027), (2, \u0027Boston\u0027); INSERT INTO teams (team_id, team_name, city) VALUES (1, \u0027Atlanta Hawks\u0027, \u0027Atlanta\u0027), (2, \u0027Boston Celtics\u0027, \u0027Boston\u0027); INSERT INTO tickets (ticket_id, team_id, price) VALUES (1, 1, 70.50), (2, 1, 80.00), (3, 2, 100.00);", + "sql": "SELECT c.city, SUM(t.price) FROM cities c JOIN teams t ON c.city \u003d t.city JOIN tickets ti ON t.team_id \u003d ti.team_id GROUP BY c.city;", + "sql_explanation": "We join the cities, teams, and tickets tables and then group by city to calculate the total revenue from ticket sales for each city." 
+}, { + "id": "116", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the total number of traditional artists and language speakers in each region.", + "sql_context": "CREATE TABLE regions (id INT, name VARCHAR); INSERT INTO regions (id, name) VALUES (1, \u0027Region A\u0027), (2, \u0027Region B\u0027); CREATE TABLE traditional_artists (id INT, region_id INT, art_type VARCHAR); INSERT INTO traditional_artists (id, region_id) VALUES (1, 1), (2, 2); CREATE TABLE language_speakers (id INT, region_id INT, language VARCHAR); INSERT INTO language_speakers (id, region_id) VALUES (1, 1), (2, 2);", + "sql": "SELECT regions.name, COUNT(traditional_artists.id) AS total_artists, COUNT(language_speakers.id) AS total_speakers FROM regions LEFT JOIN traditional_artists ON regions.id \u003d traditional_artists.region_id LEFT JOIN language_speakers ON regions.id \u003d language_speakers.region_id GROUP BY regions.id;", + "sql_explanation": "This query performs a left join on the regions, traditional_artists, and language_speakers tables, using the region_id as the common key. It then groups the results by the regions.id and counts the total number of traditional artists and language speakers in each region." 
+}, { + "id": "247", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the unique art forms in each region and the number of associated artifacts?", + "sql_context": "CREATE TABLE ArtForm (ArtFormID INT, ArtFormName VARCHAR(50), RegionID INT); INSERT INTO ArtForm (ArtFormID, ArtFormName, RegionID) VALUES (1, \u0027Batik\u0027, 1), (2, \u0027Ikat Weaving\u0027, 1), (3, \u0027Tambourine\u0027, 2), (4, \u0027Calligraphy\u0027, 2); CREATE TABLE Artifact (ArtifactID INT, ArtifactName VARCHAR(50), ArtFormID INT); INSERT INTO Artifact (ArtifactID, ArtifactName, ArtFormID) VALUES (1, \u0027Indonesian Batik Shawl\u0027, 1), (2, \u0027Bali Ikat Cloth\u0027, 1), (3, \u0027Tunisian Tambourine\u0027, 3), (4, \u0027Arabic Calligraphy Scroll\u0027, 4);", + "sql": "SELECT r.RegionName, a.ArtFormName, COUNT(a.ArtifactID) as ArtifactCount FROM ArtForm a JOIN (SELECT DISTINCT RegionID, RegionName FROM ArtForm) r ON a.RegionID \u003d r.RegionID JOIN Artifact art ON a.ArtFormID \u003d art.ArtFormID GROUP BY r.RegionName, a.ArtFormName;", + "sql_explanation": "The SQL query creates a derived table with distinct regions and region names from the ArtForm table. It then joins the ArtForm and Artifact tables on the ArtFormID foreign key and groups the results by RegionName and ArtFormName, counting the number of ArtifactIDs for each unique art form in each region." 
+}, { + "id": "211", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many IoT sensors were installed in rural and urban areas in each country in the past quarter?", + "sql_context": "CREATE TABLE country (id INTEGER, name TEXT);CREATE TABLE region (id INTEGER, country_id INTEGER, name TEXT, type TEXT);CREATE TABLE iot_sensor (id INTEGER, region_id INTEGER, installed_date DATE);", + "sql": "SELECT co.name as country, r.type as area_type, COUNT(s.id) as num_sensors FROM country co INNER JOIN region r ON co.id \u003d r.country_id INNER JOIN iot_sensor s ON r.id \u003d s.region_id WHERE s.installed_date \u003e\u003d DATEADD(quarter, -1, CURRENT_DATE) GROUP BY co.name, r.type;", + "sql_explanation": "The SQL query calculates the number of IoT sensors installed in rural and urban areas in each country in the past quarter by joining the country, region, and iot_sensor tables on the appropriate columns, filtering for records where the installed_date is within the past quarter, and then grouping the results by country and area type, counting the number of sensors for each group." 
+}, { + "id": "327", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total area of farmland and the number of crops for each farmer in the past month?", + "sql_context": "CREATE TABLE farmer (id INTEGER, name TEXT);CREATE TABLE farmland (id INTEGER, farmer_id INTEGER, type TEXT, area FLOAT, start_date DATE, end_date DATE);CREATE TABLE crop (id INTEGER, farmland_id INTEGER, type TEXT, planted_date DATE);", + "sql": "SELECT f.name, SUM(fl.area) as total_area, COUNT(c.id) as num_crops FROM farmer f INNER JOIN farmland fl ON f.id \u003d fl.farmer_id INNER JOIN crop c ON fl.id \u003d c.farmland_id WHERE c.planted_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE) GROUP BY f.name;", + "sql_explanation": "The SQL query calculates the total area of farmland and the number of crops for each farmer in the past month by joining the farmer, farmland, and crop tables on the appropriate columns, filtering for records where the planted_date is within the past month, and then grouping the results by farmer name, calculating the total area and number of crops for each group." 
+}, { + "id": "360", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the maximum and minimum temperature for each crop type during the growing season", + "sql_context": "CREATE TABLE crop (id INT, name VARCHAR(255)); INSERT INTO crop (id, name) VALUES (1, \u0027Corn\u0027), (2, \u0027Soybeans\u0027), (3, \u0027Wheat\u0027); CREATE TABLE growing_season (id INT, crop_id INT, start_date DATE, end_date DATE); CREATE TABLE weather (id INT, crop_id INT, temperature DECIMAL(5,2), timestamp TIMESTAMP); INSERT INTO crop (id, name) VALUES (1, \u0027Corn\u0027), (2, \u0027Soybeans\u0027), (3, \u0027Wheat\u0027); INSERT INTO growing_season (id, crop_id, start_date, end_date) VALUES (1, 1, \u00272021-04-01\u0027, \u00272021-09-30\u0027), (2, 2, \u00272021-05-01\u0027, \u00272021-10-15\u0027), (3, 3, \u00272021-03-15\u0027, \u00272021-08-31\u0027); INSERT INTO weather (id, crop_id, temperature, timestamp) VALUES (1, 1, 22.5, \u00272021-04-02 10:00:00\u0027), (2, 2, 20.0, \u00272021-05-05 15:00:00\u0027), (3, 3, 28.0, \u00272021-03-20 09:00:00\u0027);", + "sql": "SELECT c.name, MAX(w.temperature) AS max_temp, MIN(w.temperature) AS min_temp FROM crop c JOIN growing_season gs ON c.id \u003d gs.crop_id JOIN weather w ON c.id \u003d w.crop_id AND w.timestamp BETWEEN gs.start_date AND gs.end_date GROUP BY c.name;", + "sql_explanation": "This query first joins the \u0027crop\u0027 table, \u0027growing_season\u0027 table, and \u0027weather\u0027 table based on crop_id and the timestamp within the growing season\u0027s start and end dates. 
Then, it groups the results by crop name and calculates the maximum and minimum temperature for each crop type during the growing season." +}, { + "id": "1258", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the names of farmers who grow crops with a growth stage of \u0027Harvest\u0027 but do not use Drip irrigation.", + "sql_context": "CREATE TABLE Farmers (id INT, name VARCHAR(50), age INT, country VARCHAR(50)); INSERT INTO Farmers (id, name, age, country) VALUES (1, \u0027Alexei Ivanov\u0027, 60, \u0027Russia\u0027); CREATE TABLE Crops (id INT, name VARCHAR(50), growth_stage VARCHAR(50), Farm_id INT); INSERT INTO Crops (id, name, growth_stage, Farm_id) VALUES (1, \u0027Wheat\u0027, \u0027Harvest\u0027, 1); CREATE TABLE Irrigation (id INT, Farm_id INT, irrigation_type VARCHAR(50), duration INT); INSERT INTO Irrigation (id, Farm_id, irrigation_type, duration) VALUES (1, 1, \u0027Sprinkler\u0027, 30);", + "sql": "SELECT f.name FROM Farmers f JOIN Crops c ON f.id \u003d c.Farm_id LEFT JOIN Irrigation i ON f.id \u003d i.Farm_id WHERE c.growth_stage \u003d \u0027Harvest\u0027 AND i.irrigation_type IS NULL;", + "sql_explanation": "This query joins the Farmers and Crops tables on the Farm_id column, then performs a left join on the Irrigation table with the same Farm_id column, filters on the growth_stage column, and checks for NULL in the irrigation_type column to find farmers who grow crops with a growth stage of \u0027Harvest\u0027 but do not use Drip irrigation." 
+}, { + "id": "388", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of bookings and revenue generated by each OTA for hotels in Berlin?", + "sql_context": "CREATE TABLE otas (ota_id INT, ota_name TEXT, bookings INT, revenue INT); INSERT INTO otas (ota_id, ota_name, bookings, revenue) VALUES (1, \u0027Booking.com\u0027, 100, 15000), (2, \u0027Expedia\u0027, 75, 11000), (3, \u0027Agoda\u0027, 50, 7500); CREATE TABLE bookings (booking_id INT, ota_id INT, hotel_id INT, bookings INT, revenue INT); INSERT INTO bookings (booking_id, ota_id, hotel_id, bookings, revenue) VALUES (1, 1, 1, 15, 225), (2, 1, 2, 10, 150), (3, 2, 3, 7, 110), (4, 2, 4, 3, 50), (5, 3, 5, 2, 35), (6, 3, 6, 1, 20); CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT); INSERT INTO hotels (hotel_id, hotel_name, city) VALUES (1, \u0027The Ritz Berlin\u0027, \u0027Berlin\u0027), (2, \u0027The Regent Berlin\u0027, \u0027Berlin\u0027), (3, \u0027The Mandala Hotel\u0027, \u0027Berlin\u0027), (4, \u0027The Adlon Kempinski Berlin\u0027, \u0027Berlin\u0027), (5, \u0027The Ritz-Carlton Berlin\u0027, \u0027Berlin\u0027), (6, \u0027The Hotel Adlon\u0027, \u0027Berlin\u0027);", + "sql": "SELECT ota_name, SUM(bookings) as total_bookings, SUM(revenue) as total_revenue FROM otas JOIN bookings ON otas.ota_id \u003d bookings.ota_id JOIN hotels ON bookings.hotel_id \u003d hotels.hotel_id WHERE hotels.city \u003d \u0027Berlin\u0027 GROUP BY ota_name;", + "sql_explanation": "The SQL query joins the otas, bookings, and hotels tables, filters the rows where the city is \u0027Berlin\u0027, groups 
the result by ota_name, and calculates the total number of bookings and revenue generated by each OTA for hotels in Berlin." +}, { + "id": "954", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many virtual tours have been engaged with for hotels that have implemented AI-powered solutions?", + "sql_context": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, region TEXT); CREATE TABLE ai_solutions (solution_id INT, hotel_id INT, implemented_date DATE); CREATE TABLE virtual_tours (tour_id INT, hotel_id INT, engagement_score INT); INSERT INTO hotels (hotel_id, hotel_name, region) VALUES (1, \u0027Luxury Resort\u0027, \u0027APAC\u0027); INSERT INTO ai_solutions (solution_id, hotel_id, implemented_date) VALUES (1, 1, \u00272021-01-01\u0027); INSERT INTO virtual_tours (tour_id, hotel_id, engagement_score) VALUES (1, 1, 75);", + "sql": "SELECT COUNT(DISTINCT vt.hotel_id) AS total_tours_engaged FROM virtual_tours vt INNER JOIN hotels h ON vt.hotel_id \u003d h.hotel_id INNER JOIN ai_solutions ai ON h.hotel_id \u003d ai.hotel_id;", + "sql_explanation": "This SQL query counts the number of unique hotel_ids in the virtual_tours table where there is also a corresponding record in both the hotels and ai_solutions tables, indicating a hotel with implemented AI-powered solutions and virtual tour engagement data." 
+}, { + "id": "252", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many papers on algorithmic fairness were published in the past year by authors from each country, in the AI Research database?", + "sql_context": "CREATE TABLE authors (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO authors (id, name, country) VALUES (1, \u0027Alice\u0027, \u0027USA\u0027), (2, \u0027Bob\u0027, \u0027Canada\u0027); CREATE TABLE papers (id INT, title VARCHAR(255), published_date DATE, author_id INT); INSERT INTO papers (id, title, published_date, author_id) VALUES (1, \u0027Paper1\u0027, \u00272021-06-01\u0027, 1), (2, \u0027Paper2\u0027, \u00272020-12-25\u0027, 2); CREATE TABLE topics (id INT, paper_id INT, title VARCHAR(255)); INSERT INTO topics (id, paper_id, title) VALUES (1, 1, \u0027Algorithmic Fairness\u0027), (2, 2, \u0027AI Safety\u0027);", + "sql": "SELECT authors.country, COUNT(*) FROM papers JOIN authors ON papers.author_id \u003d authors.id JOIN topics ON papers.id \u003d topics.paper_id WHERE topics.title \u003d \u0027Algorithmic Fairness\u0027 AND YEAR(papers.published_date) \u003d YEAR(CURRENT_DATE()) GROUP BY authors.country;", + "sql_explanation": "This query counts the number of papers on algorithmic fairness published in the past year by authors from each country in the AI Research database. It joins the papers, authors, and topics tables based on their relationships and filters for papers published in the past year on algorithmic fairness. It then groups the results by the country of the authors and returns the count of the papers for each country." 
+}, { + "id": "875", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the researchers that have published papers in AI conferences held in the USA?", + "sql_context": "CREATE TABLE ai_conferences(id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50)); INSERT INTO ai_conferences (id, name, location) VALUES (1, \u0027NeurIPS\u0027, \u0027USA\u0027), (2, \u0027ICML\u0027, \u0027Canada\u0027); CREATE TABLE ai_papers(id INT PRIMARY KEY, title VARCHAR(50), researcher_id INT, conference_id INT); INSERT INTO ai_papers (id, title, researcher_id, conference_id) VALUES (1, \u0027Fair AI\u0027, 1, 1), (2, \u0027AI Safety\u0027, 3, 2); CREATE TABLE researcher_conferences(researcher_id INT, conference_id INT); INSERT INTO researcher_conferences (researcher_id, conference_id) VALUES (1, 1), (2, 1), (3, 1), (3, 2);", + "sql": "SELECT DISTINCT r.name FROM ai_researcher r INNER JOIN researcher_conferences rc ON r.id \u003d rc.researcher_id INNER JOIN ai_conferences c ON rc.conference_id \u003d c.id WHERE c.location \u003d \u0027USA\u0027;", + "sql_explanation": "This query performs inner joins between the \u0027ai_researcher\u0027, \u0027researcher_conferences\u0027, and \u0027ai_conferences\u0027 tables, joining on the \u0027id\u0027 and \u0027researcher_id\u0027 columns, as well as the \u0027conference_id\u0027 columns. It then filters the results to only show rows where the \u0027location\u0027 column value in the \u0027ai_conferences\u0027 table is \u0027USA\u0027, and selects the distinct \u0027name\u0027 values from the \u0027ai_researcher\u0027 table." 
+}, { + "id": "177", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which organizations have received funding for climate change initiatives from investors based in the US?", + "sql_context": "CREATE TABLE organizations (org_id INT, org_name TEXT, country TEXT); INSERT INTO organizations (org_id, org_name, country) VALUES (1, \u0027Sample Org 1\u0027, \u0027USA\u0027), (2, \u0027Sample Org 2\u0027, \u0027Canada\u0027); CREATE TABLE investments (investment_id INT, org_id INT, investor_id INT, initiative_topic TEXT); INSERT INTO investments (investment_id, org_id, investor_id, initiative_topic) VALUES (1, 1, 3, \u0027Climate Change\u0027), (2, 2, 4, \u0027Education\u0027); CREATE TABLE investors (investor_id INT, investor_name TEXT, country TEXT); INSERT INTO investors (investor_id, investor_name, country) VALUES (3, \u0027US Investor 1\u0027, \u0027USA\u0027), (4, \u0027Canadian Investor 1\u0027, \u0027Canada\u0027);", + "sql": "SELECT organizations.org_name FROM organizations INNER JOIN investments ON organizations.org_id \u003d investments.org_id INNER JOIN investors ON investments.investor_id \u003d investors.investor_id WHERE organizations.country \u003d \u0027USA\u0027 AND investments.initiative_topic \u003d \u0027Climate Change\u0027;", + "sql_explanation": "This query first performs an inner join between the organizations and investments tables based on the org_id. Then, it performs another inner join between the resulting table and the investors table based on investor_id. 
The query filters the results to only include organizations from the USA that have received funding for climate change initiatives." +}, { + "id": "189", + "domain": "social impact investing", + "domain_description": "Detailed records on investment strategies, risk assessment, impact measurement, and ESG (Environmental, Social, and Governance) factors in social impact investing.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which social issues have the highest impact scores in projects located in Southeast Asia?", + "sql_context": "CREATE TABLE social_issues (id INT PRIMARY KEY, name VARCHAR(255), impact_score INT);CREATE TABLE projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), budget DECIMAL(10,2));CREATE TABLE project_issues (project_id INT, issue_id INT, PRIMARY KEY (project_id, issue_id));CREATE VIEW high_impact_issues AS SELECT * FROM social_issues WHERE impact_score \u003e\u003d 75;", + "sql": "SELECT p.name, p.location, si.name as issue, si.impact_score FROM projects p JOIN project_issues pi ON p.id \u003d pi.project_id JOIN social_issues si ON pi.issue_id \u003d si.id WHERE p.location \u003d \u0027Southeast Asia\u0027 AND si.impact_score IN (SELECT impact_score FROM high_impact_issues);", + "sql_explanation": "This query joins the projects, project_issues, and social_issues tables, filtering for rows where the project location is Southeast Asia and the social issue impact score is in the high_impact_issues view. It returns the project name, location, associated high-impact social issues, and their impact scores." 
+}, { + "id": "110", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the investors in the most recent Series C round in the fintech sector?", + "sql_context": "CREATE TABLE investment_rounds (id INT, round_type TEXT, date DATE, company_id INT); INSERT INTO investment_rounds (id, round_type, date, company_id) VALUES (1, \u0027Series A\u0027, \u00272019-01-01\u0027, 1), (2, \u0027Series B\u0027, \u00272020-01-01\u0027, 2), (3, \u0027Series C\u0027, \u00272021-01-01\u0027, 3);", + "sql": "SELECT investors.name FROM investors JOIN investment_rounds ON investors.id \u003d investment_rounds.investor_id JOIN companies ON investment_rounds.company_id \u003d companies.id WHERE investment_rounds.round_type \u003d \u0027Series C\u0027 AND companies.industry \u003d \u0027Fintech\u0027 ORDER BY investment_rounds.date DESC LIMIT 1;", + "sql_explanation": "This query finds the investors in the most recent Series C round in the fintech sector. It does this by joining the investment_rounds and investors tables on the id field, and then joining the investment_rounds and companies tables on the company_id field. It filters for Series C rounds and the fintech sector, and then orders the results by date in descending order and selects the top row using LIMIT 1. Finally, it selects the name field from the investors table to get the name of the investor." 
+}, { + "id": "1219", + "domain": "startups venture capital", + "domain_description": "Company founding data, investment rounds, exit strategies, and diversity metrics in the startup ecosystem.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total funding for companies with female founders, in the healthcare industry, that have had an exit?", + "sql_context": "CREATE TABLE exit (id INT, company_id INT, type TEXT, value FLOAT); INSERT INTO exit (id, company_id, type, value) VALUES (1, 1, \u0027Acquisition\u0027, 200000000.0); CREATE TABLE company (id INT, name TEXT, industry TEXT, founder TEXT, PRIMARY KEY (id)); INSERT INTO company (id, name, industry, founder) VALUES (1, \u0027HeartCo\u0027, \u0027Healthcare\u0027, \u0027Female\u0027); CREATE TABLE investment (id INT, company_id INT, investor TEXT, year INT, amount FLOAT); INSERT INTO investment (id, company_id, investor, year, amount) VALUES (1, 1, \u0027Kleiner Perkins\u0027, 2017, 30000000.0);", + "sql": "SELECT SUM(i.amount) FROM investment i JOIN company c ON i.company_id \u003d c.id JOIN exit e ON c.id \u003d e.company_id WHERE c.founder \u003d \u0027Female\u0027 AND c.industry \u003d \u0027Healthcare\u0027;", + "sql_explanation": "This query joins the investment, company, and exit tables, filters for female-founded healthcare companies with exits, and calculates the total funding." 
+}, { + "id": "865", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "multiple_joins", + "sql_complexity_description": "two or more joins (specify inner, outer, cross)", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which buildings in Japan have the highest percentage of green roofs?", + "sql_context": "CREATE TABLE Building (id INT, name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), sqft INT, PRIMARY KEY (id)); INSERT INTO Building (id, name, city, country, sqft) VALUES (9, \u0027Mount Fuji\u0027, \u0027Fujiyoshida\u0027, \u0027Japan\u0027, 3776600); INSERT INTO Building (id, name, city, country, sqft) VALUES (10, \u0027Himeji Castle\u0027, \u0027Himeji\u0027, \u0027Japan\u0027, 1045400); CREATE TABLE GreenRoof (id INT, building_id INT, planted_date DATE, size INT, PRIMARY KEY (id), FOREIGN KEY (building_id) REFERENCES Building (id)); INSERT INTO GreenRoof (id, building_id, planted_date, size) VALUES (9, 9, \u00272018-02-01\u0027, 400000); INSERT INTO GreenRoof (id, building_id, planted_date, size) VALUES (10, 10, \u00272015-06-01\u0027, 250000); CREATE TABLE Roof (id INT, building_id INT, size INT, PRIMARY KEY (id), FOREIGN KEY (building_id) REFERENCES Building (id)); INSERT INTO Roof (id, building_id, size) VALUES (9, 9, 800000); INSERT INTO Roof (id, building_id, size) VALUES (10, 10, 500000);", + "sql": "SELECT b.name, (g.size/r.size)*100 as \u0027% of Green Roof\u0027 FROM GreenRoof g JOIN Building b ON g.building_id \u003d b.id JOIN Roof r ON b.id \u003d r.building_id ORDER BY \u0027% of Green Roof\u0027 DESC LIMIT 1;", + "sql_explanation": "This SQL query returns the name of the building in Japan with the highest percentage of green roof." 
+}, { + "id": "317", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the names of the actors who have acted in both movies and TV shows.", + "sql_context": "CREATE TABLE actors (id INT, name VARCHAR(255)); CREATE TABLE roles (id INT, actor_id INT, media_id INT, media_type VARCHAR(10)); INSERT INTO actors VALUES (1, \u0027Actor A\u0027); INSERT INTO actors VALUES (2, \u0027Actor B\u0027); INSERT INTO roles VALUES (1, 1, 1, \u0027movie\u0027); INSERT INTO roles VALUES (2, 1, 2, \u0027tv_show\u0027); INSERT INTO roles VALUES (3, 2, 3, \u0027movie\u0027); INSERT INTO roles VALUES (4, 2, 4, \u0027tv_show\u0027);", + "sql": "SELECT a.name FROM actors a JOIN roles r ON a.id \u003d r.actor_id JOIN (SELECT media_id FROM roles WHERE media_type \u003d \u0027movie\u0027 INTERSECT SELECT media_id FROM roles WHERE media_type \u003d \u0027tv_show\u0027) intersect_table ON r.media_id \u003d intersect_table.media_id;", + "sql_explanation": "First, create two tables, actors and roles, with respective inserts. Then, find the names of the actors who have acted in both movies and TV shows using the JOIN and INTERSECT clauses." 
+}, { + "id": "613", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the titles of all videos and articles that contain words related to \u0027diversity\u0027 or \u0027inclusion\u0027.", + "sql_context": "CREATE TABLE articles (id INT, title TEXT, content TEXT); CREATE TABLE videos (id INT, title TEXT, url TEXT); INSERT INTO articles (id, title, content) VALUES (1, \u0027Article 1\u0027, \u0027This is an article about diversity\u0027); INSERT INTO videos (id, title, url) VALUES (1, \u0027Video 1\u0027, \u0027URL 1\u0027); INSERT INTO videos (id, title, url) VALUES (2, \u0027Video 2\u0027, \u0027URL 2\u0027); INSERT INTO articles (id, title, content) VALUES (2, \u0027Article 2\u0027, \u0027This is another article about inclusion\u0027);", + "sql": "SELECT title FROM articles WHERE lower(content) LIKE \u0027%diversity%\u0027 OR lower(content) LIKE \u0027%inclusion%\u0027 UNION SELECT title FROM videos WHERE lower(title) LIKE \u0027%diversity%\u0027 OR lower(title) LIKE \u0027%inclusion%\u0027;", + "sql_explanation": "The query first searches for the words \u0027diversity\u0027 or \u0027inclusion\u0027 in the content column of the articles table and returns the corresponding titles. Then, it searches for the words \u0027diversity\u0027 or \u0027inclusion\u0027 in the title column of the videos table and returns the corresponding titles. The UNION operator is used to combine the results from both queries." 
+}, { + "id": "702", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which media literacy programs have the highest and lowest average scores, and what are those scores?", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR(50), score INT); INSERT INTO programs (id, name, score) VALUES (1, \u0027Media Matters\u0027, 85), (2, \u0027News Literacy Project\u0027, 88), (3, \u0027Common Sense Media\u0027, 82);", + "sql": "SELECT name, score as highest_score FROM programs WHERE score \u003d (SELECT MAX(score) FROM programs) UNION SELECT name, score as lowest_score FROM programs WHERE score \u003d (SELECT MIN(score) FROM programs);", + "sql_explanation": "We first find the highest and lowest scores using subqueries with the MAX and MIN functions. Then, we use the UNION operator to combine the results from two SELECT statements: one to find the name and score of the highest-scoring program, and another to find the name and score of the lowest-scoring program." 
+}, { + "id": "1808", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Compare the CO2 emissions of the transportation sector in Japan and South Africa.", + "sql_context": "CREATE TABLE co2_emissions (country VARCHAR(20), sector VARCHAR(20), co2_emissions INT); INSERT INTO co2_emissions (country, sector, co2_emissions) VALUES (\u0027Japan\u0027, \u0027transportation\u0027, 240000), (\u0027South Africa\u0027, \u0027transportation\u0027, 180000);", + "sql": "SELECT co2_emissions FROM co2_emissions WHERE country \u003d \u0027Japan\u0027 INTERSECT SELECT co2_emissions FROM co2_emissions WHERE country \u003d \u0027South Africa\u0027;", + "sql_explanation": "The SQL query uses the INTERSECT operator to compare the CO2 emissions of the transportation sector in Japan and South Africa, returning only the common row(s) between the two queries." 
+}, { + "id": "2007", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average carbon price in the European Union Emissions Trading System over the last month?", + "sql_context": "CREATE TABLE carbon_prices (id INT, date DATE, price FLOAT); INSERT INTO carbon_prices (id, date, price) VALUES (1, \u00272022-01-01\u0027, 30.0), (2, \u00272022-01-02\u0027, 31.0), (3, \u00272022-01-03\u0027, 29.0);", + "sql": "SELECT AVG(price) FROM carbon_prices WHERE date \u003e\u003d DATEADD(day, -30, CURRENT_DATE) AND region \u003d \u0027European Union Emissions Trading System\u0027;", + "sql_explanation": "This query calculates the average carbon price in the European Union Emissions Trading System over the last month by taking the average of the price values in the carbon_prices table where the date is within the last 30 days and the region is the European Union Emissions Trading System." 
+}, { + "id": "780", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of players who have played both \"Arena Shooter\" and \"Battle Royale\" games.", + "sql_context": " CREATE TABLE Game (id INT, name VARCHAR(255)); INSERT INTO Game (id, name) VALUES (1, \u0027Arena Shooter\u0027), (2, \u0027Battle Royale\u0027); ", + "sql": "SELECT COUNT(DISTINCT PlayerId) FROM (SELECT PlayerId FROM GamePlayer G1 WHERE G1.name \u003d \u0027Arena Shooter\u0027 INTERSECT SELECT PlayerId FROM GamePlayer G2 WHERE G2.name \u003d \u0027Battle Royale\u0027) AS Subquery;", + "sql_explanation": "First, the INTERSECT operator is used to find the common PlayerId\u0027s that have played both \"Arena Shooter\" and \"Battle Royale\" games. Then, COUNT(DISTINCT PlayerId) is used to find the total number of players who have played both game types." 
+}, { + "id": "1218", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total number of games played by each team in the \u0027nba_games\u0027 table.", + "sql_context": "CREATE TABLE nba_teams (team_id INT, team_name VARCHAR(255)); INSERT INTO nba_teams (team_id, team_name) VALUES (1, \u0027Atlanta Hawks\u0027), (2, \u0027Boston Celtics\u0027); CREATE TABLE nba_games (game_id INT, home_team_id INT, away_team_id INT);", + "sql": "SELECT home_team_id AS team_id, COUNT(*) AS total_games FROM nba_games GROUP BY home_team_id UNION ALL SELECT away_team_id, COUNT(*) FROM nba_games GROUP BY away_team_id;", + "sql_explanation": "This query uses the UNION ALL operator to combine two SELECT statements. The first SELECT statement retrieves the COUNT of games where the home_team_id is equal to the team_id in the nba_teams table. The second SELECT statement retrieves the COUNT of games where the away_team_id is equal to the team_id in the nba_teams table. The result is a list of teams and their total number of games played." 
+}, { + "id": "42", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show all peacekeeping operations that have been conducted by the UN in the last decade, along with the number of participating countries.", + "sql_context": "CREATE TABLE PeacekeepingOperations (ID INT, OperationName TEXT, OperationDate DATE, ParticipatingCountries TEXT); INSERT INTO PeacekeepingOperations VALUES (1, \u0027Operation 1\u0027, \u00272012-01-01\u0027, \u0027USA, UK, France\u0027);", + "sql": "SELECT OperationName, ParticipatingCountries, COUNT(DISTINCT SUBSTRING_INDEX(ParticipatingCountries, \u0027,\u0027, n)) as NumberOfCountries FROM PeacekeepingOperations p CROSS JOIN (SELECT numbers.N FROM (SELECT 1 as N UNION ALL SELECT 2 UNION ALL SELECT 3) numbers) n WHERE OperationDate BETWEEN DATEADD(year, -10, GETDATE()) AND GETDATE() GROUP BY OperationName, ParticipatingCountries;", + "sql_explanation": "This query creates a table for PeacekeepingOperations and inserts data into it. The SQL query then uses a cross join to split the ParticipatingCountries column into multiple rows for each participating country. It filters the records based on the condition that OperationDate should be within the last 10 years. Lastly, it groups the records by OperationName and ParticipatingCountries and calculates the count of total participating countries." 
+}, { + "id": "47", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of peacekeeping operations and defense diplomacy events in the Indo-Pacific region?", + "sql_context": "CREATE TABLE Peacekeeping_Operations (Operation_ID INT, Operation_Name VARCHAR(50), Start_Date DATE, Region VARCHAR(50)); INSERT INTO Peacekeeping_Operations (Operation_ID, Operation_Name, Start_Date, Region) VALUES (1, \u0027Operation Pacific Partnership\u0027, \u00272006-01-01\u0027, \u0027Indo-Pacific\u0027); CREATE TABLE Defense_Diplomacy (Event_ID INT, Event_Name VARCHAR(50), Start_Date DATE, Region VARCHAR(50)); INSERT INTO Defense_Diplomacy (Event_ID, Event_Name, Start_Date, Region) VALUES (1, \u0027Defense Dialogue\u0027, \u00272018-01-01\u0027, \u0027Indo-Pacific\u0027);", + "sql": "SELECT SUM(CASE WHEN Table_Name \u003d \u0027Peacekeeping_Operations\u0027 THEN 1 ELSE 0 END) as Total_Peacekeeping_Operations, SUM(CASE WHEN Table_Name \u003d \u0027Defense_Diplomacy\u0027 THEN 1 ELSE 0 END) as Total_Defense_Diplomacy FROM (SELECT \u0027Peacekeeping_Operations\u0027 as Table_Name FROM Peacekeeping_Operations UNION ALL SELECT \u0027Defense_Diplomacy\u0027 as Table_Name FROM Defense_Diplomacy) as T;", + "sql_explanation": "This SQL query uses a subquery to count the number of peacekeeping operations and defense diplomacy events in the Indo-Pacific region by summing the results of a case statement that checks the table name." 
+}, { + "id": "2667", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which military aircraft are used by both the United States and China?", + "sql_context": "CREATE TABLE us_aircraft (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO us_aircraft (id, name, country) VALUES (1, \u0027F-15 Eagle\u0027, \u0027USA\u0027), (2, \u0027F-16 Fighting Falcon\u0027, \u0027USA\u0027); CREATE TABLE china_aircraft (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO china_aircraft (id, name, country) VALUES (3, \u0027J-10\u0027, \u0027China\u0027), (4, \u0027J-11\u0027, \u0027China\u0027);", + "sql": "SELECT name FROM us_aircraft WHERE country \u003d \u0027USA\u0027 INTERSECT SELECT name FROM china_aircraft WHERE country \u003d \u0027China\u0027;", + "sql_explanation": "This query first selects the names of military aircraft from the us_aircraft table where the country is \u0027USA\u0027. Then, it intersects the result with the names of military aircraft from the china_aircraft table where the country is \u0027China\u0027. This returns the common military aircraft names used by both countries." 
+}, { + "id": "703", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the names of all countries that had more tourists visiting in 2021 compared to 2020.", + "sql_context": "CREATE TABLE tourism_stats (country VARCHAR(20), year INT, tourists INT); INSERT INTO tourism_stats (country, year, tourists) VALUES (\u0027Japan\u0027, 2020, 12000), (\u0027Japan\u0027, 2021, 15000), (\u0027France\u0027, 2020, 18000), (\u0027France\u0027, 2021, 20000), (\u0027Germany\u0027, 2020, 10000), (\u0027Germany\u0027, 2021, 12000), (\u0027Italy\u0027, 2020, 9000), (\u0027Italy\u0027, 2021, 11000);", + "sql": "SELECT country FROM tourism_stats WHERE country IN (SELECT country FROM tourism_stats WHERE year \u003d 2021 INTERSECT SELECT country FROM tourism_stats WHERE year \u003d 2020) AND tourists_2021 \u003e tourists_2020;", + "sql_explanation": "This query first identifies the countries that appear in both the 2021 and 2020 data sets using the INTERSECT operator, and then checks the tourists column for those countries to see if there was an increase in visitors in 2021 compared to 2020." 
+}, { + "id": "530", + "domain": "cosmetics", + "domain_description": "Consumer preference data, ingredient sourcing information, product safety records, and cruelty-free certification metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which products have passing safety inspections and use ingredients sourced from the USA and Mexico?", + "sql_context": "CREATE TABLE IngredientSources (ProductID INT, Ingredient VARCHAR(50), SourceCountry VARCHAR(50), PRIMARY KEY (ProductID, Ingredient)); INSERT INTO IngredientSources (ProductID, Ingredient, SourceCountry) VALUES (1, \u0027Water\u0027, \u0027Canada\u0027), (1, \u0027Glycerin\u0027, \u0027USA\u0027), (2, \u0027Water\u0027, \u0027Mexico\u0027), (2, \u0027Glycerin\u0027, \u0027Canada\u0027); CREATE TABLE ProductSafetyRecords (ProductID INT, InspectionDate DATE, Result ENUM(\u0027Pass\u0027, \u0027Fail\u0027)); INSERT INTO ProductSafetyRecords (ProductID, InspectionDate, Result) VALUES (1, \u00272021-01-01\u0027, \u0027Pass\u0027), (1, \u00272021-02-01\u0027, \u0027Pass\u0027), (2, \u00272021-01-01\u0027, \u0027Fail\u0027);", + "sql": "SELECT ProductID FROM IngredientSources WHERE SourceCountry IN (\u0027USA\u0027, \u0027Mexico\u0027) GROUP BY ProductID HAVING COUNT(DISTINCT SourceCountry) \u003d 2 INTERSECT SELECT ProductID FROM ProductSafetyRecords WHERE Result \u003d \u0027Pass\u0027;", + "sql_explanation": "Find products that have passing safety inspections and use ingredients sourced from both the USA and Mexico by grouping by ProductID and checking if there is a unique count (COUNT(DISTINCT SourceCountry) \u003d 2) for each SourceCountry, intersecting with the list of ProductID from the ProductSafetyRecords table that have passing inspections." 
+}, { + "id": "1300", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the mental health scores of students who have not participated in professional development programs and those who have.", + "sql_context": "CREATE TABLE student_scores (student_id INT, mental_health_score INT, participated_in_pd BOOLEAN); INSERT INTO student_scores (student_id, mental_health_score, participated_in_pd) VALUES (1, 75, true), (2, 80, false), (3, 60, true);", + "sql": "SELECT mental_health_score FROM student_scores WHERE participated_in_pd \u003d true UNION SELECT mental_health_score FROM student_scores WHERE participated_in_pd \u003d false;", + "sql_explanation": "This query uses UNION to combine the mental health scores (SELECT mental_health_score) of students who have participated in professional development programs (WHERE participated_in_pd \u003d true) and those who have not (WHERE participated_in_pd \u003d false)." 
+}, { + "id": "612", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the birthdates of policyholders who have policies in both the \u0027Auto\u0027 and \u0027Life\u0027 categories, and whose birthdate is before January 1, 1985?", + "sql_context": "CREATE TABLE Policyholder (PolicyholderID INT, Name TEXT, Birthdate DATE, PolicyType TEXT); INSERT INTO Policyholder (PolicyholderID, Name, Birthdate, PolicyType) VALUES (1, \u0027John Doe\u0027, \u00271980-05-01\u0027, \u0027Auto\u0027), (2, \u0027Jane Smith\u0027, \u00271970-10-15\u0027, \u0027Life\u0027), (3, \u0027Mike Johnson\u0027, \u00271975-02-22\u0027, \u0027Auto\u0027), (4, \u0027Alice Williams\u0027, \u00272000-03-27\u0027, \u0027Life\u0027);", + "sql": "SELECT Birthdate FROM Policyholder WHERE PolicyType \u003d \u0027Auto\u0027 INTERSECT SELECT Birthdate FROM Policyholder WHERE PolicyType \u003d \u0027Life\u0027 INTERSECT SELECT Birthdate FROM Policyholder WHERE Birthdate \u003c \u00271985-01-01\u0027;", + "sql_explanation": "First, we select the Birthdate column from the Policyholder table where the PolicyType is \u0027Auto\u0027. Then, we use the INTERSECT operator to find the common records between this set and the set of records where the PolicyType is \u0027Life\u0027. Next, we use the INTERSECT operator again to find the common records between this set and the set of records where the Birthdate is before January 1, 1985. This returns the birthdates of policyholders who have policies in both the \u0027Auto\u0027 and \u0027Life\u0027 categories, and whose birthdate is before January 1, 1985." 
+}, { + "id": "104", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all the unique tags used in articles related to \u0027climate change\u0027 across \u0027The New York Times\u0027 and \u0027The Washington Post\u0027.", + "sql_context": "CREATE TABLE nytimes (id INT, title VARCHAR(255), content TEXT, tags TEXT, pub_date DATE); INSERT INTO nytimes (id, title, content, tags, pub_date) VALUES (1, \u0027Title1\u0027, \u0027Content1\u0027, \u0027climate change,tag1\u0027, \u00272022-01-01\u0027); CREATE TABLE wa_post (id INT, title VARCHAR(255), content TEXT, tags TEXT, pub_date DATE); INSERT INTO wa_post (id, title, content, tags, pub_date) VALUES (1, \u0027Title2\u0027, \u0027Content2\u0027, \u0027climate change,tag2\u0027, \u00272022-01-02\u0027);", + "sql": "SELECT DISTINCT trim(SPLIT_PART(tags, \u0027,\u0027, n)) as tag FROM (SELECT tags, generate_series(1, ARRAY_LENGTH(string_to_array(tags, \u0027,\u0027))) as n FROM (SELECT tags FROM nytimes WHERE lower(tags) like \u0027%climate change%\u0027 UNION ALL SELECT tags FROM wa_post WHERE lower(tags) like \u0027%climate change%\u0027) subquery) sq;", + "sql_explanation": "This query first extracts the tags related to \u0027climate change\u0027 from the \u0027tags\u0027 column of the \u0027nytimes\u0027 and \u0027wa_post\u0027 tables. Then, it splits the tags using the SPLIT_PART function and trims any whitespaces. Finally, it selects the unique tags using the DISTINCT operator." 
+}, { + "id": "258", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify any investigative journalism projects published by \u0027CCTV\u0027 or \u0027Al Arabiya\u0027 but not by \u0027BBC Persian\u0027.", + "sql_context": "CREATE TABLE cctv (project_id INT, project_name VARCHAR(50), source VARCHAR(20), investigative_journalism BOOLEAN); INSERT INTO cctv (project_id, project_name, source, investigative_journalism) VALUES (1, \u0027Project A\u0027, \u0027CCTV\u0027, TRUE), (2, \u0027Project B\u0027, \u0027CCTV\u0027, FALSE); CREATE TABLE al_arabiya (project_id INT, project_name VARCHAR(50), source VARCHAR(20), investigative_journalism BOOLEAN); INSERT INTO al_arabiya (project_id, project_name, source, investigative_journalism) VALUES (3, \u0027Project C\u0027, \u0027Al Arabiya\u0027, TRUE), (4, \u0027Project D\u0027, \u0027Al Arabiya\u0027, FALSE); CREATE TABLE bbc_persian (project_id INT, project_name VARCHAR(50), source VARCHAR(20), investigative_journalism BOOLEAN); INSERT INTO bbc_persian (project_id, project_name, source, investigative_journalism) VALUES (5, \u0027Project E\u0027, \u0027BBC Persian\u0027, TRUE), (6, \u0027Project F\u0027, \u0027BBC Persian\u0027, FALSE);", + "sql": "SELECT project_name, source FROM cctv WHERE investigative_journalism \u003d TRUE UNION ALL SELECT project_name, source FROM al_arabiya WHERE investigative_journalism \u003d TRUE EXCEPT SELECT project_name, source FROM bbc_persian WHERE investigative_journalism \u003d TRUE;", + "sql_explanation": "Find investigative projects for \u0027CCTV\u0027 and \u0027Al Arabiya\u0027 and combine them using 
UNION ALL. Remove projects that also appear in \u0027BBC Persian\u0027 using EXCEPT." +}, { + "id": "5036", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the difference in the number of pieces between \u0027Painting\u0027 and \u0027Drawing\u0027 tables?", + "sql_context": "CREATE TABLE Painting (id INT PRIMARY KEY, name VARCHAR(50), artist VARCHAR(50), date DATE); CREATE TABLE Drawing (id INT PRIMARY KEY, name VARCHAR(50), artist VARCHAR(50), date DATE);", + "sql": "SELECT COUNT(*) FROM Painting EXCEPT SELECT COUNT(*) FROM Drawing;", + "sql_explanation": "This query uses the EXCEPT operator to calculate the difference between the number of rows in the \u0027Painting\u0027 table and the \u0027Drawing\u0027 table." 
+}, { + "id": "2988", + "domain": "oceanography", + "domain_description": "Marine life research data, ocean floor mapping projects, pollution control initiatives, and maritime law compliance.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of marine life research stations in the Atlantic and Pacific Oceans?", + "sql_context": "CREATE TABLE atlantic_ocean (id INT, station TEXT); CREATE TABLE pacific_ocean (id INT, station TEXT); INSERT INTO atlantic_ocean (id, station) VALUES (1, \u0027Research Station A\u0027), (2, \u0027Research Station B\u0027); INSERT INTO pacific_ocean (id, station) VALUES (1, \u0027Research Station C\u0027), (2, \u0027Research Station D\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT * FROM atlantic_ocean UNION ALL SELECT * FROM pacific_ocean) AS ocean_stations;", + "sql_explanation": "Calculate the total number of marine life research stations in the Atlantic and Pacific Oceans by using a subquery with the UNION ALL operator to combine the records from both tables and returning the count of records." 
+}, { + "id": "1955", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the minimum number of hospital beds in hospitals and clinics in rural areas?", + "sql_context": "CREATE TABLE Hospitals (name VARCHAR(255), location VARCHAR(255), type VARCHAR(255), num_beds INT); INSERT INTO Hospitals (name, location, type, num_beds) VALUES (\u0027Rural General Hospital\u0027, \u0027Springfield\u0027, \u0027Hospital\u0027, 50), (\u0027Rural Critical Access Hospital\u0027, \u0027Maplewood\u0027, \u0027Hospital\u0027, 10); CREATE TABLE Clinics (name VARCHAR(255), location VARCHAR(255), type VARCHAR(255), num_beds INT); INSERT INTO Clinics (name, location, type, num_beds) VALUES (\u0027Rural Community Health Center\u0027, \u0027Oakwood\u0027, \u0027Clinic\u0027, 0);", + "sql": "SELECT MIN(num_beds) FROM Hospitals WHERE location LIKE \u0027%rural%\u0027 UNION ALL SELECT MIN(num_beds) FROM Clinics WHERE location LIKE \u0027%rural%\u0027;", + "sql_explanation": "This SQL query calculates the minimum number of hospital beds in hospitals and clinics in rural areas by using the UNION ALL operator to combine the minimum values of the num_beds column from the Hospitals and Clinics tables, where the location contains the word \u0027rural\u0027." 
+}, { + "id": "2306", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of hospitals and clinics in rural areas of Canada and the United States?", + "sql_context": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, country TEXT); INSERT INTO hospitals (id, name, location, country) VALUES (1, \u0027Hospital A\u0027, \u0027Rural Canada\u0027, \u0027Canada\u0027), (2, \u0027Hospital B\u0027, \u0027Rural USA\u0027, \u0027USA\u0027); CREATE TABLE clinics (id INT, name TEXT, location TEXT, country TEXT); INSERT INTO clinics (id, name, location, country) VALUES (1, \u0027Clinic A\u0027, \u0027Rural Canada\u0027, \u0027Canada\u0027), (2, \u0027Clinic B\u0027, \u0027Rural USA\u0027, \u0027USA\u0027);", + "sql": "SELECT COUNT(*) FROM hospitals WHERE location LIKE \u0027%rural%\u0027 UNION ALL SELECT COUNT(*) FROM clinics WHERE location LIKE \u0027%rural%\u0027", + "sql_explanation": "This query first counts the number of hospitals in rural areas of Canada and the United States, then combines it with the count of clinics in rural areas using the UNION ALL operator." 
+}, { + "id": "3053", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What\u0027s the total number of members in the \u0027government\u0027 and \u0027media\u0027 unions?", + "sql_context": "CREATE TABLE government_union (id INT, name VARCHAR, dob DATE); INSERT INTO government_union (id, name, dob) VALUES (1, \u0027Quinn\u0027, \u00271995-01-05\u0027); CREATE TABLE media_union (id INT, name VARCHAR, dob DATE); INSERT INTO media_union (id, name, dob) VALUES (1, \u0027Rachel\u0027, \u00271998-09-30\u0027);", + "sql": "SELECT COUNT(*) FROM ( (SELECT * FROM government_union) UNION (SELECT * FROM media_union) ) AS all_unions;", + "sql_explanation": "The SQL query first creates two tables, \u0027government_union\u0027 and \u0027media_union\u0027, and inserts some records. Then, it uses a UNION operator to combine the records from both tables. Lastly, it calculates the total number of records (members) in the combined table." 
+}, { + "id": "826", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the genetic research projects that have not received private funding and list the funding sources for each.", + "sql_context": "CREATE TABLE genetic_research (project_id INT, project_name VARCHAR(100), partner VARCHAR(50)); INSERT INTO genetic_research VALUES (1, \u0027Genome Mapping\u0027, \u0027Public\u0027); INSERT INTO genetic_research VALUES (2, \u0027DNA Sequencing\u0027, \u0027Private\u0027); INSERT INTO genetic_research VALUES (3, \u0027Gene Therapy\u0027, \u0027Public-Private\u0027);", + "sql": "SELECT project_name, partner AS funding_source FROM genetic_research WHERE partner \u003c\u003e \u0027Private\u0027 UNION SELECT project_name, \u0027No Private Funding\u0027 FROM genetic_research WHERE partner \u003d \u0027Private\u0027;", + "sql_explanation": "This SQL query identifies genetic research projects that have not received private funding (WHERE partner \u003d \u0027Private\u0027) and lists the funding sources for each project. It does so by combining two SELECT statements using UNION. The first statement selects the project name and partner (renamed to funding_source) if the partner is not \u0027Private\u0027. The second statement selects the project name and the text \u0027No Private Funding\u0027 if the partner is \u0027Private\u0027." 
+}, { + "id": "2015", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all suppliers who provide materials for both wind energy and solar energy systems.", + "sql_context": "CREATE TABLE suppliers (supplier_id INT, supplier_name VARCHAR(50), energy_type VARCHAR(50)); INSERT INTO suppliers (supplier_id, supplier_name, energy_type) VALUES (1, \u0027SupplierA\u0027, \u0027Wind\u0027), (2, \u0027SupplierB\u0027, \u0027Solar\u0027), (3, \u0027SupplierC\u0027, \u0027Wind\u0027), (4, \u0027SupplierD\u0027, \u0027Solar\u0027), (5, \u0027SupplierE\u0027, \u0027Wind\u0027), (6, \u0027SupplierF\u0027, \u0027Solar\u0027), (7, \u0027SupplierG\u0027, \u0027Both\u0027);", + "sql": "SELECT supplier_name FROM suppliers WHERE energy_type \u003d \u0027Wind\u0027 INTERSECT SELECT supplier_name FROM suppliers WHERE energy_type \u003d \u0027Solar\u0027;", + "sql_explanation": "This query uses the INTERSECT operator to select the supplier_name column from the suppliers table, filtering rows where the energy_type value is \u0027Wind\u0027 and \u0027Solar\u0027. The INTERSECT operator returns only the common rows between these two SELECT statements." 
+}, { + "id": "46", + "domain": "mental health", + "domain_description": "In-depth data on mental health conditions, treatment approaches, patient outcomes, and public awareness campaigns in mental health.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of patients who received therapy in \u0027clinic_5\u0027 and \u0027clinic_6\u0027", + "sql_context": "CREATE TABLE clinic_5 (patient_id INT, therapy_received BOOLEAN); INSERT INTO clinic_5 (patient_id, therapy_received) VALUES (1, true), (2, false), (3, true), (12, false); CREATE TABLE clinic_6 (patient_id INT, therapy_received BOOLEAN); INSERT INTO clinic_6 (patient_id, therapy_received) VALUES (4, true), (5, true), (6, false), (13, true);", + "sql": "SELECT clinic, COUNT(*) * 100.0 / total_patients AS therapy_percentage FROM (SELECT \u0027clinic_5\u0027 AS clinic, therapy_received FROM clinic_5 UNION ALL SELECT \u0027clinic_6\u0027 AS clinic, therapy_received FROM clinic_6) AS all_clinics CROSS JOIN (SELECT COUNT(*) AS total_patients FROM (SELECT patient_id FROM clinic_5 UNION SELECT patient_id FROM clinic_6)) AS totals GROUP BY clinic;", + "sql_explanation": "Calculate the percentage of patients who received therapy in clinics 5 and 6. First, create two tables, clinic_5, clinic_6, each with two columns: patient_id and therapy_received. Insert the provided records into each table. Then, compute the percentage of patients who received therapy in both clinics using the UNION ALL operator to combine the tables, and group the results by clinic using the GROUP BY clause." 
+}, { + "id": "2973", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "find the number of healthcare facilities and the number of unique ZIP codes in the HealthcareFacilities table, using an INTERSECT operator", + "sql_context": "CREATE TABLE HealthcareFacilities (ID INT, Name TEXT, ZipCode TEXT, City TEXT, State TEXT, Capacity INT); INSERT INTO HealthcareFacilities (ID, Name, ZipCode, City, State, Capacity) VALUES (1, \u0027General Hospital\u0027, \u002712345\u0027, \u0027Anytown\u0027, \u0027NY\u0027, 500), (2, \u0027Community Clinic\u0027, \u002767890\u0027, \u0027Othertown\u0027, \u0027NY\u0027, 100);", + "sql": "SELECT COUNT(*) FROM HealthcareFacilities INTERSECT SELECT COUNT(DISTINCT ZipCode) FROM HealthcareFacilities;", + "sql_explanation": "The SQL query uses the INTERSECT operator to find the number of records that are both healthcare facilities and unique ZIP codes in the HealthcareFacilities table." 
+}, { + "id": "3067", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "find the number of healthcare facilities and the number of unique ZIP codes in the HealthcareFacilities table, using an EXCEPT operator", + "sql_context": "CREATE TABLE HealthcareFacilities (ID INT, Name TEXT, ZipCode TEXT, City TEXT, State TEXT, Capacity INT); INSERT INTO HealthcareFacilities (ID, Name, ZipCode, City, State, Capacity) VALUES (1, \u0027General Hospital\u0027, \u002712345\u0027, \u0027Anytown\u0027, \u0027NY\u0027, 500), (2, \u0027Community Clinic\u0027, \u002767890\u0027, \u0027Othertown\u0027, \u0027NY\u0027, 100);", + "sql": "SELECT COUNT(*) FROM HealthcareFacilities EXCEPT SELECT COUNT(DISTINCT ZipCode) FROM HealthcareFacilities;", + "sql_explanation": "The SQL query uses the EXCEPT operator to find the number of records that are healthcare facilities, but not unique ZIP codes in the HealthcareFacilities table." 
+}, { + "id": "4072", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many healthcare access issues were reported in the Central and Antarctic regions?", + "sql_context": "CREATE TABLE central_access (region VARCHAR(255), issue VARCHAR(255)); INSERT INTO central_access (region, issue) VALUES (\u0027Central\u0027, \u0027Lack of Primary Care Physicians\u0027); INSERT INTO central_access (region, issue) VALUES (\u0027Central\u0027, \u0027Long Wait Times\u0027); CREATE TABLE antarctic_access (region VARCHAR(255), issue VARCHAR(255)); INSERT INTO antarctic_access (region, issue) VALUES (\u0027Antarctic\u0027, \u0027Lack of Medical Specialists\u0027); INSERT INTO antarctic_access (region, issue) VALUES (\u0027Antarctic\u0027, \u0027Emergency Response Time\u0027);", + "sql": "SELECT COUNT(*) FROM central_access UNION ALL SELECT COUNT(*) FROM antarctic_access;", + "sql_explanation": "The SQL query uses the UNION ALL operator to combine the count of issues from both central_access and antarctic_access tables." 
+}, { + "id": "124", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of crime incidents, emergency calls, and fire incidents in each district?", + "sql_context": "CREATE TABLE crime_incidents (id INT, district VARCHAR(20), type VARCHAR(20)); INSERT INTO crime_incidents (id, district, type) VALUES (1, \u0027district1\u0027, \u0027theft\u0027), (2, \u0027district2\u0027, \u0027burglary\u0027); CREATE TABLE emergency_calls (id INT, district VARCHAR(20), type VARCHAR(20)); INSERT INTO emergency_calls (id, district, type) VALUES (1, \u0027district1\u0027, \u0027emergency call\u0027); CREATE TABLE fire_incidents (id INT, district VARCHAR(20), type VARCHAR(20)); INSERT INTO fire_incidents (id, district, type) VALUES (1, \u0027district2\u0027, \u0027fire incident\u0027);", + "sql": "SELECT district, \u0027crime incidents\u0027 AS type, COUNT(*) FROM crime_incidents GROUP BY district UNION ALL SELECT district, \u0027emergency calls\u0027 AS type, COUNT(*) FROM emergency_calls GROUP BY district UNION ALL SELECT district, \u0027fire incidents\u0027 AS type, COUNT(*) FROM fire_incidents GROUP BY district;", + "sql_explanation": "This query uses the UNION ALL operator to combine the count of crime incidents, emergency calls, and fire incidents in each district." 
+}, { + "id": "3804", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of police officers and firefighters in the city of New York?", + "sql_context": "CREATE TABLE nyc_police_officers (id INT, officer_name VARCHAR(255), officer_type VARCHAR(255)); INSERT INTO nyc_police_officers (id, officer_name, officer_type) VALUES (1, \u0027James Brown\u0027, \u0027Detective\u0027); CREATE TABLE nyc_firefighters (id INT, firefighter_name VARCHAR(255), firefighter_type VARCHAR(255)); INSERT INTO nyc_firefighters (id, firefighter_name, firefighter_type) VALUES (1, \u0027Sarah Johnson\u0027, \u0027Fire Captain\u0027);", + "sql": "SELECT COUNT(*) FROM nyc_police_officers UNION ALL SELECT COUNT(*) FROM nyc_firefighters;", + "sql_explanation": "The SQL query counts the number of police officers and firefighters separately using the COUNT function and UNION ALL operator to combine the results." 
+}, { + "id": "1456", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the names of all climate finance recipients who received funding in both 2019 and 2020.", + "sql_context": "CREATE TABLE climate_finance_2019 (recipient_name TEXT, funding_year INTEGER); INSERT INTO climate_finance_2019 (recipient_name, funding_year) VALUES (\u0027Recipient A\u0027, 2019), (\u0027Recipient B\u0027, 2019), (\u0027Recipient A\u0027, 2020); CREATE TABLE climate_finance_2020 (recipient_name TEXT, funding_year INTEGER); INSERT INTO climate_finance_2020 (recipient_name, funding_year) VALUES (\u0027Recipient A\u0027, 2020), (\u0027Recipient C\u0027, 2020);", + "sql": "SELECT recipient_name FROM climate_finance_2019 WHERE funding_year \u003d 2019 INTERSECT SELECT recipient_name FROM climate_finance_2020 WHERE funding_year \u003d 2020;", + "sql_explanation": "The query uses the INTERSECT operator to find the recipient_name records in the climate_finance_2019 table with a funding_year of 2019 and the recimate_finance_2020 table with a funding_year of 2020, displaying the names of recipients who received funding in both years." 
+}, { + "id": "77", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the unique genres of music that have been streamed in at least one country in each continent (North America, South America, Europe, Asia, Africa, and Australia).", + "sql_context": "CREATE TABLE Streams (country TEXT, genre TEXT); INSERT INTO Streams (country, genre) VALUES (\u0027USA\u0027, \u0027Pop\u0027), (\u0027USA\u0027, \u0027Rock\u0027), (\u0027Brazil\u0027, \u0027Samba\u0027), (\u0027France\u0027, \u0027Jazz\u0027), (\u0027Japan\u0027, \u0027Pop\u0027), (\u0027Kenya\u0027, \u0027Reggae\u0027), (\u0027Australia\u0027, \u0027Pop\u0027);", + "sql": "SELECT genre FROM Streams WHERE country IN (SELECT DISTINCT country FROM (SELECT \u0027North America\u0027 as continent UNION ALL SELECT \u0027South America\u0027 UNION ALL SELECT \u0027Europe\u0027 UNION ALL SELECT \u0027Asia\u0027 UNION ALL SELECT \u0027Africa\u0027 UNION ALL SELECT \u0027Australia\u0027) as Continents) GROUP BY genre HAVING COUNT(DISTINCT country) \u003e\u003d 6;", + "sql_explanation": "The SQL query identifies the unique genres of music that have been streamed in at least one country in each continent, by using a subquery to create a table of distinct continent names, and then using the IN set operation to select the countries within those continents. The query then groups the results by genre and filters for genres that have been streamed in at least 6 distinct countries." 
+}, { + "id": "242", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many electric vehicle charging stations are there in each of the following cities: New York, Los Angeles, and Toronto?", + "sql_context": "CREATE TABLE ny_charging_stations (station_id INT, location VARCHAR(100), city VARCHAR(50), is_ev_charging BOOLEAN); CREATE TABLE la_charging_stations (station_id INT, location VARCHAR(100), city VARCHAR(50), is_ev_charging BOOLEAN); CREATE TABLE toronto_charging_stations (station_id INT, location VARCHAR(100), city VARCHAR(50), is_ev_charging BOOLEAN);", + "sql": "SELECT city, COUNT(*) FROM ny_charging_stations WHERE is_ev_charging \u003d TRUE UNION ALL SELECT city, COUNT(*) FROM la_charging_stations WHERE is_ev_charging \u003d TRUE UNION ALL SELECT city, COUNT(*) FROM toronto_charging_stations WHERE is_ev_charging \u003d TRUE;", + "sql_explanation": "Count the number of electric vehicle charging stations in New York, Los Angeles, and Toronto using the WHERE clause to filter for EV charging stations only. Then, combine the results using the UNION ALL operator." 
+}, { + "id": "2", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "data manipulation", + "sql_task_type_description": "inserting, updating, or deleting records", + "sql_prompt": "Insert new data into the \u0027cosmetic_ingredients\u0027 table for a vegan eyeshadow product by brand \u0027Ara\u0027 with ingredients \u0027Mica\u0027, \u0027Iron Oxide\u0027, \u0027Titanium Dioxide\u0027 and \u0027Zinc Oxide\u0027.", + "sql_context": "CREATE TABLE cosmetic_ingredients (ingredient_id INT, product_name TEXT, brand_name TEXT, ingredient_type TEXT);", + "sql": "INSERT INTO cosmetic_ingredients (ingredient_id, product_name, brand_name, ingredient_type) VALUES (NULL, \u0027Vegan Eyeshadow\u0027, \u0027Ara\u0027, \u0027Ingredient\u0027); INSERT INTO cosmetic_ingredients (ingredient_id, product_name, brand_name, ingredient_type, ingredient_name) SELECT ingredient_id, \u0027Vegan Eyeshadow\u0027, \u0027Ara\u0027, \u0027Ingredient\u0027, \u0027Mica\u0027 FROM cosmetic_ingredients WHERE ingredient_name \u003d \u0027Mica\u0027 UNION ALL SELECT NULL, \u0027Vegan Eyeshadow\u0027, \u0027Ara\u0027, \u0027Ingredient\u0027, \u0027Iron Oxide\u0027 UNION ALL SELECT NULL, \u0027Vegan Eyeshadow\u0027, \u0027Ara\u0027, \u0027Ingredient\u0027, \u0027Titanium Dioxide\u0027 UNION ALL SELECT NULL, \u0027Vegan Eyeshadow\u0027, \u0027Ara\u0027, \u0027Ingredient\u0027, \u0027Zinc Oxide\u0027;", + "sql_explanation": "This query first inserts a new record with the required product and brand details. Then, it inserts the specified ingredients using UNION ALL to combine multiple INSERT statements. It first checks if \u0027Mica\u0027 exists in the table to reuse its ingredient_id and avoid duplicates." 
+}, { + "id": "170", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which products from European brands have been reviewed more than 50 times and have a preference rating above 4?", + "sql_context": "CREATE TABLE Product_Reviews (review_id INT, product_id INT, review_text TEXT, helpful_votes INT, review_date DATE, preference_rating INT); INSERT INTO Product_Reviews (review_id, product_id, review_text, helpful_votes, review_date, preference_rating) VALUES (1, 1001, \u0027Lovely product\u0027, 55, \u00272021-09-01\u0027, 4), (2, 1002, \u0027Great color\u0027, 75, \u00272021-08-15\u0027, 5), (3, 1003, \u0027Disappointing quality\u0027, 35, \u00272021-07-01\u0027, 2), (4, 1004, \u0027Good but expensive\u0027, 60, \u00272021-06-10\u0027, 4); CREATE TABLE Product_Details (product_id INT, brand_id INT, brand TEXT, country TEXT); INSERT INTO Product_Details (product_id, brand_id, brand, country) VALUES (1001, 1, \u0027EcoWise Cosmetics\u0027, \u0027France\u0027), (1002, 2, \u0027Nature’s Beauty\u0027, \u0027Germany\u0027), (1003, 3, \u0027Green Visions\u0027, \u0027Italy\u0027), (1004, 4, \u0027Pure \u0026 Simple\u0027, \u0027Spain\u0027);", + "sql": "SELECT product_id FROM Product_Reviews WHERE preference_rating \u003e 4 INTERSECT SELECT product_id FROM Product_Details WHERE country IN (SELECT country FROM Product_Details WHERE country IN (\u0027France\u0027, \u0027Germany\u0027, \u0027Italy\u0027, \u0027Spain\u0027) GROUP BY country HAVING COUNT(DISTINCT brand) \u003e 1);", + "sql_explanation": "This query selects the product IDs from the Product_Reviews table where the 
preference rating is above 4 and intersects it with the product IDs from the Product_Details table where the country is in the list of European countries with more than one brand. It then uses the INTERSECT operator to find the intersection of these two sets." +}, { + "id": "10", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average sales quantity and sales amount for each drug, unpivoted and with a total row?", + "sql_context": "CREATE TABLE SalesData (drug_name VARCHAR(255), sales_quantity INT, sales_amount DECIMAL(10,2)); INSERT INTO SalesData (drug_name, sales_quantity, sales_amount) VALUES (\u0027DrugK\u0027, 200, 30000.00), (\u0027DrugL\u0027, 100, 15000.00), (\u0027DrugM\u0027, 150, 25000.00);", + "sql": "SELECT drug_name, \u0027sales_quantity\u0027 as metric, AVG(sales_quantity) as value FROM SalesData GROUP BY drug_name UNION ALL SELECT drug_name, \u0027sales_amount\u0027 as metric, AVG(sales_amount) as value FROM SalesData GROUP BY drug_name UNION ALL SELECT \u0027Total\u0027, AVG(value) as value FROM (SELECT drug_name, \u0027sales_quantity\u0027 as metric, AVG(sales_quantity) as value FROM SalesData GROUP BY drug_name UNION ALL SELECT drug_name, \u0027sales_amount\u0027 as metric, AVG(sales_amount) as value FROM SalesData GROUP BY drug_name) sub;", + "sql_explanation": "This SQL query calculates the average sales quantity and sales amount for each drug, unpivoted and with a total row. It uses the UNION ALL operator to combine the results of three separate SELECT statements. 
The first SELECT statement gets the average sales quantities for each drug, the second gets the average sales amounts for each drug, and the third gets the average sales quantities and sales amounts for all drugs. The first and second SELECT statements use a GROUP BY clause to group the results by drug name. The third SELECT statement uses a subquery to get the average sales quantities and sales amounts for all drugs." +}, { + "id": "1682", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which drugs had zero sales in \u00272019\u0027 but not in \u00272018\u0027 and \u00272020\u0027?", + "sql_context": "CREATE TABLE sales_yearly_2(year int, drug varchar(10), revenue int); INSERT INTO sales_yearly_2(year, drug, revenue) VALUES(2018, \u0027DrugM\u0027, 500), (2019, \u0027DrugM\u0027, 0), (2020, \u0027DrugM\u0027, 600);", + "sql": "SELECT drug FROM sales_yearly_2 WHERE revenue \u003d 0 AND year \u003d 2019 INTERSECT SELECT drug FROM sales_yearly_2 WHERE year IN (2018, 2020) AND revenue \u003e 0", + "sql_explanation": "Find drugs with zero sales in \u00272019\u0027 but not in \u00272018\u0027 and \u00272020\u0027 using the INTERSECT clause." 
+}, { + "id": "1806", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many bridges, dams, and tunnels are there in total in the \u0027public_works\u0027 schema?", + "sql_context": "CREATE TABLE public_works.bridges (bridge_id INT, name VARCHAR(255)); CREATE TABLE public_works.dams (dam_id INT, name VARCHAR(255)); CREATE TABLE public_works.tunnels (tunnel_id INT, name VARCHAR(255)); INSERT INTO public_works.bridges (bridge_id, name) VALUES (1, \u0027Golden Gate\u0027), (2, \u0027Verrazano-Narrows\u0027); INSERT INTO public_works.dams (dam_id, name) VALUES (1, \u0027Hoover\u0027), (2, \u0027Grand Coulee\u0027); INSERT INTO public_works.tunnels (tunnel_id, name) VALUES (1, \u0027Channel Tunnel\u0027), (2, \u0027Seikan Tunnel\u0027), (3, \u0027Gotthard Base Tunnel\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT * FROM public_works.bridges UNION ALL SELECT * FROM public_works.dams UNION ALL SELECT * FROM public_works.tunnels);", + "sql_explanation": "This query combines the records from the \u0027bridges\u0027, \u0027dams\u0027, and \u0027tunnels\u0027 tables in the \u0027public_works\u0027 schema using the UNION ALL operator, and then uses the COUNT() function to count the total number of records." 
+}, { + "id": "4292", + "domain": "civil engineering", + "domain_description": "Infrastructure development data, engineering design standards, public works project information, and resilience metrics.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many projects are there in total in the \u0027Renewable_Energy\u0027 and \u0027Transportation\u0027 tables?", + "sql_context": "CREATE TABLE Renewable_Energy (project_id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO Renewable_Energy (project_id, project_name, location) VALUES (1, \u0027Solar Farm\u0027, \u0027Arizona\u0027); INSERT INTO Renewable_Energy (project_id, project_name, location) VALUES (2, \u0027Wind Turbine Installation\u0027, \u0027Oregon\u0027); CREATE TABLE Transportation (project_id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO Transportation (project_id, project_name, location) VALUES (1, \u0027Bridge Replacement\u0027, \u0027Texas\u0027); INSERT INTO Transportation (project_id, project_name, location) VALUES (2, \u0027Road Construction\u0027, \u0027Florida\u0027);", + "sql": "SELECT COUNT(*) FROM Renewable_Energy UNION SELECT COUNT(*) FROM Transportation;", + "sql_explanation": "This SQL query counts the number of projects in the \u0027Renewable_Energy\u0027 and \u0027Transportation\u0027 tables separately using the COUNT function, and then combines the results using the UNION operator." 
+}, { + "id": "4686", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of farmers and farming organizations in the \u0027rural_development\u0027 schema?", + "sql_context": "CREATE TABLE farmers(id INT, name VARCHAR(50), age INT); INSERT INTO farmers VALUES (1, \u0027John Doe\u0027, 45); CREATE TABLE farming_orgs(id INT, name VARCHAR(50), members INT); INSERT INTO farming_orgs VALUES (1, \u0027FarmCo\u0027, 30);", + "sql": "SELECT COUNT(*) FROM farmers UNION ALL SELECT COUNT(*) FROM farming_orgs;", + "sql_explanation": "The SQL query first counts the number of rows in the \u0027farmers\u0027 table, then counts the number of rows in the \u0027farming_orgs\u0027 table using the UNION ALL operator to combine the two results." 
+}, { + "id": "2471", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which customers have investments in both the US and Canadian stock markets?", + "sql_context": "CREATE TABLE Investments (CustomerID INT, Market VARCHAR(20), Value DECIMAL(10,2)); INSERT INTO Investments (CustomerID, Market, Value) VALUES (1, \u0027US\u0027, 5000); INSERT INTO Investments (CustomerID, Market, Value) VALUES (2, \u0027Canada\u0027, 3000);", + "sql": "SELECT CustomerID FROM Investments WHERE Market \u003d \u0027US\u0027 INTERSECT SELECT CustomerID FROM Investments WHERE Market \u003d \u0027Canada\u0027", + "sql_explanation": "This query identifies the customers who have investments in both the US and Canadian stock markets by intersecting the results for each market." 
+}, { + "id": "1845", + "domain": "sustainable energy", + "domain_description": "Renewable energy project data, energy efficiency stats, carbon offset programs, and clean energy policy trends.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total installed capacity (in MW) of all renewable energy projects in the \u0027south\u0027 region?", + "sql_context": "CREATE TABLE wind_farms (id INT, name TEXT, region TEXT, capacity_mw FLOAT); INSERT INTO wind_farms (id, name, region, capacity_mw) VALUES (1, \u0027Windfarm A\u0027, \u0027west\u0027, 150.5); INSERT INTO wind_farms (id, name, region, capacity_mw) VALUES (2, \u0027Windfarm B\u0027, \u0027east\u0027, 120.2); CREATE TABLE solar_power_plants (id INT, name TEXT, region TEXT, capacity_mw FLOAT); INSERT INTO solar_power_plants (id, name, region, capacity_mw) VALUES (1, \u0027Solar Plant A\u0027, \u0027north\u0027, 125.8); INSERT INTO solar_power_plants (id, name, region, capacity_mw) VALUES (2, \u0027Solar Plant B\u0027, \u0027south\u0027, 180.3);", + "sql": "SELECT SUM(capacity_mw) FROM wind_farms WHERE region \u003d \u0027south\u0027 UNION ALL SELECT SUM(capacity_mw) FROM solar_power_plants WHERE region \u003d \u0027south\u0027;", + "sql_explanation": "This query calculates the sum of capacity_mw in the wind_farms and solar_power_plants tables where the region is \u0027south\u0027 using the UNION ALL operator to combine the results." 
+}, { + "id": "365", + "domain": "telecommunications", + "domain_description": "Mobile and broadband subscriber data, network infrastructure investments, customer usage patterns, and regulatory compliance information.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of mobile and broadband subscribers for each technology, ranked in descending order?", + "sql_context": "CREATE TABLE mobile_subscribers (subscriber_id INT, technology VARCHAR(20), region VARCHAR(50)); INSERT INTO mobile_subscribers (subscriber_id, technology, region) VALUES (1, \u00274G\u0027, \u0027North\u0027), (2, \u00275G\u0027, \u0027North\u0027), (3, \u00273G\u0027, \u0027South\u0027), (4, \u00275G\u0027, \u0027East\u0027); CREATE TABLE broadband_subscribers (subscriber_id INT, technology VARCHAR(20), region VARCHAR(50)); INSERT INTO broadband_subscribers (subscriber_id, technology, region) VALUES (5, \u0027Fiber\u0027, \u0027North\u0027), (6, \u0027Cable\u0027, \u0027North\u0027), (7, \u0027Fiber\u0027, \u0027West\u0027), (8, \u0027DSL\u0027, \u0027East\u0027);", + "sql": "SELECT \u0027Mobile\u0027 AS source, technology, COUNT(*) AS total FROM mobile_subscribers GROUP BY technology UNION ALL SELECT \u0027Broadband\u0027 AS source, technology, COUNT(*) AS total FROM broadband_subscribers GROUP BY technology ORDER BY total DESC;", + "sql_explanation": "The query combines the Mobile and Broadband subscribers data using the UNION ALL operator, grouping them by technology, and adding a source identifier to distinguish between the two. It then ranks the total number of subscribers for each technology in descending order." 
+}, { + "id": "2169", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of defense projects with Boeing and Raytheon combined?", + "sql_context": "CREATE TABLE boeing_defense_projects (project_id INT, project_name VARCHAR(50), country VARCHAR(50)); CREATE TABLE raytheon_defense_projects (project_id INT, project_name VARCHAR(50), country VARCHAR(50));", + "sql": "SELECT COUNT(*) FROM (SELECT * FROM boeing_defense_projects UNION ALL SELECT * FROM raytheon_defense_projects) AS combined_projects;", + "sql_explanation": "The SQL query uses the UNION ALL operator to combine the defense projects from Boeing and Raytheon into a single dataset. Then, it calculates the total number of projects in the combined dataset." 
+}, { + "id": "146", + "domain": "sports entertainment", + "domain_description": "Sports team performance data, fan demographics, ticket sales statistics, and athlete wellbeing programs.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the total number of games played per team, and rank them in descending order.", + "sql_context": "CREATE TABLE Teams (TeamID INT, TeamName VARCHAR(255));CREATE TABLE Games (GameID INT, HomeTeam VARCHAR(255), AwayTeam VARCHAR(255), GameDate DATE);", + "sql": "SELECT HomeTeam AS TeamName, COUNT(*) AS TotalGames, ROW_NUMBER() OVER (PARTITION BY HomeTeam ORDER BY COUNT(*) DESC) AS Rank FROM Games GROUP BY HomeTeam UNION ALL SELECT AwayTeam, COUNT(*), ROW_NUMBER() OVER (PARTITION BY AwayTeam ORDER BY COUNT(*) DESC) FROM Games GROUP BY AwayTeam;", + "sql_explanation": "The SQL query calculates the total number of games played per team for both the home and away teams using the COUNT() function. It assigns a rank to each team based on the total number of games played in descending order using the ROW_NUMBER() function. The UNION ALL operator is used to combine the results for the home and away teams." 
+}, { + "id": "403", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find artists who have mastered traditional arts from Africa, Asia, and South America.", + "sql_context": "CREATE TABLE Artists (ArtistID int, ArtistName text, Specialization text); INSERT INTO Artists (ArtistID, ArtistName, Specialization) VALUES (1, \u0027Amina Ahmed\u0027, \u0027Indian Miniature Painting\u0027), (2, \u0027Bertina Lopes\u0027, \u0027Mozambican Modern Art\u0027), (3, \u0027Fernando de Szyszlo\u0027, \u0027Peruvian Abstract Art\u0027);", + "sql": "SELECT ArtistName FROM Artists WHERE Specialization LIKE \u0027%African%\u0027 INTERSECT SELECT ArtistName FROM Artists WHERE Specialization LIKE \u0027%Asian%\u0027 INTERSECT SELECT ArtistName FROM Artists WHERE Specialization LIKE \u0027%South% American%\u0027;", + "sql_explanation": "This query uses the INTERSECT set operation to return artists who have mastered traditional arts from Africa, Asia, and South America." 
+}, { + "id": "569", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many heritage sites (tangible and intangible) are there in the Amazon region?", + "sql_context": "CREATE TABLE tangible_heritage (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO tangible_heritage (id, name, region) VALUES (1, \u0027Historic Centre of Salvador de Bahia\u0027, \u0027Amazon\u0027); CREATE TABLE intangible_heritage (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO intangible_heritage (id, name, region) VALUES (1, \u0027Maroon Arts and Culture of Suriname\u0027, \u0027Amazon\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT \u0027tangible\u0027 as type, t.name FROM tangible_heritage t WHERE t.region \u003d \u0027Amazon\u0027 UNION ALL SELECT \u0027intangible\u0027 as type, i.name FROM intangible_heritage i WHERE i.region \u003d \u0027Amazon\u0027) AS h;", + "sql_explanation": "The SQL query first creates a subquery that combines the tangible_heritage and intangible_heritage tables into one table with a type column that identifies the type of heritage site. Then, it selects the number of records in the subquery where the region is \u0027Amazon\u0027." 
+}, { + "id": "2292", + "domain": "cultural preservation", + "domain_description": "In-depth data on heritage sites, traditional arts, language preservation, and community engagement in cultural preservation.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which heritage sites are located in France and Spain?", + "sql_context": "CREATE TABLE HeritageSites (SiteID int, SiteName text, Country text); INSERT INTO HeritageSites (SiteID, SiteName, Country) VALUES (1, \u0027Eiffel Tower\u0027, \u0027France\u0027), (2, \u0027Mont Saint-Michel\u0027, \u0027France\u0027), (3, \u0027Alhambra\u0027, \u0027Spain\u0027);", + "sql": "SELECT SiteName FROM HeritageSites WHERE Country \u003d \u0027France\u0027 INTERSECT SELECT SiteName FROM HeritageSites WHERE Country \u003d \u0027Spain\u0027;", + "sql_explanation": "This query uses the INTERSECT set operation to return sites that are present in both the French and Spanish subsets of the HeritageSites table." 
+}, { + "id": "1087", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of \u0027Primate\u0027 species in \u0027Refuge G\u0027 and \u0027Refuge H\u0027?", + "sql_context": "CREATE TABLE RefugeG(animal_id INT, species VARCHAR(20), refuge VARCHAR(10)); INSERT INTO RefugeG VALUES (1, \u0027Howler Monkey\u0027, \u0027RefugeG\u0027), (2, \u0027Spider Monkey\u0027, \u0027RefugeG\u0027), (3, \u0027Tamarin\u0027, \u0027RefugeG\u0027); CREATE TABLE RefugeH(animal_id INT, species VARCHAR(20), refuge VARCHAR(10)); INSERT INTO RefugeH VALUES (4, \u0027Orangutan\u0027, \u0027RefugeH\u0027), (5, \u0027Gibbon\u0027, \u0027RefugeH\u0027), (6, \u0027Lemur\u0027, \u0027RefugeH\u0027);", + "sql": "SELECT SUM(qty) FROM (SELECT COUNT(*) as qty FROM RefugeG WHERE species LIKE \u0027%Primate%\u0027 UNION ALL SELECT COUNT(*) as qty FROM RefugeH WHERE species LIKE \u0027%Primate%\u0027) as total;", + "sql_explanation": "The SQL query first counts the number of \u0027Primate\u0027 species in \u0027RefugeG\u0027 and \u0027RefugeH\u0027 using the UNION ALL operator to combine the two counts. Then, the SUM function adds up these two numbers to give the total number of \u0027Primate\u0027 species in both refuges." 
+}, { + "id": "2511", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which animals are present in both \u0027National Park B\u0027 and \u0027Conservation Area C\u0027?", + "sql_context": "CREATE TABLE Habitats (HabitatID INT, HabitatName TEXT, Location TEXT); INSERT INTO Habitats (HabitatID, HabitatName, Location) VALUES (2, \u0027National Park B\u0027, \u0027Country B\u0027); INSERT INTO Habitats (HabitatID, HabitatName, Location) VALUES (3, \u0027Conservation Area C\u0027, \u0027Country C\u0027); CREATE TABLE AnimalHabitats (AnimalID INT, HabitatID INT); INSERT INTO AnimalHabitats (AnimalID, HabitatID) VALUES (1, 2); INSERT INTO AnimalHabitats (AnimalID, HabitatID) VALUES (1, 3);", + "sql": "SELECT AnimalID FROM AnimalHabitats WHERE HabitatID \u003d 2 INTERSECT SELECT AnimalID FROM AnimalHabitats WHERE HabitatID \u003d 3;", + "sql_explanation": "The SQL query uses the INTERSECT operator to find AnimalIDs that are present in both the \u0027National Park B\u0027 and \u0027Conservation Area C\u0027 habitat records." 
+}, { + "id": "2779", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in \u0027Refuge A\u0027 and \u0027Refuge B\u0027?", + "sql_context": "CREATE TABLE RefugeA(animal_id INT, species VARCHAR(20), refuge VARCHAR(10)); INSERT INTO RefugeA VALUES (1, \u0027Bear\u0027, \u0027RefugeA\u0027), (2, \u0027Deer\u0027, \u0027RefugeA\u0027), (3, \u0027Raccoon\u0027, \u0027RefugeA\u0027); CREATE TABLE RefugeB(animal_id INT, species VARCHAR(20), refuge VARCHAR(10)); INSERT INTO RefugeB VALUES (4, \u0027Fox\u0027, \u0027RefugeB\u0027), (5, \u0027Rabbit\u0027, \u0027RefugeB\u0027), (6, \u0027Bear\u0027, \u0027RefugeB\u0027);", + "sql": "SELECT SUM(qty) FROM (SELECT COUNT(*) as qty FROM RefugeA UNION ALL SELECT COUNT(*) as qty FROM RefugeB) as total;", + "sql_explanation": "The SQL query first counts the number of animals in \u0027RefugeA\u0027 and \u0027RefugeB\u0027 using the UNION ALL operator to combine the two counts. Then, the SUM function adds up these two numbers to give the total number of animals in both refuges." 
+}, { + "id": "3749", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of animals in the animal_population table and the total number of community education programs?", + "sql_context": "CREATE TABLE if NOT EXISTS animal_population (animal_id INT, animal_name VARCHAR(50), conservation_status VARCHAR(20)); INSERT INTO animal_population (animal_id, animal_name, conservation_status) VALUES (1, \u0027Giant Panda\u0027, \u0027Vulnerable\u0027); INSERT INTO animal_population (animal_id, animal_name, conservation_status) VALUES (2, \u0027Tiger\u0027, \u0027Endangered\u0027); INSERT INTO animal_population (animal_id, animal_name, conservation_status) VALUES (3, \u0027Elephant\u0027, \u0027Vulnerable\u0027); CREATE TABLE if NOT EXISTS community_education (program_id INT, program_name VARCHAR(50), donation_count INT); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (1, \u0027Wildlife Conservation 101\u0027, 500); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (2, \u0027Endangered Species Awareness\u0027, 300);", + "sql": "SELECT COUNT(*) FROM animal_population UNION ALL SELECT COUNT(*) FROM community_education;", + "sql_explanation": "Use the UNION ALL operator to combine the count of animals in the animal_population table and the count of community education programs to get the total number of animals and the total number of community education programs." 
+}, { + "id": "3300", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "set operations", + "sql_complexity_description": "set operations such as UNION, INTERSECT, and EXCEPT", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of green buildings and smart city projects in their respective schemas?", + "sql_context": "CREATE SCHEMA if not exists green_buildings; CREATE TABLE if not exists green_buildings.buildings (id INT, building_name VARCHAR, country VARCHAR, co2_emissions FLOAT); CREATE SCHEMA if not exists smart_cities; CREATE TABLE if not exists smart_cities.projects (id INT, project_name VARCHAR, location VARCHAR); INSERT INTO green_buildings.buildings (id, building_name, country, co2_emissions) VALUES (1, \u0027Green Building 1\u0027, \u0027USA\u0027, 100), (2, \u0027Green Building 2\u0027, \u0027Canada\u0027, 120); INSERT INTO smart_cities.projects (id, project_name, location) VALUES (1, \u0027Smart City 1\u0027, \u0027USA\u0027), (2, \u0027Smart City 2\u0027, \u0027Germany\u0027);", + "sql": "SELECT COUNT(*) FROM green_buildings.buildings UNION ALL SELECT COUNT(*) FROM smart_cities.projects;", + "sql_explanation": "This query calculates the total number of green buildings and smart city projects in their respective schemas by performing a union all operation between two count queries, one for each schema." +}, { + "id": "509", + "domain": "media", + "domain_description": "Media data on content diversity, media literacy, disinformation detection, and media representation.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the title and genre of the top 3 movies with the highest ratings from studios based in Italy, ordered by ratings in descending order.", + "sql_context": "CREATE TABLE movies (title VARCHAR(255), genre VARCHAR(255), studio VARCHAR(255), rating FLOAT); INSERT INTO movies (title, genre, studio, rating) VALUES (\u0027Movie19\u0027, \u0027Action\u0027, \u0027Italy Studio1\u0027, 8.5), (\u0027Movie20\u0027, \u0027Drama\u0027, \u0027Italy Studio2\u0027, 9.0);", + "sql": "SELECT title, genre FROM (SELECT title, genre, studio, rating, ROW_NUMBER() OVER (PARTITION BY studio ORDER BY rating DESC) as rank FROM movies WHERE studio LIKE \u0027%Italy%\u0027) subquery WHERE rank \u003c\u003d 3 ORDER BY rating DESC;", + "sql_explanation": "The query finds the title and genre of the top 3 movies with the highest ratings from studios based in Italy, ordered by ratings in descending order. It uses a subquery with the ROW_NUMBER() function to rank movies by rating within each studio and filters the top 3 movies in the outer query." +}, { + "id": "166", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of successful Mars missions launched per year by the United States, ordered from the earliest to latest?", + "sql_context": "CREATE TABLE MarsMissions (MissionID INT, Name VARCHAR(50), LaunchCountry VARCHAR(50), LaunchDate DATE, Success BOOLEAN); INSERT INTO MarsMissions VALUES (1, \u0027Mariner 4\u0027, \u0027United States\u0027, \u00271964-11-28\u0027, true); INSERT INTO MarsMissions VALUES (2, \u0027Mariner 6\u0027, \u0027United States\u0027, \u00271969-02-25\u0027, true); INSERT INTO MarsMissions VALUES (3, \u0027Mariner 7\u0027, \u0027United States\u0027, \u00271969-03-27\u0027, true);", + "sql": "SELECT LaunchCountry, AVG(DATEDIFF(YEAR, LaunchDate, LEAD(LaunchDate) OVER (PARTITION BY LaunchCountry ORDER BY LaunchDate))) as avg_launches_per_year FROM MarsMissions WHERE LaunchCountry \u003d \u0027United States\u0027 AND Success \u003d true GROUP BY LaunchCountry ORDER BY avg_launches_per_year", + "sql_explanation": "Calculate the average number of successful Mars missions launched per year by the United States using DATEDIFF and LEAD to find the difference in years between launch dates." +}, { + "id": "411", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average number of satellites launched per year by each country, ordered by the country with the highest average?", + "sql_context": "CREATE TABLE SatelliteMissions (MissionID INT, Name VARCHAR(50), LaunchCountry VARCHAR(50), LaunchDate DATE); INSERT INTO SatelliteMissions VALUES (1, \u0027GSAT-12\u0027, \u0027India\u0027, \u00272011-07-15\u0027); INSERT INTO SatelliteMissions VALUES (2, \u0027GSAT-11\u0027, \u0027India\u0027, \u00272018-12-04\u0027);", + "sql": "SELECT LaunchCountry, AVG(DATEDIFF(YEAR, LaunchDate, LEAD(LaunchDate) OVER (PARTITION BY LaunchCountry ORDER BY LaunchDate))) as avg_launches_per_year FROM SatelliteMissions GROUP BY LaunchCountry ORDER BY avg_launches_per_year DESC", + "sql_explanation": "Calculate the average number of satellites launched per year for each country by using DATEDIFF to find the difference in years between launch dates, and LEAD to get the next launch date for each country." +}, { + "id": "1253", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the launch dates of the first satellite for each country?", + "sql_context": "CREATE TABLE Satellites (id INT, name VARCHAR(255), type VARCHAR(255), launch_date DATE, country VARCHAR(255)); INSERT INTO Satellites (id, name, type, launch_date, country) VALUES (1, \u0027Sputnik 1\u0027, \u0027Satellite\u0027, \u00271957-10-04\u0027, \u0027Russia\u0027), (2, \u0027Explorer 1\u0027, \u0027Satellite\u0027, \u00271958-01-31\u0027, \u0027United States\u0027);", + "sql": "SELECT name, launch_date, ROW_NUMBER() OVER (PARTITION BY country ORDER BY launch_date ASC) as launch_rank FROM Satellites WHERE type \u003d \u0027Satellite\u0027 AND launch_rank \u003d 1;", + "sql_explanation": "This query uses the ROW_NUMBER() function to assign a ranking to satellites based on their launch date within a country, in ascending order. The partitioning is done by country, so the ranking is reset for each country. The query filters for launch rank 1, which represents the first satellite launched for each country." +}, { + "id": "1320", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many satellites were launched in each year, with a running total?", + "sql_context": "CREATE TABLE satellite_launches (year INT, satellite_name VARCHAR(50), country VARCHAR(50)); INSERT INTO satellite_launches (year, satellite_name, country) VALUES (2015, \u0027Kalamsat\u0027, \u0027India\u0027), (2017, \u0027PSLV-C37\u0027, \u0027India\u0027), (2018, \u0027PSLV-C42\u0027, \u0027India\u0027), (2018, \u0027PSLV-C43\u0027, \u0027India\u0027), (2019, \u0027PSLV-C45\u0027, \u0027India\u0027), (2020, \u0027PSLV-C46\u0027, \u0027India\u0027), (2021, \u0027PSLV-C51\u0027, \u0027India\u0027);", + "sql": "SELECT year, COUNT(satellite_name) OVER (PARTITION BY year ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total FROM satellite_launches;", + "sql_explanation": "Calculate the number of satellites launched in each year and create a running total of satellite launches. Order data by year and calculate the running total for each year." +}, { + "id": "1954", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of asteroids discovered by each observatory in descending order?", + "sql_context": "CREATE TABLE asteroids (id INT, asteroid_name VARCHAR(50), discovery_date DATE, observatory VARCHAR(50));", + "sql": "SELECT observatory, COUNT(*) AS num_asteroids, RANK() OVER (ORDER BY COUNT(*) DESC) AS observatory_rank FROM asteroids GROUP BY observatory;", + "sql_explanation": "Find the number of asteroids discovered by each observatory in descending order by using the RANK function to assign a rank based on the count of asteroids per observatory, ordered in descending order." +}, { + "id": "4109", + "domain": "space", + "domain_description": "Space data on space exploration, satellite technology, space debris mitigation, and astrobiology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many satellites were launched by each country in the satellites table?", + "sql_context": "CREATE TABLE satellites (id INT, name VARCHAR(50), launch_country VARCHAR(50), launch_date DATE); INSERT INTO satellites VALUES (1, \u0027Sputnik 1\u0027, \u0027USSR\u0027, \u00271957-10-04\u0027); INSERT INTO satellites VALUES (2, \u0027Explorer 1\u0027, \u0027USA\u0027, \u00271958-01-31\u0027);", + "sql": "SELECT launch_country, COUNT(*) OVER (PARTITION BY launch_country) FROM satellites;", + "sql_explanation": "Counts the number of satellites launched by each country using the window function COUNT() over partitioning." 
+}, { + "id": "721", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the change in carbon price for each country between two consecutive dates.", + "sql_context": "CREATE TABLE carbon_prices_history (country VARCHAR(255), date DATE, price FLOAT); INSERT INTO carbon_prices_history VALUES (\u0027USA\u0027, \u00272023-01-01\u0027, 10), (\u0027Canada\u0027, \u00272023-01-01\u0027, 15), (\u0027USA\u0027, \u00272023-02-01\u0027, 11), (\u0027Canada\u0027, \u00272023-02-01\u0027, 16), (\u0027USA\u0027, \u00272023-03-01\u0027, 12), (\u0027Canada\u0027, \u00272023-03-01\u0027, 17);", + "sql": "SELECT country, (price_2 - price_1) AS carbon_price_change FROM (SELECT country, date, LAG(price) OVER (PARTITION BY country ORDER BY date) AS price_1, price AS price_2 FROM carbon_prices_history) t;", + "sql_explanation": "The query uses the LAG function to access the previous date\u0027s carbon price and calculates the difference between the current and previous date\u0027s carbon price for each country." +}, { + "id": "1165", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage of total energy production for each energy source (wind, solar, hydro) in a given year.", + "sql_context": "CREATE TABLE energy_production (energy_source VARCHAR(255), year INT, monthly_production FLOAT); INSERT INTO energy_production VALUES (\u0027Wind\u0027, 2022, 2000), (\u0027Solar\u0027, 2022, 3000), (\u0027Hydro\u0027, 2022, 4000), (\u0027Wind\u0027, 2022, 2500), (\u0027Solar\u0027, 2022, 3500), (\u0027Hydro\u0027, 2022, 4500);", + "sql": "SELECT energy_source, SUM(monthly_production) / SUM(SUM(monthly_production)) OVER () AS percentage_of_total FROM energy_production WHERE year \u003d 2022 GROUP BY energy_source;", + "sql_explanation": "The query calculates the percentage of total energy production for each energy source by partitioning the data by the \u0027energy_source\u0027 column and using the SUM function. The outer query then calculates the percentage of total production for each energy source." +}, { + "id": "1814", + "domain": "energy", + "domain_description": "Energy market data covering renewable energy sources, energy storage, carbon pricing, and energy efficiency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in CO2 price from the previous day for each day?", + "sql_context": "CREATE TABLE CO2Price (Date DATE, Price INT); INSERT INTO CO2Price (Date, Price) VALUES (\u00272022-01-01\u0027, 20), (\u00272022-01-02\u0027, 22), (\u00272022-01-03\u0027, 24);", + "sql": "SELECT Date, LAG(Price) OVER (ORDER BY Date) AS PreviousDayPrice, Price, Price - LAG(Price) OVER (ORDER BY Date) AS CO2PriceChange FROM CO2Price;", + "sql_explanation": "The SQL query calculates the change in CO2 price from the previous day for each day by using the LAG window function to access the previous day\u0027s price and then subtracting it from the current day\u0027s price." +}, { + "id": "16", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 most played games in each country, and their respective genres, ordered by the total number of hours played.", + "sql_context": "CREATE TABLE GameData (GameID INT, GameName VARCHAR(50), Genre VARCHAR(50), AvgHoursPlayed DECIMAL(5,2)); INSERT INTO GameData (GameID, GameName, Genre, AvgHoursPlayed) VALUES (1, \u0027Game1\u0027, \u0027FPS\u0027, 12.5), (2, \u0027Game2\u0027, \u0027RPG\u0027, 10.3), (3, \u0027Game3\u0027, \u0027Sports\u0027, 15.6), (4, \u0027Game4\u0027, \u0027Simulation\u0027, 17.2);", + "sql": "SELECT E.EventCountry, GD.GameName, GD.Genre, SUM(GD.AvgHoursPlayed) AS TotalHoursPlayed, ROW_NUMBER() OVER (PARTITION BY E.EventCountry ORDER BY SUM(GD.AvgHoursPlayed) DESC) AS Rank FROM Players P JOIN EventParticipation EP ON P.PlayerID \u003d EP.PlayerID JOIN EventDates E ON EP.EventID \u003d E.EventID JOIN GameData GD ON P.PlayerID \u003d GD.GameID GROUP BY E.EventCountry, GD.GameName, GD.Genre HAVING COUNT(DISTINCT P.PlayerID) \u003e 1 ORDER BY E.EventCountry, TotalHoursPlayed DESC", + "sql_explanation": "Identify the top 3 most played games in each country, and their respective genres, ordered by the total number of hours played." +}, { + "id": "449", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of VR hardware products by price range, considering price ranges as: 0-500, 500-1000, and \u003e1000, and their respective counts, ranked by count?", + "sql_context": "CREATE TABLE VRHardware (HardwareID INT, HardwareName VARCHAR(50), Manufacturer VARCHAR(20), ReleaseDate DATE, Price NUMERIC(10,2)); INSERT INTO VRHardware (HardwareID, HardwareName, Manufacturer, ReleaseDate, Price) VALUES (1, \u0027Oculus Rift\u0027, \u0027Oculus\u0027, \u00272016-03-28\u0027, 599); INSERT INTO VRHardware (HardwareID, HardwareName, Manufacturer, ReleaseDate, Price) VALUES (2, \u0027HTC Vive\u0027, \u0027HTC\u0027, \u00272016-04-05\u0027, 799); INSERT INTO VRHardware (HardwareID, HardwareName, Manufacturer, ReleaseDate, Price) VALUES (3, \u0027Valve Index\u0027, \u0027Valve\u0027, \u00272019-06-28\u0027, 999); INSERT INTO VRHardware (HardwareID, HardwareName, Manufacturer, ReleaseDate, Price) VALUES (4, \u0027Pico Neo 2\u0027, \u0027Pico\u0027, \u00272019-04-15\u0027, 699);", + "sql": "SELECT CASE WHEN Price \u003c\u003d 500 THEN \u00270-500\u0027 WHEN Price \u003c\u003d 1000 THEN \u0027500-1000\u0027 ELSE \u0027\u003e1000\u0027 END as PriceRange, COUNT(*) as ProductsInRange, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) as Rank FROM VRHardware GROUP BY PriceRange;", + "sql_explanation": "Determine the distribution of VR hardware products by price range, considering price ranges as: 0-500, 500-1000, and \u003e1000, and their respective counts, ranked by count." 
+}, { + "id": "3362", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which countries have the highest esports tournament viewership?", + "sql_context": "CREATE TABLE tournaments (id INT, country VARCHAR(255), viewers INT); INSERT INTO tournaments (id, country, viewers) VALUES (1, \u0027USA\u0027, 50000), (2, \u0027China\u0027, 70000), (3, \u0027Germany\u0027, 35000);", + "sql": "SELECT country, RANK() OVER (ORDER BY viewers DESC) AS rank FROM tournaments WHERE viewers \u003e 40000;", + "sql_explanation": "Calculate the rank of each country based on viewership using the RANK window function and order by viewers in descending order. Filter countries with more than 40,000 viewers." +}, { + "id": "3937", + "domain": "gaming", + "domain_description": "Player demographics, game design data, esports event information, and virtual reality technology adoption.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the ranking of VR platforms based on price, with the cheapest having the lowest rank and the most expensive having the highest rank?", + "sql_context": "CREATE TABLE VRPlatforms (PlatformID INT, PlatformName VARCHAR(50), ReleaseDate DATE, Price DECIMAL(5,2)); INSERT INTO VRPlatforms (PlatformID, PlatformName, ReleaseDate, Price) VALUES (1, \u0027Oculus Rift\u0027, \u00272016-03-28\u0027, 599.00); INSERT INTO VRPlatforms (PlatformID, PlatformName, ReleaseDate, Price) VALUES (2, \u0027HTC Vive\u0027, \u00272016-04-05\u0027, 799.00); INSERT INTO VRPlatforms (PlatformID, PlatformName, ReleaseDate, Price) VALUES (3, \u0027PlayStation VR\u0027, \u00272016-10-13\u0027, 399.00); INSERT INTO VRPlatforms (PlatformID, PlatformName, ReleaseDate, Price) VALUES (4, \u0027Valve Index\u0027, \u00272019-06-28\u0027, 999.00);", + "sql": "SELECT PlatformName, ROW_NUMBER() OVER (ORDER BY Price DESC) AS Rank FROM VRPlatforms;", + "sql_explanation": "This query returns the PlatformName and a Rank based on the Price of each VR platform, ordered from highest to lowest price." +}, { + "id": "1179", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the moving average of CO2 emissions for each mine.", + "sql_context": "CREATE TABLE environmental_impact (id INT, mine_name VARCHAR(50), co2_emissions INT, year INT); INSERT INTO environmental_impact (id, mine_name, co2_emissions, year) VALUES (1, \u0027Copper Mountain\u0027, 12000, 2017), (2, \u0027Barrick Gold\u0027, 15000, 2018), (3, \u0027Newmont Goldcorp\u0027, 18000, 2019), (4, \u0027Copper Mountain\u0027, 12500, 2018), (5, \u0027Barrick Gold\u0027, 14000, 2019);", + "sql": "SELECT mine_name, year, AVG(co2_emissions) OVER (PARTITION BY mine_name ORDER BY year ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_average FROM environmental_impact;", + "sql_explanation": "This query calculates the moving average of CO2 emissions for each mine using the window function AVG() with partitioning by mine_name, ordering by year, and a range of 2 rows preceding and the current row." +}, { + "id": "2234", + "domain": "mining", + "domain_description": "Mineral extraction statistics, environmental impact assessments, labor productivity metrics, and geological survey information.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in CO2 emissions for each mine over time?", + "sql_context": "CREATE TABLE environmental_impact (id INT, mine_id INT, impact_type VARCHAR(50), value INT, PRIMARY KEY (id), FOREIGN KEY (mine_id) REFERENCES mines(id)); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (1, 1, \u0027CO2 Emissions\u0027, 1200); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (2, 1, \u0027CO2 Emissions\u0027, 1250); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (3, 2, \u0027CO2 Emissions\u0027, 2000); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (4, 2, \u0027CO2 Emissions\u0027, 2100); CREATE TABLE mines (id INT, name VARCHAR(50), location VARCHAR(50), annual_production INT, PRIMARY KEY (id)); INSERT INTO mines (id, name, location, annual_production) VALUES (1, \u0027Golden Mine\u0027, \u0027California\u0027, 15000); INSERT INTO mines (id, name, location, annual_production) VALUES (2, \u0027Silver Mine\u0027, \u0027Nevada\u0027, 22000);", + "sql": "SELECT mine_id, impact_type, value, LAG(value) OVER (PARTITION BY mine_id ORDER BY id) as previous_value FROM environmental_impact;", + "sql_explanation": "This SQL query selects the mine_id, impact_type, and value columns from the environmental_impact table. It also calculates the previous value for each impact type within each mine using the LAG window function. This allows for comparison of environmental impact values between different time periods or categories." 
+}, { + "id": "276", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of products that are ethically sourced, by category, for each store in the past quarter?", + "sql_context": "CREATE TABLE products (id INT, category VARCHAR(50), store VARCHAR(50), ethically_sourced BOOLEAN); CREATE TABLE stores (id INT, name VARCHAR(50), location VARCHAR(50));", + "sql": "SELECT store, category, 100.0 * COUNT(*) / SUM(COUNT(*)) OVER (PARTITION BY store) as percentage FROM products JOIN stores ON products.store \u003d stores.name WHERE ethically_sourced \u003d TRUE AND date \u003e\u003d DATEADD(quarter, -1, GETDATE()) GROUP BY store, category;", + "sql_explanation": "The SQL query calculates the percentage of products that are ethically sourced, by category, for each store in the past quarter. It first joins the products and stores tables on the store column, then filters the records to only include those that are ethically sourced and from the past quarter. It then groups the data by store and category using the GROUP BY clause. It then calculates the percentage of products in each group using the COUNT and SUM functions, and orders the results by store and percentage in descending order." 
+}, { + "id": "1899", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the rank of products based on their rating, for products manufactured in Spain?", + "sql_context": "CREATE TABLE product_ratings (product_id INT, product_name VARCHAR(255), manufacturer_country VARCHAR(50), rating DECIMAL(2,1)); INSERT INTO product_ratings (product_id, product_name, manufacturer_country, rating) VALUES (1, \u0027Product A\u0027, \u0027Spain\u0027, 4.5), (2, \u0027Product B\u0027, \u0027Italy\u0027, 4.2), (3, \u0027Product C\u0027, \u0027Spain\u0027, 4.8);", + "sql": "SELECT product_id, product_name, rating, RANK() OVER (ORDER BY rating DESC) as rank FROM product_ratings WHERE manufacturer_country \u003d \u0027Spain\u0027;", + "sql_explanation": "Calculate the rank of products based on their rating for products manufactured in Spain by filtering the manufacturer_country column with the value \u0027Spain\u0027 and then applying the RANK function on the rating column with descending order." +}, { + "id": "1935", + "domain": "retail", + "domain_description": "Retail data on circular supply chains, ethical labor practices, product transparency, and consumer education.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the moving average of product prices for each brand over the last 30 days?", + "sql_context": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), brand VARCHAR(255), price DECIMAL(10, 2), sale_date DATE); INSERT INTO products VALUES (1, \u0027ProductA\u0027, \u0027BrandX\u0027, 50, \u00272022-01-01\u0027), (2, \u0027ProductB\u0027, \u0027BrandX\u0027, 75, \u00272022-01-03\u0027), (3, \u0027ProductC\u0027, \u0027BrandY\u0027, 60, \u00272022-01-02\u0027);", + "sql": "SELECT brand, AVG(price) OVER (PARTITION BY brand ORDER BY sale_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) as moving_avg FROM products;", + "sql_explanation": "The SQL query calculates the moving average of product prices for each brand over the last 30 days using a window function with the AVG function, partitioning by brand, and ordering by sale_date. The ROWS BETWEEN 29 PRECEDING AND CURRENT ROW clause is used to consider the last 30 days of data for each row." +}, { + "id": "204", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the top scorer for each team in a tournament?", + "sql_context": "CREATE TABLE Player (PlayerID int, PlayerName varchar(50), TeamID int); CREATE TABLE Goal (GoalID int, PlayerID int, Goals int, MatchDate date, TournamentID int); INSERT INTO Player (PlayerID, PlayerName, TeamID) VALUES (1, \u0027James Rodriguez\u0027, 1), (2, \u0027Radamel Falcao\u0027, 1), (3, \u0027Thomas Muller\u0027, 2), (4, \u0027Miroslav Klose\u0027, 2); INSERT INTO Goal (GoalID, PlayerID, Goals, MatchDate, TournamentID) VALUES (1, 1, 2, \u00272022-06-01\u0027, 1), (2, 1, 3, \u00272022-06-05\u0027, 1), (3, 2, 1, \u00272022-06-01\u0027, 1), (4, 2, 2, \u00272022-06-05\u0027, 1), (5, 3, 4, \u00272022-06-01\u0027, 1), (6, 3, 5, \u00272022-06-05\u0027, 1), (7, 4, 2, \u00272022-06-01\u0027, 1), (8, 4, 3, \u00272022-06-05\u0027, 1);", + "sql": "SELECT p.TeamID, p.PlayerName, SUM(g.Goals) AS TotalGoals, ROW_NUMBER() OVER (PARTITION BY p.TeamID ORDER BY SUM(g.Goals) DESC) AS Ranking FROM Player p JOIN Goal g ON p.PlayerID \u003d g.PlayerID WHERE g.TournamentID \u003d 1 GROUP BY p.TeamID, p.PlayerName HAVING Ranking \u003c\u003d 1;", + "sql_explanation": "We join the Player and Goal tables, then calculate the total goals for each player-team combination, ranking them by goals scored in a tournament. We filter the top scorer per team using the HAVING clause." +}, { + "id": "330", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 goal scorers for each team?", + "sql_context": "CREATE TABLE Player (PlayerID int, PlayerName varchar(50), TeamID int); CREATE TABLE Goal (GoalID int, PlayerID int, Goals int, MatchDate date); INSERT INTO Player (PlayerID, PlayerName, TeamID) VALUES (1, \u0027Messi\u0027, 1), (2, \u0027Neymar\u0027, 1), (3, \u0027Mbappe\u0027, 1), (4, \u0027Ronaldo\u0027, 2); INSERT INTO Goal (GoalID, PlayerID, Goals, MatchDate) VALUES (1, 1, 5, \u00272022-04-01\u0027), (2, 1, 3, \u00272022-04-05\u0027), (3, 2, 2, \u00272022-04-01\u0027), (4, 3, 4, \u00272022-04-05\u0027), (5, 4, 6, \u00272022-04-01\u0027);", + "sql": "SELECT p.TeamID, p.PlayerName, SUM(g.Goals) AS TotalGoals, ROW_NUMBER() OVER (PARTITION BY p.TeamID ORDER BY SUM(g.Goals) DESC) AS Ranking FROM Player p JOIN Goal g ON p.PlayerID \u003d g.PlayerID GROUP BY p.TeamID, p.PlayerName HAVING Ranking \u003c\u003d 3;", + "sql_explanation": "We join the Player and Goal tables, then calculate the total goals for each player-team combination, ranking them by goals scored. We filter the top 3 per team using the HAVING clause." +}, { + "id": "1269", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 goal scorers in a team?", + "sql_context": "CREATE TABLE players (player_id INT, player_name TEXT, goals INT, team_id INT);", + "sql": "SELECT player_name, goals FROM (SELECT player_name, goals, team_id, ROW_NUMBER() OVER (PARTITION BY team_id ORDER BY goals DESC) as rn FROM players) sub WHERE rn \u003c\u003d 5;", + "sql_explanation": "We are using a window function to rank the players by their goals in each team, then filtering the top 5 goal scorers in each team." +}, { + "id": "1841", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the highest scoring player and their total goals for each team in the 2022 UEFA Champions League?", + "sql_context": "CREATE TABLE ucl (player VARCHAR(255), team VARCHAR(255), goals INT); INSERT INTO ucl (player, team, goals) VALUES (\u0027Messi\u0027, \u0027Barcelona\u0027, 10), (\u0027Messi\u0027, \u0027Barcelona\u0027, 12), (\u0027Ronaldo\u0027, \u0027Manchester United\u0027, 15), (\u0027Ronaldo\u0027, \u0027Manchester United\u0027, 13);", + "sql": "SELECT team, MAX(goals) AS highest_goals, FIRST_VALUE(player) OVER (PARTITION BY team ORDER BY goals DESC) AS top_scorer FROM ucl GROUP BY team;", + "sql_explanation": "Join the ucl and teams tables, group by team, and calculate the highest scoring player and their total goals for each team in the 2022 UEFA Champions League." +}, { + "id": "2569", + "domain": "sports", + "domain_description": "Extensive data on athlete performance, team management, fan engagement, facility operations, and event planning in sports.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the leading goal scorer for each team in the 2022-2023 EPL season?", + "sql_context": "CREATE TABLE epl_teams (team_id INT, team_name TEXT, league TEXT, goals_scored INT, goals_conceded INT); INSERT INTO epl_teams (team_id, team_name, league, goals_scored, goals_conceded) VALUES (1, \u0027Manchester City\u0027, \u0027EPL\u0027, 99, 33), (2, \u0027Liverpool\u0027, \u0027EPL\u0027, 94, 34);", + "sql": "SELECT team_name, LEAD(goals_scored) OVER (PARTITION BY team_id ORDER BY team_id) AS leading_goal_scorer FROM epl_teams;", + "sql_explanation": "Find the leading goal scorer for each team by partitioning the records by team ID and ordering them by team ID. Use the LEAD function to select the goals scored for the next record, which represents the leading goal scorer for the team." +}, { + "id": "701", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the most recent diplomacy event for Canada?", + "sql_context": "CREATE TABLE if not exists diplomacy (id INT, event_name VARCHAR(100), country VARCHAR(50), partner_country VARCHAR(50), date DATE); INSERT INTO diplomacy (id, event_name, country, partner_country, date) VALUES (1, \u0027Military Aid\u0027, \u0027USA\u0027, \u0027Afghanistan\u0027, \u00272001-09-11\u0027); INSERT INTO diplomacy (id, event_name, country, partner_country, date) VALUES (2, \u0027Defense Agreement\u0027, \u0027France\u0027, \u0027Germany\u0027, \u00272014-07-01\u0027);", + "sql": "SELECT event_name, country, partner_country, date, RANK() OVER(PARTITION BY country ORDER BY date DESC) as recent_rank FROM diplomacy WHERE country \u003d \u0027Canada\u0027 ORDER BY date DESC FETCH FIRST 1 ROW ONLY;", + "sql_explanation": "The SQL query selects the event_name, country, partner_country, date and assigns a recent_rank using the RANK() function based on the order of date in descending order for each country. It filters the results to only show events for Canada and orders it by date in descending order, then fetches the first row only." +}, { + "id": "1789", + "domain": "defense", + "domain_description": "Defense data on military innovation, peacekeeping operations, defense diplomacy, and humanitarian assistance.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of humanitarian assistance provided by each country in 2019?", + "sql_context": "CREATE TABLE HumanitarianAssistance (Country VARCHAR(50), Assistance FLOAT); INSERT INTO HumanitarianAssistance (Country, Assistance) VALUES (\u0027Brazil\u0027, 54.3), (\u0027Colombia\u0027, 45.6), (\u0027Costa Rica\u0027, 34.5), (\u0027Cuba\u0027, 67.8);", + "sql": "SELECT Country, Assistance, Assistance/SUM(Assistance) OVER (PARTITION BY NULL) * 100 as Percentage FROM HumanitarianAssistance WHERE Year \u003d 2019;", + "sql_explanation": "Calculate the percentage of humanitarian assistance provided by each country in 2019." +}, { + "id": "102", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of garments sold for each manufacturer, partitioned by the manufacturer and ordered by the total number of garments sold in descending order?", + "sql_context": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(255));CREATE TABLE Garments (GarmentID INT, ManufacturerID INT);CREATE TABLE Sales (SaleID INT, GarmentID INT, SaleDate DATE, Quantity INT);", + "sql": "SELECT m.ManufacturerName, COUNT(g.GarmentID) AS TotalGarments, ROW_NUMBER() OVER (PARTITION BY NULL ORDER BY COUNT(g.GarmentID) DESC) AS Rank FROM Manufacturers m JOIN Garments g ON m.ManufacturerID \u003d g.ManufacturerID JOIN Sales s ON g.GarmentID \u003d s.GarmentID GROUP BY m.ManufacturerName ORDER BY Rank;", + "sql_explanation": "This query calculates the total number of garments sold for each manufacturer by joining the Manufacturers, Garments, and Sales tables on ManufacturerID, GarmentID, and GarmentID, respectively. It then groups the results by ManufacturerName and calculates the total number of garments sold using the COUNT function. The ROW_NUMBER function is used to rank the results within each partition in descending order." +}, { + "id": "433", + "domain": "fashion", + "domain_description": "Trend forecasting data, garment manufacturing information, retail sales statistics, and sustainability metrics in the industry.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the daily sales trend for the past year, including the current day, by calculating the moving average of units sold per day.", + "sql_context": "CREATE TABLE daily_sales (sale_date DATE, units_sold INT); INSERT INTO daily_sales (sale_date, units_sold) VALUES (\u00272021-04-01\u0027, 500), (\u00272021-04-02\u0027, 600), (\u00272021-04-03\u0027, 700), (\u00272021-04-04\u0027, 800), (\u00272021-04-05\u0027, 900), (\u00272022-04-01\u0027, 1000);", + "sql": "SELECT sale_date, AVG(units_sold) OVER (ORDER BY sale_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS moving_average FROM daily_sales WHERE sale_date \u003e\u003d DATE_TRUNC(\u0027day\u0027, CURRENT_DATE - INTERVAL \u0027365 day\u0027) ORDER BY sale_date;", + "sql_explanation": "The SQL query identifies the daily sales trend for the past year, including the current day, by calculating the moving average of units sold per day. It uses the AVG function with the window clause to calculate the moving average of units sold. The window clause specifies the frame for the average calculation, which is the current row and the 29 preceding rows. The query then filters the data by sale_date, retaining only the rows for the past 365 days, and orders the result by sale_date." +}, { + "id": "354", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the third highest salary for employees in the HR department, using a window function.", + "sql_context": "CREATE TABLE employees (employee_id INT, department VARCHAR(20), salary DECIMAL(10, 2));", + "sql": "SELECT employee_id, department, salary FROM (SELECT employee_id, department, salary, RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank FROM employees) AS employee_salaries WHERE salary_rank \u003d 3 AND department \u003d \u0027HR\u0027;", + "sql_explanation": "First, calculate the salary rank for each employee within their department using the RANK function and partitioning the data by department and ordering by salary in descending order. Next, filter the results to only include the third highest salary in the HR department." +}, { + "id": "1092", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of days between each customer\u0027s first and last transaction, partitioned by account type.", + "sql_context": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), transaction_date DATE);", + "sql": "SELECT customer_id, account_type, DATEDIFF(MAX(transaction_date), MIN(transaction_date)) OVER (PARTITION BY customer_id, account_type) AS days_between_first_last FROM accounts;", + "sql_explanation": "For each customer and account type, calculate the number of days between the first and last transaction using the MIN and MAX aggregate functions and the DATEDIFF function, and partitioning the data by customer_id and account_type." +}, { + "id": "1576", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank customers by total deposits in Shariah-compliant accounts, with ties given the same rank.", + "sql_context": "CREATE TABLE deposits (customer_id INT, account_type VARCHAR(20), balance DECIMAL(10,2), deposit_date DATE);", + "sql": "SELECT customer_id, RANK() OVER (ORDER BY SUM(balance) DESC) as deposit_rank FROM deposits WHERE account_type \u003d \u0027Shariah-compliant\u0027 GROUP BY customer_id;", + "sql_explanation": "Calculate the rank for each customer based on their total deposits in Shariah-compliant accounts. Use RANK() function to handle ties." 
+}, { + "id": "1622", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the number of customers who have an account balance greater than the 75th percentile for their account type.", + "sql_context": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), balance DECIMAL(10, 2));", + "sql": "SELECT COUNT(DISTINCT customer_id) FROM accounts WHERE balance \u003e PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY balance) OVER (PARTITION BY account_type);", + "sql_explanation": "Calculate the 75th percentile balance for each account type using the PERCENTILE_CONT function and partitioning the data by account_type. Then, count the number of distinct customers who have a balance greater than the 75th percentile balance." +}, { + "id": "1675", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which Islamic fintech companies have the highest number of active users in Indonesia?", + "sql_context": "CREATE TABLE islamic_fintech (id INT, company_name VARCHAR(50), country VARCHAR(50), num_active_users INT); INSERT INTO islamic_fintech (id, company_name, country, num_active_users) VALUES (1, \u0027Alami\u0027, \u0027Indonesia\u0027, 10000), (2, \u0027Ayoconnect\u0027, \u0027Indonesia\u0027, 12000);", + "sql": "SELECT country, company_name, num_active_users, RANK() OVER (ORDER BY num_active_users DESC) as rank FROM islamic_fintech WHERE country \u003d \u0027Indonesia\u0027;", + "sql_explanation": "This query ranks Islamic fintech companies in Indonesia by their number of active users using the RANK() function." +}, { + "id": "1896", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 countries with the highest financial capability score.", + "sql_context": "CREATE TABLE financial_capability (id INT, country VARCHAR(50), score INT); INSERT INTO financial_capability (id, country, score) VALUES (1, \u0027Brazil\u0027, 65), (2, \u0027India\u0027, 70), (3, \u0027China\u0027, 80), (4, \u0027South Africa\u0027, 75), (5, \u0027Indonesia\u0027, 60);", + "sql": "SELECT country, score FROM (SELECT country, score, ROW_NUMBER() OVER (ORDER BY score DESC) as rn FROM financial_capability) tmp WHERE rn \u003c\u003d 3;", + "sql_explanation": "This query identifies the top 3 countries with the highest financial capability score by using the ROW_NUMBER function with ORDER BY to rank scores in descending order, and filtering for the top 3 rows." +}, { + "id": "2703", + "domain": "finance", + "domain_description": "Financial data on Shariah-compliant finance, socially responsible lending, financial capability, and financial wellbeing.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average account balance for socially responsible lending customers, partitioned by gender?", + "sql_context": "CREATE TABLE socially_responsible_lending(customer_id INT, name VARCHAR(50), gender VARCHAR(10), account_balance DECIMAL(10, 2)); INSERT INTO socially_responsible_lending VALUES (1, \u0027John Doe\u0027, \u0027Male\u0027, 5000), (2, \u0027Jane Smith\u0027, \u0027Female\u0027, 7000), (3, \u0027Ali Hassan\u0027, \u0027Male\u0027, 6000), (4, \u0027Fatima Khan\u0027, \u0027Female\u0027, 8000);", + "sql": "SELECT gender, AVG(account_balance) avg_balance FROM socially_responsible_lending WINDOW W AS (PARTITION BY gender);", + "sql_explanation": "Calculate the average account balance for each gender by partitioning the data using the WINDOW clause and the PARTITION BY statement. Then, use the AVG function to find the average account balance for each partition." +}, { + "id": "15", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which 2 cities with the highest digital interaction growth rates are hosting traveling exhibitions in the current year?", + "sql_context": "CREATE TABLE City (Id INT, Name VARCHAR(100)); CREATE TABLE TravelingExhibition (Id INT, CityId INT, Year INT, DigitalInteractions INT);", + "sql": "SELECT te.CityId, c.Name, te.DigitalInteractions, ((te.DigitalInteractions - LAG(te.DigitalInteractions, 1, 0) OVER (PARTITION BY te.CityId ORDER BY te.Year))*100.0 / LAG(te.DigitalInteractions, 1, 0) OVER (PARTITION BY te.CityId ORDER BY te.Year)) as PercentageChange FROM TravelingExhibition te JOIN City c ON te.CityId \u003d c.Id WHERE te.Year \u003d YEAR(CURRENT_DATE) GROUP BY te.CityId, c.Name, te.DigitalInteractions, PercentageChange ORDER BY PercentageChange DESC LIMIT 2;", + "sql_explanation": "The SQL query calculates the percentage change of digital interactions for traveling exhibitions in each city for the current year, using the LAG window function, and returns the top 2 cities with the highest growth rates." +}, { + "id": "494", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the community engagement events with the highest attendance in each country and their respective dates, sorted by country.", + "sql_context": "CREATE TABLE Country (Id INT, Name VARCHAR(100)); CREATE TABLE CommunityEvent (Id INT, CountryId INT, EventType VARCHAR(50), Attendance INT, EventDate DATE);", + "sql": "SELECT CountryId, Name, EventType, EventDate, Attendance, DENSE_RANK() OVER (PARTITION BY CountryId ORDER BY Attendance DESC) as Ranking FROM Country c JOIN CommunityEvent ce ON c.Id \u003d ce.CountryId ORDER BY Ranking, Name;", + "sql_explanation": "The SQL query retrieves the community engagement events with the highest attendance in each country and their respective dates, using the DENSE_RANK window function, and orders the results by country." +}, { + "id": "3295", + "domain": "museums", + "domain_description": "Visitor demographics, exhibition analytics, community engagement metrics, and digital museum trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of visitor ratings for exhibitions in Paris?", + "sql_context": "CREATE TABLE Exhibitions (ExhibitionID INT, Name VARCHAR(255), City VARCHAR(255), Rating FLOAT);", + "sql": "SELECT City, AVG(Rating) OVER(PARTITION BY City) as AvgRating FROM Exhibitions WHERE City \u003d \u0027Paris\u0027;", + "sql_explanation": "This query calculates the average rating of exhibitions in Paris by partitioning the data based on the city and then calculating the average rating for each city." 
+}, { + "id": "13", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage change in visitors for each destination between the first and last days of the month.", + "sql_context": "CREATE TABLE MonthlyVisitors (Destination VARCHAR(50), VisitorCount INT, VisitDate DATE); INSERT INTO MonthlyVisitors VALUES (\u0027Berlin\u0027, 800, \u00272022-01-01\u0027), (\u0027Berlin\u0027, 900, \u00272022-01-31\u0027), (\u0027London\u0027, 1000, \u00272022-01-01\u0027), (\u0027London\u0027, 1100, \u00272022-01-31\u0027);", + "sql": "SELECT Destination, (VisitorCountEnd - VisitorCountStart) * 100.0 / VisitorCountStart as PercentageChange FROM (SELECT Destination, FIRST_VALUE(VisitorCount) OVER (PARTITION BY Destination ORDER BY VisitDate ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as VisitorCountStart, LAST_VALUE(VisitorCount) OVER (PARTITION BY Destination ORDER BY VisitDate ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as VisitorCountEnd FROM MonthlyVisitors) AS Subquery;", + "sql_explanation": "The SQL query calculates the percentage change in visitors between the first and last days of the month for each destination, using the FIRST_VALUE and LAST_VALUE window functions. The query orders the result by the Destination column." 
+}, { + "id": "263", + "domain": "tourism", + "domain_description": "International visitor statistics, travel advisory information, destination marketing data, and sustainable tourism practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the 3rd most visited sustainable destination for each year in the \u0027sustainable_visits\u0027 table.", + "sql_context": "CREATE TABLE sustainable_visits (visit_id INT, destination TEXT, visit_date DATE); INSERT INTO sustainable_visits (visit_id, destination, visit_date) VALUES (1, \u0027Bali\u0027, \u00272022-01-01\u0027), (2, \u0027Bali\u0027, \u00272022-02-01\u0027), (3, \u0027Paris\u0027, \u00272022-03-01\u0027); CREATE TABLE sustainable_destinations (destination TEXT, sustainability_rank INT); INSERT INTO sustainable_destinations (destination, sustainability_rank) VALUES (\u0027Bali\u0027, 1), (\u0027Paris\u0027, 2), (\u0027Rio de Janeiro\u0027, 3);", + "sql": "SELECT destination, EXTRACT(YEAR FROM visit_date) AS visit_year, RANK() OVER (PARTITION BY EXTRACT(YEAR FROM visit_date) ORDER BY COUNT(*) DESC) AS visit_rank FROM sustainable_visits GROUP BY destination, EXTRACT(YEAR FROM visit_date) HAVING visit_rank \u003d 3;", + "sql_explanation": "The SQL query first extracts the year from the visit_date column using the EXTRACT function. Then, it ranks the data using the RANK function, partitioning the data by year and ordering it by the number of visits in descending order. Finally, it filters the data to only include the 3rd most visited sustainable destination using the HAVING clause." 
+}, { + "id": "325", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees in the Engineering department, and the number of employees with a salary higher than the average salary?", + "sql_context": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (1, \u0027John\u0027, \u0027Doe\u0027, \u0027Engineering\u0027, 75000.00), (2, \u0027Jane\u0027, \u0027Doe\u0027, \u0027Engineering\u0027, 80000.00), (3, \u0027Mike\u0027, \u0027Smith\u0027, \u0027Marketing\u0027, 60000.00), (4, \u0027Samantha\u0027, \u0027Johnson\u0027, \u0027Engineering\u0027, 85000.00), (5, \u0027David\u0027, \u0027Brown\u0027, \u0027Marketing\u0027, 65000.00);", + "sql": "SELECT AVG(Salary) OVER (PARTITION BY Department) AS Avg_Salary, COUNT(CASE WHEN Salary \u003e AVG(Salary) OVER (PARTITION BY Department) THEN 1 END) OVER (PARTITION BY Department) AS High_Salary_Count FROM Employees WHERE Department \u003d \u0027Engineering\u0027;", + "sql_explanation": "This query calculates the average salary of employees in the Engineering department and the number of employees with a salary higher than the average salary by using the AVG function with the OVER clause and the PARTITION BY clause on the Department column, and the COUNT function with the CASE statement and the OVER clause and the PARTITION BY clause on the Department column. 
The result shows the average salary and the number of employees with a salary higher than the average salary for the Engineering department." +}, { + "id": "922", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many aircraft were manufactured by each company in the past 5 years?", + "sql_context": "CREATE TABLE aircraft (aircraft_name VARCHAR(255), manufacturer VARCHAR(255), production_date DATE); INSERT INTO aircraft (aircraft_name, manufacturer, production_date) VALUES (\u0027Air1\u0027, \u0027Man1\u0027, \u00272018-05-12\u0027), (\u0027Air2\u0027, \u0027Man2\u0027, \u00272020-12-18\u0027), (\u0027Air3\u0027, \u0027Man1\u0027, \u00272019-09-21\u0027), (\u0027Air4\u0027, \u0027Man3\u0027, \u00272017-01-03\u0027), (\u0027Air5\u0027, \u0027Man2\u0027, \u00272021-06-25\u0027);", + "sql": "SELECT manufacturer, COUNT(*) OVER (PARTITION BY manufacturer) as count FROM aircraft WHERE production_date \u003e\u003d DATEADD(year, -5, CURRENT_DATE) GROUP BY manufacturer ORDER BY count DESC;", + "sql_explanation": "This query calculates the number of aircraft each manufacturer has produced in the past 5 years. It partitions the data by manufacturer and orders the results by the count in descending order." 
+}, { + "id": "2593", + "domain": "aerospace", + "domain_description": "Aircraft manufacturing data, satellite deployment projects, flight safety records, and space exploration research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the aircraft models with the highest production quantity in each year", + "sql_context": "CREATE TABLE Aircraft (AircraftID INT, Model VARCHAR(50), Manufacturer VARCHAR(50), Year INT, ProductionQuantity INT); INSERT INTO Aircraft (AircraftID, Model, Manufacturer, Year, ProductionQuantity) VALUES (1, \u0027B737\u0027, \u0027Boeing\u0027, 1967, 10497), (2, \u0027A320\u0027, \u0027Airbus\u0027, 1988, 9324);", + "sql": "SELECT Model, Year, ProductionQuantity, MAX(ProductionQuantity) OVER(PARTITION BY Year) as MaxProduction FROM Aircraft;", + "sql_explanation": "The SQL query identifies the aircraft models with the highest production quantity in each year. It uses the MAX window function, partitioning by the Year column. The result is a new column named MaxProduction, which assigns the highest production quantity for each year." +}, { + "id": "1038", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the three sites with the highest chemical waste production and their corresponding ranks, in South America.", + "sql_context": "CREATE TABLE chemical_waste (site_name VARCHAR(50), waste_amount FLOAT, region VARCHAR(50)); INSERT INTO chemical_waste (site_name, waste_amount, region) VALUES (\u0027Site A\u0027, 150.5, \u0027South America\u0027), (\u0027Site B\u0027, 125.7, \u0027South America\u0027), (\u0027Site C\u0027, 200.3, \u0027South America\u0027), (\u0027Site D\u0027, 75.9, \u0027South America\u0027), (\u0027Site E\u0027, 175.4, \u0027South America\u0027);", + "sql": "SELECT site_name, waste_amount, RANK() OVER (PARTITION BY region ORDER BY waste_amount DESC) as waste_rank FROM chemical_waste WHERE region \u003d \u0027South America\u0027 AND waste_rank \u003c\u003d 3;", + "sql_explanation": "The SQL query finds the three sites with the highest chemical waste production in South America and their corresponding ranks. It uses the RANK function to assign a rank to each site based on the waste amount in descending order, partitioned by region. The WHERE clause filters the results to show only the data from South America." +}, { + "id": "1112", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the chemical with the highest safety violation score in the past month, per manufacturing site?", + "sql_context": "CREATE TABLE SafetyViolations (Site VARCHAR(50), Chemical VARCHAR(50), ViolationScore INT, ViolationDate DATETIME);", + "sql": "SELECT Site, MAX(ViolationScore) OVER (PARTITION BY Site ORDER BY ViolationDate ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS MaxViolationScore, Chemical FROM SafetyViolations", + "sql_explanation": "Find the maximum safety violation score for each site in the past 30 days (29 days preceding and the current day), partitioned by site and ordered by violation date, and return the chemical associated with the maximum score." +}, { + "id": "1365", + "domain": "chemicals", + "domain_description": "Chemical manufacturing data, safety protocols, environmental impact assessments, and product innovation metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the moving average of chemical waste production for each facility, for the last 6 months.", + "sql_context": "CREATE TABLE facility_waste (facility_id INT, date DATE, waste_amount FLOAT); INSERT INTO facility_waste (facility_id, date, waste_amount) VALUES (1, \u00272022-01-01\u0027, 500), (1, \u00272022-02-01\u0027, 550), (1, \u00272022-03-01\u0027, 600), (1, \u00272022-04-01\u0027, 650), (1, \u00272022-05-01\u0027, 700), (1, \u00272022-06-01\u0027, 750), (2, \u00272022-01-01\u0027, 400), (2, \u00272022-02-01\u0027, 450), (2, \u00272022-03-01\u0027, 500), (2, \u00272022-04-01\u0027, 550), (2, \u00272022-05-01\u0027, 600), (2, \u00272022-06-01\u0027, 650);", + "sql": "SELECT facility_id, AVG(waste_amount) OVER (PARTITION BY facility_id ORDER BY date ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) as moving_average FROM facility_waste;", + "sql_explanation": "The SQL query calculates the moving average of chemical waste production for each facility, for the last 6 months. It uses the AVG function with a window clause to calculate the average waste amount for each facility, for the last 6 months. The ORDER BY clause sorts the data by date, and the window clause specifies the range as the last 6 months for each facility. The partition clause specifies the facility ID as the partitioning key." +}, { + "id": "93", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many students have decreased their mental health score by more than 5% in the last 3 months?", + "sql_context": "CREATE TABLE student_mental_health_history (student_id INT, score INT, date DATE); INSERT INTO student_mental_health_history VALUES (1, 60, \u00272022-04-01\u0027), (1, 55, \u00272022-07-01\u0027), (2, 80, \u00272022-04-01\u0027), (2, 75, \u00272022-07-01\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT student_id, (LAG(score) OVER(PARTITION BY student_id ORDER BY date) - score) / LAG(score) OVER(PARTITION BY student_id ORDER BY date) * 100.0 as decrease_percentage FROM student_mental_health_history WHERE date \u003e\u003d DATEADD(month, -3, GETDATE())) t WHERE decrease_percentage \u003e 5;", + "sql_explanation": "The SQL query calculates the number of students who have decreased their mental health score by more than 5% in the last 3 months by using a subquery with the LAG() window function to calculate the decrease percentage for each student. It then filters the results to only include students with a decrease percentage greater than 5 and counts the number of rows." +}, { + "id": "1245", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average mental health score for students, grouped by ethnicity, for the past 2 years?", + "sql_context": "CREATE TABLE student_mental_health (student_id INT, ethnicity VARCHAR(50), mental_health_score INT, date DATE); INSERT INTO student_mental_health VALUES (1, \u0027Hispanic\u0027, 75, \u00272020-01-01\u0027), (2, \u0027African American\u0027, 85, \u00272020-01-02\u0027);", + "sql": "SELECT ethnicity, AVG(mental_health_score) OVER (PARTITION BY ethnicity ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as avg_score FROM student_mental_health;", + "sql_explanation": "Calculate the average mental health score for each ethnicity, considering the previous 2 years\u0027 scores using the window function AVG with a partition on ethnicity and an ordering on date." +}, { + "id": "1272", + "domain": "education", + "domain_description": "Education data on student mental health, teacher professional development, open pedagogy, and lifelong learning.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the trend of student mental health scores in the last 3 months?", + "sql_context": "CREATE TABLE mental_health_trend (date DATE, school_id INT, mental_health_score INT); INSERT INTO mental_health_trend (date, school_id, mental_health_score) VALUES (\u00272022-03-01\u0027, 101, 80), (\u00272022-03-02\u0027, 101, 75), (\u00272022-03-03\u0027, 102, 85), (\u00272022-03-04\u0027, 102, 90), (\u00272022-04-01\u0027, 101, 82), (\u00272022-04-02\u0027, 101, 78), (\u00272022-04-03\u0027, 102, 88), (\u00272022-04-04\u0027, 102, 92);", + "sql": "SELECT date, mental_health_score, ROW_NUMBER() OVER (ORDER BY date) as rank FROM mental_health_trend WHERE date \u003e\u003d DATEADD(month, -3, CURRENT_TIMESTAMP) ORDER BY date;", + "sql_explanation": "Calculate the trend of student mental health scores in the last 3 months by ranking the scores in descending order of date, with 1 being the most recent score." +}, { + "id": "892", + "domain": "fine arts", + "domain_description": "In-depth data on art history, curation, exhibition, and cultural impact in the fine arts.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the artworks that were exhibited in the top 10% of the longest exhibitions.", + "sql_context": "CREATE TABLE AllGalleriesI (gallery_name VARCHAR(20), artwork_ID INT, exhibition_duration INT); INSERT INTO AllGalleriesI (gallery_name, artwork_ID, exhibition_duration) VALUES (\u0027GalleryD\u0027, 1, 45), (\u0027GalleryD\u0027, 2, 60), (\u0027GalleryD\u0027, 3, 30), (\u0027GalleryE\u0027, 4, 70), (\u0027GalleryE\u0027, 5, 50), (\u0027GalleryF\u0027, 6, 95), (\u0027GalleryF\u0027, 7, 85), (\u0027GalleryF\u0027, 8, 100), (\u0027GalleryF\u0027, 9, 120), (\u0027GalleryF\u0027, 10, 110);", + "sql": "SELECT artwork_ID, exhibition_duration FROM (SELECT artwork_ID, exhibition_duration, NTILE(10) OVER (ORDER BY exhibition_duration) as tile_num FROM AllGalleriesI) tmp WHERE tile_num \u003c\u003d 2;", + "sql_explanation": "Calculate the NTILE() for each exhibition_duration and assign them to a tile number. Filter and return the rows where the tile_num is less than or equal to 2, which represents the top 10% longest exhibitions." +}, { + "id": "523", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the second most recent document for each policy?", + "sql_context": "CREATE TABLE policy_documents (id INT, policy_id INT, document_name VARCHAR(50), document_date DATE); INSERT INTO policy_documents (id, policy_id, document_name, document_date) VALUES (1, 1, \u0027Policy Coverage\u0027, \u00272020-01-05\u0027); INSERT INTO policy_documents (id, policy_id, document_name, document_date) VALUES (2, 2, \u0027Policy Terms\u0027, \u00272021-03-12\u0027);", + "sql": "SELECT policy_id, document_name, NTH_VALUE(document_name, 2) OVER (PARTITION BY policy_id ORDER BY document_date DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as second_document FROM policy_documents;", + "sql_explanation": "The SQL query uses the NTH_VALUE() window function to retrieve the name of the second most recent document for each policy. It partitions the data by policy_id and orders it by document_date in descending order. The NTH_VALUE function then gets the second document name in each partition." +}, { + "id": "787", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the moving average of claims paid for the last 3 months.", + "sql_context": "CREATE TABLE Claim (ClaimID INT, ClaimDate DATE, ClaimAmount DECIMAL(10,2)); INSERT INTO Claim VALUES (1, \u00272021-01-01\u0027, 5000), (2, \u00272021-02-01\u0027, 3000), (3, \u00272021-03-01\u0027, 7000), (4, \u00272021-04-01\u0027, 8000), (5, \u00272021-05-01\u0027, 9000);", + "sql": "SELECT ClaimDate, AVG(ClaimAmount) OVER (ORDER BY ClaimDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as MovingAvg FROM Claim WHERE ClaimDate \u003e\u003d DATEADD(MONTH, -3, GETDATE()) ORDER BY ClaimDate;", + "sql_explanation": "1. Select ClaimDate and calculate moving average of ClaimAmount using AVG and ROWS BETWEEN clause. 2. Filter for claims within the last 3 months. 3. Order by ClaimDate." +}, { + "id": "951", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of policyholder ages by policy type?", + "sql_context": "CREATE TABLE Policyholder (PolicyholderID INT, Age INT, PolicyType VARCHAR(20)); INSERT INTO Policyholder (PolicyHolderID, Age, PolicyType) VALUES (1, 35, \u0027Auto\u0027), (2, 45, \u0027Home\u0027), (3, 55, \u0027Life\u0027), (4, 65, \u0027Auto\u0027), (5, 75, \u0027Home\u0027), (6, 85, \u0027Life\u0027);", + "sql": "SELECT PolicyType, Age, COUNT(*) OVER (PARTITION BY PolicyType, Age) AS CountByTypeAge, ROW_NUMBER() OVER (PARTITION BY PolicyType ORDER BY Age) AS RankByPolicyType FROM Policyholder;", + "sql_explanation": "This query calculates the distribution of policyholder ages by policy type by counting the number of policyholders for each age and policy type and assigning a row number based on the age for each policy type." +}, { + "id": "2149", + "domain": "insurance", + "domain_description": "Underwriting data, claims processing information, risk assessment models, and policyholder demographics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average claim amount by policy type and claim category?", + "sql_context": "CREATE TABLE PolicyClaims (PolicyType VARCHAR(20), ClaimCategory VARCHAR(20), ClaimAmount INT); INSERT INTO PolicyClaims (PolicyType, ClaimCategory, ClaimAmount) VALUES (\u0027Auto\u0027, \u0027Collision\u0027, 1500), (\u0027Auto\u0027, \u0027Theft\u0027, 5000), (\u0027Home\u0027, \u0027Fire\u0027, 30000), (\u0027Home\u0027, \u0027WaterDamage\u0027, 8000), (\u0027Life\u0027, \u0027AccidentalDeath\u0027, 200000);", + "sql": "SELECT PolicyType, ClaimCategory, AVG(ClaimAmount) OVER (PARTITION BY PolicyType, ClaimCategory) AS AvgClaimAmount FROM PolicyClaims;", + "sql_explanation": "This query calculates the average claim amount by policy type and claim category by finding the average claim amount for each unique combination of policy type and claim category." +}, { + "id": "2109", + "domain": "logistics", + "domain_description": "Freight forwarding data, warehouse management statistics, route optimization information, and reverse logistics metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average delivery time for shipments from South Korea, partitioned by warehouse?", + "sql_context": "CREATE TABLE Warehouses (WarehouseID INT, WarehouseName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Warehouses (WarehouseID, WarehouseName, Country) VALUES (1, \u0027Seoul Warehouse\u0027, \u0027South Korea\u0027); CREATE TABLE Shipments (ShipmentID INT, WarehouseID INT, DeliveryTime INT);", + "sql": "SELECT WarehouseID, AVG(DeliveryTime) OVER (PARTITION BY WarehouseID) AS AvgDeliveryTime FROM Shipments WHERE Country \u003d \u0027South Korea\u0027;", + "sql_explanation": "Calculate the average delivery time for shipments from South Korea, partitioned by warehouse, by using the AVG function with the OVER clause and partitioning by warehouse ID." +}, { + "id": "271", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount per quarter, per donor, in descending order of the total donation amount?", + "sql_context": "CREATE TABLE Donations (DonationID int, DonorID int, DonationAmount decimal(10, 2), DonationDate date); INSERT INTO Donations (DonationID, DonorID, DonationAmount, DonationDate) VALUES (1, 1, 500.00, \u00272022-01-01\u0027), (2, 1, 800.00, \u00272022-02-01\u0027), (3, 2, 300.00, \u00272022-01-01\u0027), (4, 3, 700.00, \u00272022-01-01\u0027);", + "sql": "SELECT DonorID, DATE_TRUNC(\u0027quarter\u0027, DonationDate) AS Quarter, SUM(DonationAmount) OVER (PARTITION BY DATE_TRUNC(\u0027quarter\u0027, DonationDate), DonorID) AS TotalDonationPerQuarter FROM Donations GROUP BY DonorID, Quarter ORDER BY TotalDonationPerQuarter DESC;", + "sql_explanation": "This SQL query calculates the total donation amount per quarter, per donor, by partitioning the data based on the quarter of the DonationDate and the donor. The SUM function with the OVER clause is used to calculate the total donation amount per quarter." +}, { + "id": "819", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per donor in the Asia-Pacific region, excluding the top 10 donors?", + "sql_context": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, Region TEXT, DonationAmount DECIMAL(10,2)); INSERT INTO Donors VALUES (1, \u0027John Smith\u0027, \u0027Asia-Pacific\u0027, 500.00), (2, \u0027Jane Doe\u0027, \u0027Americas\u0027, 300.00), (3, \u0027Mary Major\u0027, \u0027Asia-Pacific\u0027, 750.00);", + "sql": "SELECT AVG(DonationAmount) FROM (SELECT DonationAmount, ROW_NUMBER() OVER (PARTITION BY Region ORDER BY DonationAmount DESC) as rn FROM Donors WHERE Region \u003d \u0027Asia-Pacific\u0027) tmp WHERE rn \u003e 10;", + "sql_explanation": "Calculate the average donation amount per donor in the Asia-Pacific region, excluding the top 10 donors. First, a subquery is used to rank donors within their regions by donation amount. Then, the average is calculated for donors with a rank greater than 10." +}, { + "id": "841", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the moving average of donation amounts for each donor, over the last three donations?", + "sql_context": "CREATE TABLE DonationAmounts (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL(10,2)); INSERT INTO DonationAmounts VALUES (1, 1, \u00272021-04-05\u0027, 1500.00), (2, 1, \u00272021-08-20\u0027, 1500.00), (3, 2, \u00272021-04-12\u0027, 1000.00), (4, 3, \u00272021-08-01\u0027, 2000.00), (5, 1, \u00272021-03-15\u0027, 500.00), (6, 1, \u00272021-09-01\u0027, 750.00);", + "sql": "SELECT DonorID, DonationDate, DonationAmount, AVG(DonationAmount) OVER (PARTITION BY DonorID ORDER BY DonationDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS MovingAvg FROM DonationAmounts;", + "sql_explanation": "This SQL query calculates the moving average of donation amounts for each donor, over the last three donations, by partitioning the results by DonorID and ordering them by DonationDate. It then calculates the average donation amount using the AVG() window function, with a window of three rows (the current row and the two preceding rows)." +}, { + "id": "1158", + "domain": "nonprofit", + "domain_description": "Nonprofit data on charitable giving trends, social impact assessment, capacity building, and volunteer management.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total donation amount per donor, ordered by the total donation amount in descending order, with a running total of donations for each donor?", + "sql_context": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), DonationAmount decimal(10, 2)); INSERT INTO Donors (DonorID, DonorName, DonationAmount) VALUES (1, \u0027John Doe\u0027, 500.00), (2, \u0027Jane Smith\u0027, 300.00), (3, \u0027Mike Johnson\u0027, 700.00);", + "sql": "SELECT DonorName, DonationAmount, SUM(DonationAmount) OVER (PARTITION BY DonorName ORDER BY DonationAmount) AS RunningTotalDonation FROM Donors ORDER BY DonationAmount DESC;", + "sql_explanation": "This SQL query calculates the total donation amount per donor by partitioning the data based on DonorName and ordering it by DonationAmount in descending order. The SUM function with the OVER clause is used to calculate a running total of donations for each donor." +}, { + "id": "470", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the age of the oldest artifact for each culture, grouped by excavation site?", + "sql_context": "CREATE TABLE ancient_cultures (id INT, culture VARCHAR(50)); CREATE TABLE ancient_artifacts (id INT, artifact_name VARCHAR(50), age INT, excavation_site VARCHAR(50), culture_id INT);", + "sql": "SELECT excavation_site, MAX(age) OVER (PARTITION BY culture_id) as oldest_artifact_age FROM ancient_artifacts JOIN ancient_cultures ON ancient_artifacts.culture_id \u003d ancient_cultures.id GROUP BY excavation_site, culture_id;", + "sql_explanation": "This query calculates the age of the oldest artifact for each culture, grouped by excavation site, by joining the \u0027ancient_artifacts\u0027 and \u0027ancient_cultures\u0027 tables on the \u0027culture_id\u0027 column, partitioning the results by \u0027culture_id\u0027, and selecting the maximum \u0027age\u0027 column value for each partition using the \u0027MAX\u0027 window function." +}, { + "id": "797", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and historical significance of the artifact with the earliest creation date?", + "sql_context": "CREATE TABLE Artifacts (ArtifactID INT, Name VARCHAR(100), CreationDate DATETIME, HistoricalSignificance TEXT); INSERT INTO Artifacts (ArtifactID, Name, CreationDate, HistoricalSignificance) VALUES (1, \u0027Ancient Dagger\u0027, \u00271500-01-01\u0027, \u0027Ceremonial weapon of ancient civilization\u0027);", + "sql": "SELECT Name, HistoricalSignificance FROM (SELECT Name, CreationDate, HistoricalSignificance, ROW_NUMBER() OVER (ORDER BY CreationDate) as RowNum FROM Artifacts) as ArtifactRank WHERE RowNum \u003d 1;", + "sql_explanation": "The SQL query uses the ROW_NUMBER function to assign a unique row number for each artifact ordered by the CreationDate. The outer query then selects the Name and HistoricalSignificance of the artifact with the earliest creation date (RowNum \u003d 1)." +}, { + "id": "1308", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name and creation date of the artifact with the second latest creation date?", + "sql_context": "CREATE TABLE Artifacts (ArtifactID INT, Name VARCHAR(100), CreationDate DATETIME); INSERT INTO Artifacts (ArtifactID, Name, CreationDate) VALUES (1, \u0027Ancient Dagger\u0027, \u00271500-01-01\u0027), (2, \u0027Modern Artifact\u0027, \u00272020-01-01\u0027);", + "sql": "SELECT Name, CreationDate FROM (SELECT Name, CreationDate, ROW_NUMBER() OVER (ORDER BY CreationDate DESC) as RowNum FROM Artifacts) as ArtifactRank WHERE RowNum \u003d 2;", + "sql_explanation": "The SQL query uses the ROW_NUMBER function to assign a unique row number for each artifact ordered by the CreationDate in descending order. The outer query then selects the Name and CreationDate of the artifact with the second latest creation date (RowNum \u003d 2)." +}, { + "id": "3955", + "domain": "archeology", + "domain_description": "Detailed records on excavation sites, artifact analysis, historical context, and public outreach in archeology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many artifacts were found at each excavation site?", + "sql_context": "CREATE TABLE artifacts_count (site_id INT, artifact_count INT); INSERT INTO artifacts_count (site_id, artifact_count) VALUES (1, 20), (2, 15), (3, 12);", + "sql": "SELECT site_id, COUNT(*) OVER (PARTITION BY site_id) AS artifact_count FROM artifacts;", + "sql_explanation": "The query calculates the number of artifacts at each excavation site by using the COUNT() window function with the PARTITION BY clause on the site_id column." +}, { + "id": "57", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of autonomous vehicle accidents, compared to the previous month, per manufacturer?", + "sql_context": "CREATE TABLE MonthlyAutonomousVehicleAccidents (id INT, accident_date DATE, make VARCHAR(20), model VARCHAR(20), autonomy_level INT); INSERT INTO MonthlyAutonomousVehicleAccidents (id, accident_date, make, model, autonomy_level) VALUES (1, \u00272022-01-01\u0027, \u0027Tesla\u0027, \u0027Model S\u0027, 4), (2, \u00272022-01-01\u0027, \u0027Tesla\u0027, \u0027Model 3\u0027, 3), (3, \u00272022-02-01\u0027, \u0027Tesla\u0027, \u0027Model S\u0027, 5), (4, \u00272022-02-01\u0027, \u0027Tesla\u0027, \u0027Model 3\u0027, 4), (5, \u00272022-03-01\u0027, \u0027Tesla\u0027, \u0027Model S\u0027, 6), (6, \u00272022-03-01\u0027, \u0027Tesla\u0027, \u0027Model 3\u0027, 5), (7, \u00272022-01-01\u0027, \u0027Volvo\u0027, \u0027XC60\u0027, 0), (8, \u00272022-02-01\u0027, \u0027Volvo\u0027, \u0027XC60\u0027, 0), (9, \u00272022-03-01\u0027, \u0027Volvo\u0027, \u0027XC60\u0027, 0);", + "sql": "SELECT make, EXTRACT(MONTH FROM accident_date) AS month, (COUNT(*) - LAG(COUNT(*)) OVER (PARTITION BY make ORDER BY EXTRACT(MONTH FROM accident_date))) * 100.0 / LAG(COUNT(*)) OVER (PARTITION BY make ORDER BY EXTRACT(MONTH FROM accident_date)) AS pct_change FROM MonthlyAutonomousVehicleAccidents WHERE autonomy_level \u003e 0 GROUP BY month, make;", + "sql_explanation": "The SQL query calculates the number of autonomous vehicle accidents, compared to the previous month, per manufacturer. It uses the LAG function to access the previous row\u0027s COUNT(*) column value and calculates the percentage change. It groups the data by month and make columns." 
+}, { + "id": "1293", + "domain": "automotive", + "domain_description": "Vehicle safety testing results, autonomous driving research data, electric vehicle adoption statistics, and auto show information.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank autonomous vehicles in the \u0027autoshow\u0027 table by top speed in descending order, assigning row numbers.", + "sql_context": "CREATE TABLE autoshow (vehicle_type VARCHAR(10), top_speed INT);", + "sql": "SELECT vehicle_type, top_speed, ROW_NUMBER() OVER (PARTITION BY vehicle_type ORDER BY top_speed DESC) as row_num FROM autoshow WHERE vehicle_type LIKE \u0027%Autonomous%\u0027;", + "sql_explanation": "Select the vehicle_type and top_speed columns from the \u0027autoshow\u0027 table, only for rows with \u0027Autonomous\u0027 in the vehicle_type. Assign row numbers in descending order of top_speed." +}, { + "id": "367", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 decentralized applications with the highest number of transactions, along with their respective industry categories and the names of the countries in which they are registered.", + "sql_context": "CREATE TABLE dapps (dapp_id INT, dapp_name VARCHAR(255), total_transactions INT, industry_category VARCHAR(255), country_name VARCHAR(255));", + "sql": "SELECT dapp_name, total_transactions, industry_category, country_name FROM (SELECT dapp_name, total_transactions, industry_category, country_name, ROW_NUMBER() OVER (ORDER BY total_transactions DESC) as rank FROM dapps) d WHERE rank \u003c\u003d 3;", + "sql_explanation": "The SQL query uses a subquery with the ROW_NUMBER() function to rank the decentralized applications based on the total number of transactions. It then selects the top 3 applications by filtering the subquery results where the rank is less than or equal to 3. The query displays the dapp_name, total_transactions, industry_category, and country_name for the selected records." +}, { + "id": "642", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average block time for the Tezos network in the past month?", + "sql_context": "CREATE TABLE tezos_blocks (block_id INT, timestamp TIMESTAMP);", + "sql": "SELECT AVG(timestamp_diff) FROM (SELECT TIMESTAMPDIFF(SECOND, LAG(timestamp) OVER (ORDER BY block_id), timestamp) AS timestamp_diff FROM tezos_blocks WHERE timestamp \u003e\u003d NOW() - INTERVAL \u00271 month\u0027) subquery;", + "sql_explanation": "Calculate the average block time for the Tezos network in the past month by selecting the AVG function of the timestamp_diff column from a subquery that calculates the difference in seconds between the current timestamp and the previous timestamp, grouped by the block_id and ordered by the timestamp, where the timestamp is within the past month." +}, { + "id": "1144", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average block time (in seconds) for the Tezos network in the past month?", + "sql_context": "CREATE TABLE tezos_blocks (block_time TIMESTAMP, block_id BIGINT);", + "sql": "SELECT AVG(EXTRACT(EPOCH FROM block_time) - EXTRACT(EPOCH FROM LAG(block_time) OVER (ORDER BY block_time))) FROM tezos_blocks WHERE block_time \u003e\u003d NOW() - INTERVAL \u00271 month\u0027;", + "sql_explanation": "The SQL query calculates the average block time (in seconds) for the Tezos network in the past month. 
It calculates the time between each block and averages the result." +}, { + "id": "1971", + "domain": "blockchain", + "domain_description": "Comprehensive data on smart contracts, decentralized applications, digital assets, and regulatory frameworks in blockchain.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the dapps that have deployed the most smart contracts in the \u0027Solana\u0027 network.", + "sql_context": "CREATE TABLE solana_dapps (dapp_name VARCHAR(20), network VARCHAR(20), smart_contracts INT); INSERT INTO solana_dapps (dapp_name, network, smart_contracts) VALUES (\u0027Serum\u0027, \u0027Solana\u0027, 50), (\u0027Raydium\u0027, \u0027Solana\u0027, 60), (\u0027Orca\u0027, \u0027Solana\u0027, 70);", + "sql": "SELECT dapp_name, network, smart_contracts, RANK() OVER (ORDER BY smart_contracts DESC) as rank FROM solana_dapps WHERE network \u003d \u0027Solana\u0027;", + "sql_explanation": "This query uses a window function to rank the dapps by the number of smart contracts in the \u0027Solana\u0027 network and returns the ranked list." +}, { + "id": "75", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population change for cities in the same country between consecutive years?", + "sql_context": "CREATE TABLE CityYearPopulation (CityId INT, Year INT, Population INT, PRIMARY KEY (CityId, Year)); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (1, 2019, 8400000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (1, 2020, 8600000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (2, 2019, 3900000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (2, 2020, 4100000);", + "sql": "SELECT CityId, AVG(PopulationChange) OVER (PARTITION BY CityId ORDER BY Year ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) as AvgPopulationChange FROM (SELECT CityId, Year, (LAG(Population, 1) OVER (PARTITION BY CityId ORDER BY Year) - Population) as PopulationChange FROM CityYearPopulation) AS PopulationChangePerYear;", + "sql_explanation": "Calculates the average population change for cities in the same country between consecutive years." +}, { + "id": "172", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which departments had the most budget cuts in \u0027budget\u0027 table?", + "sql_context": "CREATE TABLE budget (program_id INT, program_name VARCHAR(255), budget DECIMAL(10,2), fiscal_year INT);", + "sql": "SELECT program_name, (LAG(budget, 1) OVER (ORDER BY fiscal_year) - budget) AS budget_cuts FROM budget WHERE (LAG(budget, 1) OVER (ORDER BY fiscal_year) - budget) \u003d (SELECT MAX((LAG(budget, 1) OVER (ORDER BY fiscal_year) - budget)) FROM budget) ORDER BY budget_cuts DESC LIMIT 1;", + "sql_explanation": "This query uses the window function LAG to calculate the budget cuts for each program from the previous fiscal year. It then selects the program with the maximum budget cut." +}, { + "id": "1077", + "domain": "government", + "domain_description": "Government data on open data initiatives, public participation, evidence-based policy making, and government transparency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which cities had a population decrease between 2019 and 2020?", + "sql_context": "CREATE TABLE CityYearPopulation (CityId INT, Year INT, Population INT, PRIMARY KEY (CityId, Year)); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (1, 2019, 8400000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (1, 2020, 8600000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (2, 2019, 3900000); INSERT INTO CityYearPopulation (CityId, Year, Population) VALUES (2, 2020, 3800000);", + "sql": "SELECT CityId, Year, Population, Population - LAG(Population, 1) OVER (PARTITION BY CityId ORDER BY Year) as PopulationChange FROM CityYearPopulation WHERE PopulationChange \u003c 0;", + "sql_explanation": "Identifies cities with a population decrease between 2019 and 2020." +}, { + "id": "535", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of health equity metrics scores for community health workers in California?", + "sql_context": "CREATE TABLE states (state_id INT, state_name VARCHAR(100)); INSERT INTO states (state_id, state_name) VALUES (1, \u0027California\u0027), (2, \u0027Texas\u0027), (3, \u0027New York\u0027); CREATE TABLE community_health_workers (worker_id INT, state_id INT, health_equity_metrics_score INT); INSERT INTO community_health_workers (worker_id, state_id, health_equity_metrics_score) VALUES (1, 1, 85), (2, 1, 90), (3, 2, 80), (4, 3, 95), (5, 1, 92);", + "sql": "SELECT state_id, health_equity_metrics_score, COUNT(*) OVER (PARTITION BY state_id ORDER BY health_equity_metrics_score) as rank FROM community_health_workers WHERE state_id \u003d 1 ORDER BY health_equity_metrics_score;", + "sql_explanation": "This query calculates the rank of health equity metrics scores for community health workers in California using window functions and partitions by state_id." +}, { + "id": "1019", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in mental health parity scores for each community health worker, from the beginning of the year to the end of the year?", + "sql_context": "CREATE TABLE mental_health_parity_history (worker_id INT, score_date DATE, mental_health_parity_score INT); INSERT INTO mental_health_parity_history (worker_id, score_date, mental_health_parity_score) VALUES (1, \u00272022-01-01\u0027, 90), (1, \u00272022-12-31\u0027, 95), (2, \u00272022-01-01\u0027, 85), (2, \u00272022-12-31\u0027, 90), (3, \u00272022-01-01\u0027, 80), (3, \u00272022-12-31\u0027, 85);", + "sql": "SELECT worker_id, mental_health_parity_score - LAG(mental_health_parity_score) OVER (PARTITION BY worker_id ORDER BY score_date) as score_change FROM mental_health_parity_history;", + "sql_explanation": "This query calculates the change in mental health parity scores for each community health worker, from the beginning of the year to the end of the year. It first selects the worker_id and mental_health_parity_score columns from the mental_health_parity_history table, and then calculates the score change as the mental_health_parity_score minus the lag of mental_health_parity_score, using the LAG() window function. The LAG() function returns the value of the previous row for the same worker_id, partitioned by worker_id and ordered by score_date. This results in the score change for each worker between the beginning and end of the year." +}, { + "id": "2308", + "domain": "healthcare", + "domain_description": "Healthcare data on mental health parity, cultural competency, health equity metrics, and community health workers.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the number of mental health visits per patient, ordered by the number of visits in descending order.", + "sql_context": "CREATE TABLE MentalHealthParity (PatientID int, MentalHealthVisits int); INSERT INTO MentalHealthParity (PatientID, MentalHealthVisits) VALUES (1, 5), (2, 3), (3, 6), (4, 4), (5, 8), (6, 7), (7, 6);", + "sql": "SELECT PatientID, MentalHealthVisits, ROW_NUMBER() OVER (ORDER BY MentalHealthVisits DESC) AS VisitRank FROM MentalHealthParity;", + "sql_explanation": "Calculate the row number for each record ordered by the MentalHealthVisits column in descending order, effectively ranking patients by the number of mental health visits." +}, { + "id": "543", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average word count of articles related to social justice issues in the United States, published in 2021 and 2022, partitioned by quarter?", + "sql_context": "CREATE TABLE articles (id INT, title TEXT, category TEXT, publish_date DATE, location TEXT, word_count INT); INSERT INTO articles (id, title, category, publish_date, location, word_count) VALUES (1, \u0027Social Justice Issue 1\u0027, \u0027social_justice\u0027, \u00272021-01-01\u0027, \u0027USA\u0027, 800), (2, \u0027Social Justice Issue 2\u0027, \u0027social_justice\u0027, \u00272022-06-05\u0027, \u0027USA\u0027, 1200);", + "sql": "SELECT AVG(word_count) OVER (PARTITION BY EXTRACT(YEAR_QUARTER FROM publish_date)) AS avg_word_count FROM articles WHERE category \u003d \u0027social_justice\u0027 AND location \u003d \u0027USA\u0027 AND YEAR(publish_date) BETWEEN 2021 AND 2022;", + "sql_explanation": "Calculate the average word count of articles related to social justice issues in the United States, published in 2021 and 2022, partitioned by quarter." +}, { + "id": "2224", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of articles by category in \u0027news_articles\u0027 table?", + "sql_context": "CREATE TABLE news_articles (id INT, title VARCHAR(100), publication_date DATE, category VARCHAR(50)); INSERT INTO news_articles (id, title, publication_date, category) VALUES (1, \u0027Article 1\u0027, \u00272022-01-01\u0027, \u0027Politics\u0027), (2, \u0027Article 2\u0027, \u00272022-01-02\u0027, \u0027Sports\u0027);", + "sql": "SELECT category, COUNT(*) as num_articles, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) as rank FROM news_articles GROUP BY category;", + "sql_explanation": "The SQL query calculates the number of articles for each category in the \u0027news_articles\u0027 table and assigns a rank to each category based on the number of articles. It uses the COUNT function to count the number of articles for each category and the GROUP BY clause to group the results by category. The ROW_NUMBER function is used to assign a rank to each category based on the number of articles." +}, { + "id": "2361", + "domain": "journalism", + "domain_description": "News reporting data, audience demographics, media ethics information, and investigative journalism projects.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of articles by region in \u0027news_articles\u0027 table?", + "sql_context": "CREATE TABLE news_articles (id INT, title VARCHAR(100), publication_date DATE, category VARCHAR(50), author VARCHAR(50), region VARCHAR(50)); INSERT INTO news_articles (id, title, publication_date, category, author, region) VALUES (1, \u0027Article 1\u0027, \u00272022-01-01\u0027, \u0027Politics\u0027, \u0027John Doe\u0027, \u0027North America\u0027), (2, \u0027Article 2\u0027, \u00272022-01-02\u0027, \u0027Sports\u0027, \u0027Jane Smith\u0027, \u0027Europe\u0027);", + "sql": "SELECT region, COUNT(*) as num_articles, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) as rank FROM news_articles GROUP BY region;", + "sql_explanation": "The SQL query calculates the number of articles for each region in the \u0027news_articles\u0027 table and assigns a rank to each region based on the number of articles. It uses the COUNT function to count the number of articles for each region and the GROUP BY clause to group the results by region. The ROW_NUMBER function is used to assign a rank to each region based on the number of articles." +}, { + "id": "575", + "domain": "non profit", + "domain_description": "Financial data, program impact, donor demographics, and volunteer management.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of donations from each continent in the last year?", + "sql_context": "CREATE TABLE donations (id INT, donation_amount DECIMAL(10, 2), donation_date DATE, continent TEXT); INSERT INTO donations VALUES (1, 200.00, \u00272021-01-01\u0027, \u0027Asia\u0027), (2, 300.00, \u00272021-02-01\u0027, \u0027Europe\u0027), (3, 400.00, \u00272020-12-01\u0027, \u0027Africa\u0027);", + "sql": "SELECT continent, SUM(donation_amount) / SUM(SUM(donation_amount)) OVER (PARTITION BY NULL) * 100.0 AS percentage FROM donations WHERE donation_date \u003e\u003d DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY continent;", + "sql_explanation": "This SQL query calculates the percentage of donations from each continent in the last year. It uses the SUM() function to add up all donation amounts for each continent in the last year and calculates the percentage of total donations for each continent." +}, { + "id": "713", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which programs received donations of more than $100 in a single day in 2022?", + "sql_context": "CREATE TABLE DailyDonations (DonationID int, ProgramName varchar(255), DonationAmount decimal(10,2), DonationDate date); INSERT INTO DailyDonations VALUES (1, \u0027Education\u0027, 150, \u00272022-01-01\u0027), (2, \u0027Healthcare\u0027, 100, \u00272022-02-01\u0027), (3, \u0027Environment\u0027, 125, \u00272022-03-01\u0027), (4, \u0027Education\u0027, 175, \u00272022-04-01\u0027), (5, \u0027Healthcare\u0027, 200, \u00272022-05-01\u0027);", + "sql": "SELECT ProgramName FROM (SELECT ProgramName, ROW_NUMBER() OVER (PARTITION BY ProgramName ORDER BY DonationDate) as Rank FROM DailyDonations WHERE DonationAmount \u003e 100) as DonationRanks WHERE Rank \u003d 1;", + "sql_explanation": "This query identifies the programs that received donations of more than $100 in a single day in 2022. It uses the ROW_NUMBER window function with the PARTITION BY clause to rank the donations by ProgramName and DonationDate, then filters the results to only include the first donation for each program." +}, { + "id": "2073", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank outcomes by date", + "sql_context": "CREATE TABLE Outcomes (Id INT, ProgramId INT, Outcome VARCHAR(50), OutcomeDate DATE); INSERT INTO Outcomes (Id, ProgramId, Outcome, OutcomeDate) VALUES (1, 1, \u0027Graduated\u0027, \u00272021-01-01\u0027), (2, 2, \u0027Fed 50 people\u0027, \u00272021-01-02\u0027);", + "sql": "SELECT Id, ProgramId, Outcome, OutcomeDate, RANK() OVER(PARTITION BY ProgramId ORDER BY OutcomeDate DESC) AS OutcomeRank FROM Outcomes;", + "sql_explanation": "This query ranks outcomes for each program based on date, assigning a rank to each outcome." +}, { + "id": "3442", + "domain": "non-profit", + "domain_description": "Donation records, program outcomes, volunteer engagement metrics, budget reports, and community impact assessments.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteer hours were recorded for each program?", + "sql_context": "CREATE TABLE programs (id INT, name VARCHAR(255)); INSERT INTO programs (id, name) VALUES (1, \u0027Environment\u0027), (2, \u0027Arts\u0027); CREATE TABLE volunteer_hours (id INT, program_id INT, hours DECIMAL(10,2)); INSERT INTO volunteer_hours (id, program_id, hours) VALUES (1, 1, 5), (2, 2, 10), (3, 1, 15);", + "sql": "SELECT program_id, SUM(hours) OVER (PARTITION BY program_id) AS total_hours FROM volunteer_hours;", + "sql_explanation": "This query calculates the total number of volunteer hours for each program by partitioning the data based on the program_id and summing the hours within each partition." 
+}, { + "id": "310", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify users from underrepresented communities with more than 5 transactions in Q3 2019, and rank them by transaction value.", + "sql_context": "CREATE TABLE users (user_id INT, user_category VARCHAR(30)); CREATE TABLE transactions (transaction_id INT, user_id INT, transaction_value FLOAT, transaction_date DATE); INSERT INTO users (user_id, user_category) VALUES (1, \u0027Minority Female\u0027); INSERT INTO transactions (transaction_id, user_id, transaction_value, transaction_date) VALUES (1, 1, 100.00, \u00272019-09-01\u0027);", + "sql": "SELECT user_id, RANK() OVER (ORDER BY SUM(transaction_value) DESC) as rank FROM transactions INNER JOIN users ON transactions.user_id \u003d users.user_id WHERE EXTRACT(MONTH FROM transaction_date) BETWEEN 9 AND 11 HAVING COUNT(*) \u003e 5 GROUP BY user_id;", + "sql_explanation": "The SQL query identifies users from underrepresented communities with more than 5 transactions in Q3 2019, calculates their total transaction values, and assigns ranks based on the sum of transaction values." +}, { + "id": "1605", + "domain": "technology", + "domain_description": "Extensive data on ethical AI, technology for social good, digital divide, and technology accessibility.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the volunteers with the highest number of skills?", + "sql_context": "CREATE TABLE volunteers (id INT, volunteer_name VARCHAR(50), skill VARCHAR(50)); INSERT INTO volunteers (id, volunteer_name, skill) VALUES (1, \u0027Volunteer1\u0027, \u0027Coding\u0027); INSERT INTO volunteers (id, volunteer_name, skill) VALUES (2, \u0027Volunteer2\u0027, \u0027Design\u0027); INSERT INTO volunteers (id, volunteer_name, skill) VALUES (3, \u0027Volunteer3\u0027, \u0027Data Analysis\u0027);", + "sql": "SELECT volunteer_name, COUNT(skill) OVER (PARTITION BY volunteer_name) AS skill_count, RANK() OVER (ORDER BY COUNT(skill) DESC) AS rank FROM volunteers;", + "sql_explanation": "This query calculates the number of skills for each volunteer using the COUNT window function and ranks them in descending order based on the number of skills using the RANK window function." +}, { + "id": "272", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which agroecology projects have the highest and lowest total funding?", + "sql_context": "CREATE TABLE agroecology_projects (id INT, name TEXT, total_funding FLOAT); INSERT INTO agroecology_projects (id, name, total_funding) VALUES (1, \u0027Project 1\u0027, 50000.0), (2, \u0027Project 2\u0027, 25000.0), (3, \u0027Project 3\u0027, 75000.0);", + "sql": "SELECT name, total_funding FROM (SELECT name, total_funding, ROW_NUMBER() OVER (ORDER BY total_funding DESC) as rank FROM agroecology_projects) as ranked_projects WHERE rank \u003d 1 OR rank \u003d (SELECT COUNT(*) FROM agroecology_projects) ORDER BY total_funding;", + "sql_explanation": "This query identifies the agroecology projects with the highest and lowest total funding by using the ROW_NUMBER function and the OR operator to filter the projects with the highest and lowest total funding. It also uses a subquery to calculate the number of projects and order them by total funding." +}, { + "id": "468", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total production value (in USD) of organic farms in the \u0027agroecology\u0027 schema, in the top 3 largest states by area?", + "sql_context": "CREATE SCHEMA agroecology;CREATE TABLE organic_farms (id INT, state VARCHAR(50), production_value INT);", + "sql": "SELECT SUM(production_value) FROM agroecology.organic_farms WHERE state IN (SELECT state FROM (SELECT state, ROW_NUMBER() OVER (ORDER BY SUM(area_ha) DESC) rn FROM agroecology.organic_farms GROUP BY state) t WHERE rn \u003c\u003d 3);", + "sql_explanation": "This query calculates the sum of the \u0027production_value\u0027 column for all rows in the \u0027organic_farms\u0027 table within the \u0027agroecology\u0027 schema, but only for rows where the \u0027state\u0027 column is in the top 3 states by area. The subquery calculates the total area for each state and assigns a row number based on the total area, then selects the states with the top 3 row numbers." +}, { + "id": "1915", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the change in crop yield for each farmer over time, if available.", + "sql_context": "CREATE TABLE farmer_yields (farmer_id INT, yield_date DATE, crop_yield INT); INSERT INTO farmer_yields (farmer_id, yield_date, crop_yield) VALUES (1, \u00272021-01-01\u0027, 500), (1, \u00272021-02-01\u0027, 600), (2, \u00272021-01-01\u0027, 700), (2, \u00272021-02-01\u0027, 750);", + "sql": "SELECT farmer_id, yield_date, crop_yield, LAG(crop_yield) OVER (PARTITION BY farmer_id ORDER BY yield_date) AS prev_yield FROM farmer_yields;", + "sql_explanation": "The SQL query identifies the change in crop yield for each farmer over time by using the LAG function to retrieve the previous crop_yield value for each farmer. The data is partitioned by farmer_id and ordered by yield_date." +}, { + "id": "2013", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all unique crop types and their respective total yields for organic farms in the USA.", + "sql_context": "CREATE TABLE Farm (FarmID int, CropType varchar(50), FarmType varchar(20), Country varchar(50), Yield int); INSERT INTO Farm (FarmID, CropType, FarmType, Country, Yield) VALUES (1, \u0027Corn\u0027, \u0027Organic\u0027, \u0027USA\u0027, 150), (2, \u0027Soybeans\u0027, \u0027Conventional\u0027, \u0027Canada\u0027, 200), (3, \u0027Wheat\u0027, \u0027Urban\u0027, \u0027Mexico\u0027, 100), (4, \u0027Potatoes\u0027, \u0027Organic\u0027, \u0027USA\u0027, 180), (5, \u0027Rice\u0027, \u0027Organic\u0027, \u0027Mexico\u0027, 120);", + "sql": "SELECT DISTINCT CropType, SUM(Yield) OVER (PARTITION BY CropType) as TotalYield FROM Farm WHERE FarmType \u003d \u0027Organic\u0027 AND Country \u003d \u0027USA\u0027;", + "sql_explanation": "The SQL query calculates the total yield for each unique crop type (CropType) from organic farms in the USA by using the SUM() window function with the PARTITION BY clause to partition the data by CropType. The DISTINCT keyword is used to ensure that each crop type is only listed once." +}, { + "id": "2532", + "domain": "agriculture", + "domain_description": "Comprehensive data on agroecology, food justice, indigenous food systems, and urban agriculture.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank countries by the number of organic farms, in descending order.", + "sql_context": "CREATE TABLE Farm (FarmID int, FarmType varchar(20), Country varchar(50)); INSERT INTO Farm (FarmID, FarmType, Country) VALUES (1, \u0027Organic\u0027, \u0027USA\u0027), (2, \u0027Conventional\u0027, \u0027Canada\u0027), (3, \u0027Urban\u0027, \u0027Mexico\u0027), (4, \u0027Organic\u0027, \u0027USA\u0027), (5, \u0027Organic\u0027, \u0027Mexico\u0027), (6, \u0027Biodynamic\u0027, \u0027France\u0027);", + "sql": "SELECT Country, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) as Rank FROM Farm WHERE FarmType \u003d \u0027Organic\u0027 GROUP BY Country;", + "sql_explanation": "The SQL query ranks countries by the number of organic farms by using the ROW_NUMBER() window function to assign a rank to each country based on the count of organic farms in descending order. The COUNT() function is used to count the number of organic farms for each country, and the GROUP BY clause is used to group the data by Country." +}, { + "id": "553", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which fish species have had a decrease in population in the past 5 years?", + "sql_context": "CREATE TABLE population_data (id INT, year INT, species VARCHAR(50), population INT); INSERT INTO population_data (id, year, species, population) VALUES (1, 2016, \u0027Salmon\u0027, 50000), (2, 2017, \u0027Salmon\u0027, 52000), (3, 2018, \u0027Salmon\u0027, 53000), (4, 2019, \u0027Salmon\u0027, 51000), (5, 2020, \u0027Salmon\u0027, 49000), (6, 2016, \u0027Tuna\u0027, 25000), (7, 2017, \u0027Tuna\u0027, 24000), (8, 2018, \u0027Tuna\u0027, 23000), (9, 2019, \u0027Tuna\u0027, 22000), (10, 2020, \u0027Tuna\u0027, 20000);", + "sql": "SELECT species, (population - LAG(population) OVER (PARTITION BY species ORDER BY year)) as population_change FROM population_data WHERE (population - LAG(population) OVER (PARTITION BY species ORDER BY year)) \u003c 0;", + "sql_explanation": "This query identifies which fish species have had a decrease in population in the past 5 years. It does so by using the LAG window function to compare the population of each species in consecutive years, and then filtering the table for records where the population has decreased." +}, { + "id": "1341", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of aquaculture sites per country, ranked in descending order?", + "sql_context": "CREATE TABLE aquaculture_sites_count (site_id INT, country VARCHAR(50), total_sites INT); INSERT INTO aquaculture_sites_count VALUES (1, \u0027China\u0027, 200), (2, \u0027Indonesia\u0027, 150), (3, \u0027India\u0027, 120), (4, \u0027Vietnam\u0027, 100), (5, \u0027Norway\u0027, 80);", + "sql": "SELECT country, SUM(total_sites) AS total_sites_sum, RANK() OVER (ORDER BY SUM(total_sites) DESC) AS total_sites_rank FROM aquaculture_sites_count GROUP BY country;", + "sql_explanation": "Rank the countries by the total number of aquaculture sites, in descending order." +}, { + "id": "1867", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top three countries with the highest biomass of farmed fish.", + "sql_context": "CREATE TABLE country_biomass (id INT, country VARCHAR(255), biomass FLOAT); INSERT INTO country_biomass (id, country, biomass) VALUES (1, \u0027Norway\u0027, 4000.0), (2, \u0027Chile\u0027, 5000.0), (3, \u0027China\u0027, 7000.0), (4, \u0027Scotland\u0027, 3500.0);", + "sql": "SELECT country, biomass FROM (SELECT country, biomass, ROW_NUMBER() OVER (ORDER BY biomass DESC) AS rn FROM country_biomass) tmp WHERE rn \u003c\u003d 3;", + "sql_explanation": "This query identifies the top three countries with the highest biomass of farmed fish by using the ROW_NUMBER function, ordering the data by biomass in descending order and filtering the data to only show the first three rows." +}, { + "id": "2189", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the growth rate and total fish weight for each species over time?", + "sql_context": "CREATE TABLE Species_Growth_Weight (Species_Name TEXT, Year INT, Growth_Rate FLOAT, Fish_Weight FLOAT); INSERT INTO Species_Growth_Weight (Species_Name, Year, Growth_Rate, Fish_Weight) VALUES (\u0027Tuna\u0027, 2019, 0.05, 1200000), (\u0027Cod\u0027, 2019, 0.04, 800000), (\u0027Herring\u0027, 2019, 0.03, 600000), (\u0027Tuna\u0027, 2020, 0.06, 1400000), (\u0027Cod\u0027, 2020, 0.05, 900000), (\u0027Herring\u0027, 2020, 0.04, 700000);", + "sql": "SELECT Species_Name, Growth_Rate, SUM(Fish_Weight) OVER (PARTITION BY Species_Name) AS Total_Fish_Weight FROM Species_Growth_Weight;", + "sql_explanation": "For each unique Species_Name, calculate the total fish weight using the SUM window function and return the growth rate." +}, { + "id": "2583", + "domain": "aquaculture", + "domain_description": "Aquatic farming data, fish stock management, ocean health metrics, and sustainable seafood trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 aquaculture farms with the highest water temperature?", + "sql_context": "CREATE TABLE Farm ( FarmID INT, FarmName VARCHAR(255), WaterTemperature DECIMAL(5,2) ); INSERT INTO Farm (FarmID, FarmName, WaterTemperature) VALUES (1, \u0027Farm A\u0027, 28.5), (2, \u0027Farm B\u0027, 12.3), (3, \u0027Farm C\u0027, 30.0), (4, \u0027Farm D\u0027, 29.5), (5, \u0027Farm E\u0027, 24.2);", + "sql": "SELECT FarmName, WaterTemperature, ROW_NUMBER() OVER (ORDER BY WaterTemperature DESC) as Rank FROM Farm WHERE Rank \u003c\u003d 3;", + "sql_explanation": "The SQL query assigns a row number based on the WaterTemperature column in descending order using the ROW_NUMBER function. The query then filters the results to only show the top 3 farms with the highest water temperature." +}, { + "id": "568", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 most popular cultural heritage sites in South America.", + "sql_context": "CREATE TABLE site_visits(visit_id INT, site_name TEXT, region TEXT, num_visits INT);", + "sql": "SELECT site_name, num_visits FROM (SELECT site_name, num_visits, ROW_NUMBER() OVER (PARTITION BY region ORDER BY num_visits DESC) AS rank FROM site_visits WHERE region \u003d \u0027South America\u0027) subquery WHERE rank \u003c\u003d 3;", + "sql_explanation": "The query identifies the top 3 most popular cultural heritage sites in South America by partitioning the site visits by region, calculating the row number for each site based on the number of visits, and filtering for the top 3 rows in each region." +}, { + "id": "610", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 most visited cultural heritage sites in Mexico for 2023.", + "sql_context": "CREATE TABLE site_visits (id INT, name TEXT, country TEXT, year INT, visitor_count INT); INSERT INTO site_visits (id, name, country, year, visitor_count) VALUES (1, \u0027Chichen Itza\u0027, \u0027Mexico\u0027, 2023, 800000); INSERT INTO site_visits (id, name, country, year, visitor_count) VALUES (2, \u0027Teotihuacan\u0027, \u0027Mexico\u0027, 2023, 750000); INSERT INTO site_visits (id, name, country, year, visitor_count) VALUES (3, \u0027Palenque\u0027, \u0027Mexico\u0027, 2023, 700000); INSERT INTO site_visits (id, name, country, year, visitor_count) VALUES (4, \u0027Tulum\u0027, \u0027Mexico\u0027, 2023, 650000);", + "sql": "SELECT name, visitor_count FROM (SELECT name, visitor_count, ROW_NUMBER() OVER (ORDER BY visitor_count DESC) AS rank FROM site_visits WHERE country \u003d \u0027Mexico\u0027 AND year \u003d 2023) AS ranked_sites WHERE rank \u003c\u003d 3;", + "sql_explanation": "The query uses the ROW_NUMBER function to rank the sites by visitor count in descending order for Mexico in 2023, then selects the top 3 sites based on the rank." +}, { + "id": "1602", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the moving average of energy consumption for the latest 3 virtual tours in each country?", + "sql_context": "CREATE TABLE EnergyConsumption (id INT, country VARCHAR(20), sequence INT, energy INT); INSERT INTO EnergyConsumption (id, country, sequence, energy) VALUES (1, \u0027Portugal\u0027, 1, 50), (2, \u0027Portugal\u0027, 2, 55), (3, \u0027Portugal\u0027, 3, 60), (4, \u0027Spain\u0027, 1, 60), (5, \u0027Spain\u0027, 2, 65), (6, \u0027Spain\u0027, 3, 70);", + "sql": "SELECT country, AVG(energy) OVER (PARTITION BY country ORDER BY sequence ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg FROM EnergyConsumption;", + "sql_explanation": "Calculate the moving average of energy consumption for the latest 3 virtual tours in each country by using the AVG() function with partitioning by the country column, ordering by the sequence column, and selecting a range of 2 rows preceding and the current row." +}, { + "id": "2835", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of visitor counts for each heritage site in France?", + "sql_context": "CREATE TABLE heritage_sites_france (id INT, country VARCHAR(50), name VARCHAR(100), visitor_count INT); INSERT INTO heritage_sites_france (id, country, name, visitor_count) VALUES (1, \u0027France\u0027, \u0027Site A\u0027, 1000), (2, \u0027France\u0027, \u0027Site B\u0027, 2000), (3, \u0027France\u0027, \u0027Site C\u0027, 3000);", + "sql": "SELECT name, SUM(visitor_count) OVER (PARTITION BY country) FROM heritage_sites_france WHERE country \u003d \u0027France\u0027;", + "sql_explanation": "This query calculates the sum of visitor counts for each heritage site in France by filtering for the country \u0027France\u0027, partitioning by \u0027country\u0027, and then calculating the sum using a window function." +}, { + "id": "3535", + "domain": "hospitality", + "domain_description": "Hospitality data on sustainable tourism, cultural heritage preservation, virtual tourism, and local economic impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank the top 2 countries with the highest local economic impact?", + "sql_context": "CREATE TABLE local_economic (economic_id INT, country TEXT, impact FLOAT); INSERT INTO local_economic (economic_id, country, impact) VALUES (1, \u0027Japan\u0027, 2000), (2, \u0027Germany\u0027, 1500), (3, \u0027Brazil\u0027, 1000);", + "sql": "SELECT country, RANK() OVER (ORDER BY impact DESC) as rank FROM local_economic WHERE rank \u003c\u003d 2;", + "sql_explanation": "This query ranks the top 2 countries with the highest local economic impact using the RANK function, ordering by the impact column in descending order. The WHERE clause limits the results to the top 2 countries." +}, { + "id": "1938", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the production rate trend for each well over time?", + "sql_context": "CREATE TABLE Production (ProductionID INT, WellID INT, ProductionDate DATE, ProductionRate FLOAT, Country VARCHAR(50)); INSERT INTO Production (ProductionID, WellID, ProductionDate, ProductionRate, Country) VALUES (1, 1, \u00272022-01-01\u0027, 500, \u0027USA\u0027), (2, 2, \u00272022-01-15\u0027, 600, \u0027Canada\u0027), (3, 3, \u00272022-02-01\u0027, 700, \u0027Mexico\u0027);", + "sql": "SELECT WellID, ProductionDate, ProductionRate, ROW_NUMBER() OVER (PARTITION BY WellID ORDER BY ProductionDate) AS RowNumber FROM Production;", + "sql_explanation": "This query calculates the production rate trend for each well over time. It uses the ROW_NUMBER function to assign a unique row number to each production rate for each well, ordered by production date. This allows us to see how the production rate for each well changes over time." +}, { + "id": "1959", + "domain": "oil and gas", + "domain_description": "Exploration data, production figures, infrastructure development, and market trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily production trend for a specific well?", + "sql_context": "CREATE TABLE production (prod_id INT, well_id INT, prod_date DATE, production_rate FLOAT); INSERT INTO production (prod_id, well_id, prod_date, production_rate) VALUES (1, 1, \u00272020-01-01\u0027, 1000), (2, 1, \u00272020-01-02\u0027, 1100), (3, 1, \u00272020-01-03\u0027, 1200), (4, 1, \u00272020-01-04\u0027, 1300), (5, 1, \u00272020-01-05\u0027, 1400);", + "sql": "SELECT prod_date, production_rate, LAG(production_rate, 1) OVER (ORDER BY prod_date) AS previous_day_rate FROM production WHERE well_id \u003d 1;", + "sql_explanation": "Display the daily production rate for a specific well and its previous day\u0027s rate." +}, { + "id": "803", + "domain": "real estate", + "domain_description": "Real estate data on inclusive housing policies, sustainable urbanism, property co-ownership, and housing affordability.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 most affordable properties based on their sustainability scores in the \u0027RenewableHeights\u0027 neighborhood, ordered by size.", + "sql_context": "CREATE TABLE Properties (PropertyID INT, Price INT, SustainabilityScore INT, Neighborhood VARCHAR(20), Size INT); INSERT INTO Properties (PropertyID, Price, SustainabilityScore, Neighborhood, Size) VALUES (1, 300000, 80, \u0027RenewableHeights\u0027, 1200), (2, 450000, 95, \u0027RenewableHeights\u0027, 1500), (3, 250000, 60, \u0027RenewableHeights\u0027, 1800);", + "sql": "SELECT * FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY Neighborhood ORDER BY Price, Size) AS rn FROM Properties WHERE Neighborhood \u003d \u0027RenewableHeights\u0027 ORDER BY Price, Size) sub WHERE rn \u003c\u003d 2;", + "sql_explanation": "This query first calculates row numbers (rn) for the RenewableHeights neighborhood based on price and size, then filters the top 2 rows and orders the final result by price and size." +}, { + "id": "831", + "domain": "arts culture", + "domain_description": "Data on art collections, cultural event attendance, artist demographics, museum operations, and heritage preservation efforts.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank artists in the \u0027ArtistsDemographics\u0027 table by age, in descending order, and return the top 2.", + "sql_context": "CREATE TABLE ArtistsDemographics (ArtistID INT, Age INT, Gender VARCHAR(10), Nationality VARCHAR(50)); INSERT INTO ArtistsDemographics (ArtistID, Age, Gender, Nationality) VALUES (1, 45, \u0027Male\u0027, \u0027American\u0027), (2, 34, \u0027Female\u0027, \u0027Canadian\u0027), (3, 50, \u0027Male\u0027, \u0027British\u0027), (4, 35, \u0027Female\u0027, \u0027Mexican\u0027), (5, 40, \u0027Non-binary\u0027, \u0027Australian\u0027);", + "sql": "SELECT ArtistID, Age, Gender, Nationality FROM (SELECT ArtistID, Age, Gender, Nationality, ROW_NUMBER() OVER (ORDER BY Age DESC) AS Rank FROM ArtistsDemographics) AS Subquery WHERE Rank \u003c\u003d 2;", + "sql_explanation": "The SQL query uses the ROW_NUMBER() function to rank artists by age in descending order. The outer query then filters the results to only include the top 2 ranked artists." +}, { + "id": "164", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in average permit cost between high-rise and low-rise buildings in British Columbia in 2021?", + "sql_context": "CREATE TABLE permit_cost_comparison (cost_id INT, province VARCHAR(50), building_type VARCHAR(50), permit_cost FLOAT, structure_height INT, issue_date DATE); INSERT INTO permit_cost_comparison (cost_id, province, building_type, permit_cost, structure_height, issue_date) VALUES (7, \u0027British Columbia\u0027, \u0027High-rise\u0027, 1000000.00, 50, \u00272021-01-01\u0027); INSERT INTO permit_cost_comparison (cost_id, province, building_type, permit_cost, structure_height, issue_date) VALUES (8, \u0027British Columbia\u0027, \u0027Low-rise\u0027, 500000.00, 10, \u00272021-01-10\u0027);", + "sql": "SELECT AVG(permit_cost) - LAG(AVG(permit_cost)) OVER (PARTITION BY province ORDER BY EXTRACT(YEAR FROM issue_date)) FROM permit_cost_comparison WHERE province \u003d \u0027British Columbia\u0027 AND building_type IN (\u0027High-rise\u0027, \u0027Low-rise\u0027) AND issue_date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027;", + "sql_explanation": "Calculates the difference in average permit cost between high-rise and low-rise buildings in British Columbia in 2021" +}, { + "id": "472", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the second highest permit cost for residential sustainable buildings in Washington state?", + "sql_context": "CREATE TABLE permit_costs (cost_id INT, state VARCHAR(50), building_type VARCHAR(50), permit_cost FLOAT, material_type VARCHAR(50)); INSERT INTO permit_costs (cost_id, state, building_type, permit_cost, material_type) VALUES (1, \u0027Washington\u0027, \u0027Residential\u0027, 200000.00, \u0027Sustainable\u0027); INSERT INTO permit_costs (cost_id, state, building_type, permit_cost, material_type) VALUES (2, \u0027Washington\u0027, \u0027Residential\u0027, 250000.00, \u0027Sustainable\u0027);", + "sql": "SELECT permit_cost FROM (SELECT permit_cost, ROW_NUMBER() OVER (ORDER BY permit_cost DESC) rn FROM permit_costs WHERE state \u003d \u0027Washington\u0027 AND building_type \u003d \u0027Residential\u0027 AND material_type \u003d \u0027Sustainable\u0027) t WHERE rn \u003d 2;", + "sql_explanation": "Identifies the second highest permit cost for residential sustainable buildings in Washington state" +}, { + "id": "1501", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average building permit issuance delay, in days, for projects in Texas, compared to the overall average delay?", + "sql_context": "CREATE TABLE BuildingPermits (State VARCHAR(2), Project VARCHAR(50), IssueDate DATE, PermitDate DATE);", + "sql": "SELECT AVG(DATEDIFF(IssueDate, PermitDate)) as TexasAvg, AVG(DATEDIFF(IssueDate, PermitDate)) over () as OverallAvg FROM BuildingPermits WHERE State \u003d \u0027TX\u0027;", + "sql_explanation": "This query calculates the average delay for projects in Texas and the overall average delay. It does this by finding the difference between the issue date and permit date for each project, and then taking the average of these differences." +}, { + "id": "1747", + "domain": "construction", + "domain_description": "Building permit data, construction labor statistics, project timeline information, and sustainable building practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top three regions with the highest average permit cost.", + "sql_context": "CREATE TABLE permit (permit_id INT, region VARCHAR(20), cost FLOAT); INSERT INTO permit VALUES (1, \u0027Northeast\u0027, 8000); INSERT INTO permit VALUES (2, \u0027Midwest\u0027, 5000); INSERT INTO permit VALUES (3, \u0027Southwest\u0027, 6000);", + "sql": "SELECT region, AVG(cost) as avg_cost, RANK() OVER (ORDER BY AVG(cost) DESC) as avg_cost_rank FROM permit GROUP BY region HAVING avg_cost_rank \u003c\u003d 3;", + "sql_explanation": "Lists the top three regions with the highest average permit cost." +}, { + "id": "1222", + "domain": "food service", + "domain_description": "Restaurant revenue data, menu engineering information, food safety inspection records, and sustainable sourcing practices.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank suppliers by their sustainable sourcing rating, for suppliers from South Africa.", + "sql_context": "CREATE TABLE Suppliers (SupplierID INT, SupplierName VARCHAR(255), Country VARCHAR(255), SustainabilityRating DECIMAL(3,2));", + "sql": "SELECT SupplierName, Country, SustainabilityRating, RANK() OVER (ORDER BY SustainabilityRating DESC) as SustainabilityRank FROM Suppliers WHERE Country \u003d \u0027South Africa\u0027;", + "sql_explanation": "This query ranks suppliers from South Africa by their sustainable sourcing rating, using the RANK() window function. The result is a list of suppliers with their corresponding sustainability rank." 
+}, { + "id": "101", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List all clinics in Arizona that have reduced their rural patient base by over 15% since 2018.", + "sql_context": "CREATE TABLE clinics (clinic_id INT, name TEXT, location TEXT, rural BOOLEAN);CREATE TABLE patients (patient_id INT, clinic_id INT, year INT, rural BOOLEAN);", + "sql": "SELECT c.name FROM clinics c JOIN (SELECT clinic_id, 100.0 * COUNT(*) FILTER (WHERE rural) / SUM(COUNT(*)) OVER (PARTITION BY clinic_id) AS reduction_ratio FROM patients WHERE year IN (2018, 2022) GROUP BY clinic_id) t ON c.clinic_id \u003d t.clinic_id WHERE t.reduction_ratio \u003e 15.0 AND c.state \u003d \u0027Arizona\u0027;", + "sql_explanation": "1. Calculates the percentage change in rural patients for each clinic. 2. Filters for clinics in Arizona with over 15% reduction since 2018." +}, { + "id": "782", + "domain": "rural health", + "domain_description": "Detailed records on healthcare access, disease prevalence, and resource allocation in rural health.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the name of the hospital with the lowest patient capacity in each state, ordered by state?", + "sql_context": "CREATE TABLE hospitals (id INT, state VARCHAR(255), name VARCHAR(255), patient_capacity INT); INSERT INTO hospitals (id, state, name, patient_capacity) VALUES (1, \u0027NY\u0027, \u0027Hospital A\u0027, 100), (2, \u0027CA\u0027, \u0027Hospital B\u0027, 150), (3, \u0027TX\u0027, \u0027Hospital C\u0027, 120);", + "sql": "SELECT state, name FROM (SELECT state, name, patient_capacity, ROW_NUMBER() OVER (PARTITION BY state ORDER BY patient_capacity ASC) as rank FROM hospitals) subquery WHERE rank \u003d 1 ORDER BY state;", + "sql_explanation": "The SQL query uses a subquery to assign a row number (rank) to each hospital within a state, ordered by patient_capacity in ascending order. The main query then selects the state and name from the subquery where the rank is 1, and orders the results by state." +}, { + "id": "3260", + "domain": "social media", + "domain_description": "Extensive data on user behavior, content trends, advertising performance, and data privacy in social media.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank comments by creation time for each user.", + "sql_context": "CREATE TABLE comments (id INT, post_id INT, user_id INT, content TEXT, created_at TIMESTAMP); INSERT INTO comments (id, post_id, user_id, content, created_at) VALUES (5, 3, 3, \u0027ÂĄInteresante!\u0027, \u00272021-01-03 12:01:00\u0027), (6, 4, 4, \u0027ImpressÃŖo incrível!\u0027, \u00272021-01-04 13:01:00\u0027);", + "sql": "SELECT c.*, ROW_NUMBER() OVER (PARTITION BY c.user_id ORDER BY c.created_at) as rank FROM comments c;", + "sql_explanation": "Calculate the rank of each comment within their user based on creation time." +}, { + "id": "574", + "domain": "trade unions", + "domain_description": "Union membership statistics, collective bargaining data, labor rights advocacy, and workplace safety metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average severity of workplace safety incidents for each union?", + "sql_context": "CREATE TABLE workplace_safety (safety_id INT, union_name VARCHAR(50), incident_date DATE, incident_type VARCHAR(50), severity VARCHAR(50));CREATE VIEW union_safety_incidents AS SELECT union_name, severity FROM workplace_safety;", + "sql": "SELECT union_name, AVG(RANK() OVER (PARTITION BY union_name ORDER BY CASE severity WHEN \u0027High\u0027 THEN 1 WHEN \u0027Medium\u0027 THEN 2 WHEN \u0027Low\u0027 THEN 3 END)) as avg_severity FROM union_safety_incidents GROUP BY union_name;", + "sql_explanation": "This SQL query calculates the average severity of workplace safety incidents for each union by joining the \u0027workplace_safety\u0027 table with the \u0027union_safety_incidents\u0027 view on the union_name column. It then uses the AVG function with the RANK() window function to rank the severity of incidents for each union and calculate the average severity ranking for each union. The CASE statement is used to assign a ranking to each severity level." +}, { + "id": "3079", + "domain": "biotechnology", + "domain_description": "Genetic research data, bioprocess engineering information, biotech startup funding, and biosensor technology development.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank research labs by total funding in India.", + "sql_context": "CREATE TABLE research_labs (id INT, name TEXT, country TEXT, funding FLOAT); INSERT INTO research_labs (id, name, country, funding) VALUES (1, \u0027LabA\u0027, \u0027India\u0027, 1500000.0), (2, \u0027LabB\u0027, \u0027India\u0027, 1200000.0), (3, \u0027LabC\u0027, \u0027UK\u0027, 900000.0);", + "sql": "SELECT name, ROW_NUMBER() OVER (ORDER BY funding DESC) as rank FROM research_labs WHERE country \u003d \u0027India\u0027;", + "sql_explanation": "Rank research labs in India by total funding by using the ROW_NUMBER function with ORDER BY clause for descending order." +}, { + "id": "96", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Show the most recent incident date for each category in the SecurityIncidents table, filtered by \u00272022\u0027.", + "sql_context": "CREATE TABLE SecurityIncidents (id INT, incident_category VARCHAR(255), incident_date DATE); INSERT INTO SecurityIncidents (id, incident_category, incident_date) VALUES (1, \u0027Malware\u0027, \u00272022-01-15\u0027), (2, \u0027Phishing\u0027, \u00272022-02-10\u0027), (3, \u0027Network Intrusion\u0027, \u00272022-03-01\u0027), (4, \u0027Unauthorized Access\u0027, \u00272022-03-15\u0027), (5, \u0027Data Exfiltration\u0027, \u00272021-12-20\u0027), (6, \u0027Insider Threat\u0027, \u00272022-01-05\u0027), (7, \u0027Denial of Service\u0027, \u00272022-02-25\u0027), (8, \u0027Social Engineering\u0027, \u00272021-11-18\u0027);", + "sql": "SELECT incident_category, incident_date FROM (SELECT incident_category, incident_date, ROW_NUMBER() OVER (PARTITION BY incident_category ORDER BY incident_date DESC) AS rank FROM SecurityIncidents WHERE incident_date \u003e\u003d \u00272022-01-01\u0027 AND incident_date \u003c\u003d \u00272022-12-31\u0027) AS filtered_incidents WHERE rank \u003d 1;", + "sql_explanation": "This query filters SecurityIncidents table for the year 2022 and uses a window function (ROW_NUMBER) to rank the incident dates within each category in descending order, and then filters the most recent dates by selecting the rank 1 records." +}, { + "id": "495", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 most vulnerable systems based on their Common Vulnerability Scoring System (CVSS) score in the last month?", + "sql_context": "CREATE TABLE systems (system_id INT, system_name VARCHAR(255), cvss_score FLOAT, last_updated TIMESTAMP); INSERT INTO systems (system_id, system_name, cvss_score, last_updated) VALUES (1, \u0027Web Server 1\u0027, 7.5, \u00272022-01-01 10:00:00\u0027), (2, \u0027Database Server 1\u0027, 8.2, \u00272022-01-05 15:30:00\u0027), (3, \u0027Firewall 1\u0027, 4.3, \u00272022-01-10 08:15:00\u0027);", + "sql": "SELECT system_name, cvss_score FROM (SELECT system_name, cvss_score, ROW_NUMBER() OVER (ORDER BY cvss_score DESC) as rank FROM systems WHERE last_updated \u003e\u003d DATEADD(month, -1, CURRENT_TIMESTAMP)) subquery WHERE rank \u003c\u003d 5;", + "sql_explanation": "This query ranks systems based on their CVSS scores in descending order and assigns a row number to each system. It then filters the top 5 most vulnerable systems based on their row number." +}, { + "id": "1454", + "domain": "cybersecurity", + "domain_description": "Threat intelligence data, vulnerability assessments, security incident response metrics, and cybersecurity policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the three most recent security incidents for each country, and their total impact value.", + "sql_context": "CREATE TABLE security_incidents (id INT, country VARCHAR(50), incident_time TIMESTAMP, impact_value INT); INSERT INTO security_incidents (id, country, incident_time, impact_value) VALUES (1, \u0027USA\u0027, \u00272022-01-01 10:00:00\u0027, 5000), (2, \u0027Canada\u0027, \u00272022-01-02 15:30:00\u0027, 7000), (3, \u0027USA\u0027, \u00272022-01-03 08:45:00\u0027, 6000);", + "sql": "SELECT country, incident_time, impact_value, ROW_NUMBER() OVER (PARTITION BY country ORDER BY incident_time DESC) as rn FROM security_incidents WHERE rn \u003c\u003d 3;", + "sql_explanation": "This query partitions the data by country and orders it by incident_time in descending order. The ROW_NUMBER() function assigns a row number within each partition, starting from 1 for the latest incident_time. We then filter for rows with rn \u003c\u003d 3 to get the top 3 incidents for each country." +}, { + "id": "223", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many employees have been hired by each supplier, partitioned by year and ordered by the number of employees hired?", + "sql_context": "CREATE TABLE employment (employment_id INT, supplier_id INT, employee_name VARCHAR(255), employment_date DATE); INSERT INTO employment (employment_id, supplier_id, employee_name, employment_date) VALUES (1, 1, \u0027John Doe\u0027, \u00272021-01-01\u0027), (2, 1, \u0027Jane Doe\u0027, \u00272022-02-01\u0027);", + "sql": "SELECT supplier_id, DATE_TRUNC(\u0027year\u0027, employment_date) AS year, COUNT(DISTINCT employee_name) AS number_of_employees, RANK() OVER (ORDER BY COUNT(DISTINCT employee_name) DESC) AS ranking FROM employment GROUP BY supplier_id, year ORDER BY number_of_employees DESC;", + "sql_explanation": "This query partitions the data by supplier ID and year, calculates the number of distinct employees hired for each partition, and orders the partitions in descending order by the number of employees hired. It then assigns a ranking to each partition using the RANK function." +}, { + "id": "728", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the percentage of total waste diverted from disposal for each program in the \u0027waste_management\u0027 table.", + "sql_context": "CREATE TABLE waste_management (program VARCHAR(50), waste_type VARCHAR(50), quantity INT); INSERT INTO waste_management (program, waste_type, quantity) VALUES (\u0027recycling\u0027, \u0027plastic\u0027, 200), (\u0027recycling\u0027, \u0027paper\u0027, 300), (\u0027disposal\u0027, \u0027metal\u0027, 150);", + "sql": "SELECT program, 100.0 * SUM(CASE WHEN waste_type !\u003d \u0027disposal\u0027 THEN quantity ELSE 0 END) OVER (PARTITION BY program) / SUM(quantity) OVER (PARTITION BY program) AS pct_diverted FROM waste_management;", + "sql_explanation": "The SQL query calculates the percentage of total waste diverted from disposal for each program in the \u0027waste_management\u0027 table by partitioning the data by the program name and summing the quantity for non-disposal waste types. It then calculates the percentage of diverted waste for each program by dividing the sum of non-disposal waste by the total waste quantity for each program." +}, { + "id": "931", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify workers who have produced more than twice the average number of units, ordered by the highest number of units produced.", + "sql_context": "CREATE TABLE worker (id INT, name VARCHAR(50), units_produced INT); INSERT INTO worker (id, name, units_produced) VALUES (1, \u0027John Doe\u0027, 1000), (2, \u0027Jane Smith\u0027, 1200), (3, \u0027Mike Johnson\u0027, 1500), (4, \u0027Alice Williams\u0027, 1800), (5, \u0027Bob Brown\u0027, 800), (6, \u0027Charlie Green\u0027, 700);", + "sql": "SELECT name, units_produced FROM (SELECT name, units_produced, AVG(units_produced) OVER () as avg_units FROM worker) t WHERE units_produced \u003e 2 * avg_units ORDER BY units_produced DESC;", + "sql_explanation": "The SQL query identifies workers who have produced more than twice the average number of units by calculating the average number of units produced using the window function AVG(). It then filters the records based on the condition that the number of units produced is greater than twice the average and orders the results by the number of units produced in descending order." +}, { + "id": "1498", + "domain": "manufacturing", + "domain_description": "Detailed records on ethical manufacturing, circular economy, workforce development, and industry 4.0.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "data definition", + "sql_task_type_description": "creating, altering, or dropping tables and other database objects", + "sql_prompt": "Create a view named \u0027top_ethical_ratings\u0027 showing the top 3 companies by ethical rating", + "sql_context": "CREATE TABLE ethical_manufacturing (id INT PRIMARY KEY, company VARCHAR(50), location VARCHAR(50), ethical_rating FLOAT); INSERT INTO ethical_manufacturing (id, company, location, ethical_rating) VALUES (1, \u0027GreenTech\u0027, \u0027USA\u0027, 4.2), (2, \u0027EcoInnovations\u0027, \u0027Canada\u0027, 4.6), (3, \u0027SustainableSolutions\u0027, \u0027Mexico\u0027, 4.8);", + "sql": "CREATE VIEW top_ethical_ratings AS SELECT * FROM (SELECT *, ROW_NUMBER() OVER (ORDER BY ethical_rating DESC) rn FROM ethical_manufacturing) x WHERE rn \u003c\u003d 3;", + "sql_explanation": "1. The \u0027CREATE VIEW\u0027 command initializes a new view called \u0027top_ethical_ratings\u0027. 2. The subquery with \u0027ROW_NUMBER()\u0027 ranks companies by ethical rating in descending order. 3. The final \u0027SELECT\u0027 statement retrieves the top 3 records from the subquery." +}, { + "id": "566", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in infectious disease cases between consecutive months?", + "sql_context": "CREATE TABLE infectious_disease_monthly (month INT, district VARCHAR(20), cases INT); INSERT INTO infectious_disease_monthly (month, district, cases) VALUES (1, \u0027East Delhi\u0027, 100), (2, \u0027East Delhi\u0027, 120), (1, \u0027South Delhi\u0027, 150), (2, \u0027South Delhi\u0027, 180);", + "sql": "SELECT month, district, cases, LAG(cases, 1) OVER (PARTITION BY district ORDER BY month) AS prev_cases, cases - LAG(cases, 1) OVER (PARTITION BY district ORDER BY month) AS change FROM infectious_disease_monthly;", + "sql_explanation": "Calculates the change in infectious disease cases between consecutive months by using the LAG function with ORDER BY clause and PARTITION BY clause. The change is calculated by subtracting the previous month\u0027s cases from the current month\u0027s cases." +}, { + "id": "1529", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the hospital capacity utilization rate by hospital name, ordered within each state?", + "sql_context": "CREATE TABLE CapacityUtilization (StateName VARCHAR(50), HospitalName VARCHAR(50), Capacity INT, Utilization INT); INSERT INTO CapacityUtilization (StateName, HospitalName, Capacity, Utilization) VALUES (\u0027Alabama\u0027, \u0027HospitalA\u0027, 250, 75), (\u0027Alabama\u0027, \u0027HospitalB\u0027, 300, 80), (\u0027Alaska\u0027, \u0027HospitalX\u0027, 50, 60), (\u0027Arizona\u0027, \u0027HospitalY\u0027, 400, 90), (\u0027Arizona\u0027, \u0027HospitalZ\u0027, 350, 85);", + "sql": "SELECT StateName, HospitalName, Utilization, PERCENT_RANK() OVER (PARTITION BY StateName ORDER BY Utilization DESC) AS PercentRank FROM CapacityUtilization", + "sql_explanation": "Calculate the hospital capacity utilization rate by hospital name and rank the hospitals within each state based on the Utilization column. Partition the result set by the StateName column and sort by Utilization in descending order." +}, { + "id": "3877", + "domain": "public health", + "domain_description": "Community health statistics, infectious disease tracking data, healthcare access metrics, and public health policy analysis.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Number of infectious disease cases in each region, ordered by the highest number of cases.", + "sql_context": "CREATE TABLE infectious_disease (region VARCHAR(10), cases INT); INSERT INTO infectious_disease (region, cases) VALUES (\u0027North\u0027, 100), (\u0027South\u0027, 150), (\u0027East\u0027, 200), (\u0027West\u0027, 50);", + "sql": "SELECT region, cases, RANK() OVER (ORDER BY cases DESC) AS rank FROM infectious_disease;", + "sql_explanation": "Ranks the number of infectious disease cases in each region in descending order by using the RANK function with ORDER BY clause." +}, { + "id": "3042", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the 5-year trend for crime in Houston?", + "sql_context": "CREATE TABLE crime_trend (id INT, crime VARCHAR(20), city VARCHAR(20), year INT, count INT); INSERT INTO crime_trend (id, crime, city, year, count) VALUES (1, \u0027Murder\u0027, \u0027Houston\u0027, 2016, 120), (2, \u0027Robbery\u0027, \u0027Houston\u0027, 2016, 350), (3, \u0027Assault\u0027, \u0027Houston\u0027, 2016, 600), (4, \u0027Murder\u0027, \u0027Houston\u0027, 2017, 130), (5, \u0027Robbery\u0027, \u0027Houston\u0027, 2017, 360), (6, \u0027Assault\u0027, \u0027Houston\u0027, 2017, 610), (7, \u0027Murder\u0027, \u0027Houston\u0027, 2018, 140), (8, \u0027Robbery\u0027, \u0027Houston\u0027, 2018, 370), (9, \u0027Assault\u0027, \u0027Houston\u0027, 2018, 620), (10, \u0027Murder\u0027, \u0027Houston\u0027, 2019, 150), (11, \u0027Robbery\u0027, \u0027Houston\u0027, 2019, 380), (12, \u0027Assault\u0027, \u0027Houston\u0027, 2019, 630);", + "sql": "SELECT year, (count - LAG(count, 1) OVER (ORDER BY year)) as trend FROM crime_trend WHERE city \u003d \u0027Houston\u0027;", + "sql_explanation": "This query calculates the 5-year trend for crime in Houston by subtracting the count of crimes in the previous year from the current year for each crime type. The LAG function is used to access the previous row\u0027s value for the count column. The query only shows the year and the trend for each year." +}, { + "id": "3199", + "domain": "public safety", + "domain_description": "Crime statistics, emergency response times, community policing metrics, and disaster preparedness data.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 neighborhoods with the highest crime rate?", + "sql_context": "CREATE TABLE neighborhoods (name VARCHAR(255), crime_count INT); INSERT INTO neighborhoods (name, crime_count) VALUES (\u0027Central Park\u0027, 25), (\u0027Harlem\u0027, 75), (\u0027Brooklyn\u0027, 120);", + "sql": "SELECT name, crime_count, RANK() OVER (ORDER BY crime_count DESC) FROM neighborhoods WHERE RANK() \u003c\u003d 3;", + "sql_explanation": "Rank the neighborhoods based on crime_count and select the top 3 neighborhoods with the highest crime rate." +}, { + "id": "174", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage change in average CO2 emission per capita for each continent between 2019 and 2020.", + "sql_context": "CREATE TABLE EmissionsData (Continent VARCHAR(50), Year INT, CO2Emission DECIMAL(5,2), Population INT); INSERT INTO EmissionsData (Continent, Year, CO2Emission, Population) VALUES (\u0027Asia\u0027, 2020, 5.3, 4600000000), (\u0027Asia\u0027, 2019, 4.6, 4580000000), (\u0027Africa\u0027, 2020, 2.1, 1300000000), (\u0027Africa\u0027, 2019, 1.8, 1280000000);", + "sql": "SELECT Continent, (AVG(CO2Emission/Population) - LAG(AVG(CO2Emission/Population)) OVER (PARTITION BY Continent ORDER BY Year)) * 100.0 / LAG(AVG(CO2Emission/Population)) OVER (PARTITION BY Continent ORDER BY Year) as PercentageChange FROM EmissionsData GROUP BY Continent, Year;", + "sql_explanation": "Calculate the average CO2 emission per capita for each continent in both 2019 and 2020, and then find the percentage change in temperature between the two years for each continent." +}, { + "id": "398", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 countries in Africa with the highest CO2 emissions in the last 10 years?", + "sql_context": "CREATE TABLE africa_emissions (country VARCHAR(50), year INT, co2_emissions INT); INSERT INTO africa_emissions (country, year, co2_emissions) VALUES (\u0027South Africa\u0027, 2011, 450000), (\u0027South Africa\u0027, 2012, 460000), (\u0027Egypt\u0027, 2011, 250000), (\u0027Egypt\u0027, 2012, 260000);", + "sql": "SELECT country, SUM(co2_emissions) as total_emissions, RANK() OVER (ORDER BY SUM(co2_emissions) DESC) as rank FROM africa_emissions WHERE country IN (\u0027South Africa\u0027, \u0027Egypt\u0027, \u0027Nigeria\u0027) AND year BETWEEN 2012 AND 2021 GROUP BY country;", + "sql_explanation": "Calculate the total CO2 emissions in the last 10 years for the selected countries in Africa and rank them by the highest emissions." +}, { + "id": "608", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average annual rainfall change in Kenya and Nigeria since 2000, ranked by the greatest change?", + "sql_context": "CREATE TABLE rainfall_data (country VARCHAR(20), year INT, avg_rainfall FLOAT); INSERT INTO rainfall_data (country, year, avg_rainfall) VALUES (\u0027Kenya\u0027, 2000, 800), (\u0027Kenya\u0027, 2001, 810), (\u0027Nigeria\u0027, 2000, 1200), (\u0027Nigeria\u0027, 2001, 1215);", + "sql": "SELECT country, AVG(avg_rainfall) as avg_rainfall_change, ROW_NUMBER() OVER (ORDER BY AVG(avg_rainfall) DESC) as rank FROM rainfall_data WHERE country IN (\u0027Kenya\u0027, \u0027Nigeria\u0027) AND year \u003e\u003d 2000 GROUP BY country;", + "sql_explanation": "Calculate the average rainfall change since 2000 for Kenya and Nigeria, then rank them by the greatest change." +}, { + "id": "1067", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total CO2 emission for each continent and year, and rank them.", + "sql_context": "CREATE TABLE EmissionsData (Continent VARCHAR(50), Year INT, CO2Emission DECIMAL(5,2), Population INT); INSERT INTO EmissionsData (Continent, Year, CO2Emission, Population) VALUES (\u0027Asia\u0027, 2020, 5.3, 4600000000), (\u0027Asia\u0027, 2019, 4.6, 4580000000), (\u0027Africa\u0027, 2020, 2.1, 1300000000), (\u0027Africa\u0027, 2019, 1.8, 1280000000);", + "sql": "SELECT Continent, Year, SUM(CO2Emission) as TotalCO2Emission, RANK() OVER (PARTITION BY Year ORDER BY SUM(CO2Emission) DESC) as Rank FROM EmissionsData GROUP BY Continent, Year;", + "sql_explanation": "Calculate the total CO2 emission for each continent and year, and then rank them in descending order." +}, { + "id": "1221", + "domain": "climate change", + "domain_description": "Climate change data on climate mitigation, climate adaptation, climate finance, and climate communication.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in emissions for India from 2018 to 2020?", + "sql_context": "CREATE TABLE emissions_india (year INT, total_emissions INT); INSERT INTO emissions_india (year, total_emissions) VALUES (2018, 3000), (2019, 3200), (2020, 3000);", + "sql": "SELECT (total_emissions::DECIMAL(10,2)-LAG(total_emissions) OVER ())/LAG(total_emissions) OVER ()*100 AS emission_change FROM emissions_india WHERE year IN (2019, 2020);", + "sql_explanation": "This query calculates the change in emissions for India from 2018 to 2020 by subtracting the previous year\u0027s total_emissions from the current year\u0027s total_emissions, dividing the result by the previous year\u0027s total_emissions, then multiplying by 100 to express as a percentage. The query uses the window function LAG() OVER () to access the previous year\u0027s total_emissions." +}, { + "id": "432", + "domain": "fashion retail", + "domain_description": "Fashion trend data, textile sourcing info, customer size diversity, and sustainable fashion metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the rank of each country based on the total quantity of sustainable fabric sourced in 2021 and 2022?", + "sql_context": "CREATE TABLE sourcing (year INT, country VARCHAR(20), fabric_type VARCHAR(20), quantity INT); INSERT INTO sourcing (year, country, fabric_type, quantity) VALUES (2022, \u0027India\u0027, \u0027sustainable\u0027, 3000), (2022, \u0027India\u0027, \u0027organic_cotton\u0027, 5000), (2021, \u0027Brazil\u0027, \u0027recycled_polyester\u0027, 4000), (2021, \u0027Brazil\u0027, \u0027sustainable\u0027, 6000), (2021, \u0027Nepal\u0027, \u0027organic_cotton\u0027, 2000), (2021, \u0027Nepal\u0027, \u0027sustainable\u0027, 4000), (2022, \u0027Brazil\u0027, \u0027sustainable\u0027, 7000), (2022, \u0027Nepal\u0027, \u0027sustainable\u0027, 5000), (2022, \u0027Nepal\u0027, \u0027organic_cotton\u0027, 6000), (2021, \u0027India\u0027, \u0027sustainable\u0027, 8000);", + "sql": "SELECT country, RANK() OVER (ORDER BY total_quantity DESC) as rank FROM (SELECT country, SUM(quantity) as total_quantity FROM sourcing WHERE year IN (2021, 2022) AND fabric_type LIKE \u0027sustainable%\u0027 GROUP BY country) as subquery;", + "sql_explanation": "This query calculates the rank of each country based on the total quantity of sustainable fabric sourced in 2021 and 2022 by selecting the country and rank from a subquery that calculates the total quantity of sustainable fabric sourced in 2021 and 2022 for each country. The results are then ordered by the total quantity in descending order and the rank is calculated using the RANK() window function." 
+}, { + "id": "611", + "domain": "legal services", + "domain_description": "Case outcomes, legal precedents, attorney performance metrics, client demographics, and billing information.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the total billing amount for attorneys in the \u0027Criminal Law\u0027 practice area, for the year 2020, partitioned by attorney\u0027s last name and ordered by the total billing amount in descending order.", + "sql_context": "CREATE TABLE Attorneys (AttorneyID INT, FirstName VARCHAR(50), LastName VARCHAR(50), PracticeArea VARCHAR(50), TotalBilling FLOAT, BillingYear INT); INSERT INTO Attorneys (AttorneyID, FirstName, LastName, PracticeArea, TotalBilling, BillingYear) VALUES (1, \u0027Clara\u0027, \u0027Rivera\u0027, \u0027Personal Injury\u0027, 8000.00, 2020), (2, \u0027Jamal\u0027, \u0027Lee\u0027, \u0027Personal Injury\u0027, 6000.00, 2019), (3, \u0027Sophia\u0027, \u0027Gomez\u0027, \u0027Criminal Law\u0027, 9000.00, 2020);", + "sql": "SELECT LastName, SUM(TotalBilling) OVER (PARTITION BY PracticeArea, LastName, BillingYear) AS TotalBilling FROM Attorneys WHERE PracticeArea \u003d \u0027Criminal Law\u0027 AND BillingYear \u003d 2020 ORDER BY TotalBilling DESC;", + "sql_explanation": "The SQL query selects the LastName column and the sum of the TotalBilling column (aliased as TotalBilling) for each unique PracticeArea, LastName and BillingYear (partitioned by PracticeArea, LastName, BillingYear) from the Attorneys table where PracticeArea is \u0027Criminal Law\u0027 and BillingYear is 2020, and orders the results by the TotalBilling column in descending order." 
+}, { + "id": "807", + "domain": "marine biology", + "domain_description": "Comprehensive data on marine species, oceanography, conservation efforts, and climate change impacts in marine biology.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which marine mammal species have a population size in the top 20%?", + "sql_context": "CREATE TABLE marine_species (name TEXT, category TEXT, population INT); INSERT INTO marine_species (name, category, population) VALUES (\u0027Blue Whale\u0027, \u0027Mammal\u0027, 10000), (\u0027Dolphin\u0027, \u0027Mammal\u0027, 25000), (\u0027Clownfish\u0027, \u0027Fish\u0027, 150000);", + "sql": "SELECT name, population FROM (SELECT name, population, PERCENT_RANK() OVER (ORDER BY population DESC) as rank FROM marine_species WHERE category \u003d \u0027Mammal\u0027) as ranked_species WHERE rank \u003c\u003d 0.2;", + "sql_explanation": "This query calculates the percent rank of each marine mammal species based on population size, ordered in descending order. It then selects the name and population size of the marine mammal species that are in the top 20% based on population size." +}, { + "id": "228", + "domain": "music industry", + "domain_description": "Music streaming data, concert ticket sales, artist demographics, and music festival trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 artists with the highest total concert revenue.", + "sql_context": "CREATE TABLE Concerts (ConcertID INT, Artist VARCHAR(50), City VARCHAR(50), Revenue DECIMAL(10,2)); INSERT INTO Concerts (ConcertID, Artist, City, Revenue) VALUES (1, \u0027Taylor Swift\u0027, \u0027Los Angeles\u0027, 500000.00), (2, \u0027BTS\u0027, \u0027New York\u0027, 750000.00), (3, \u0027Adele\u0027, \u0027London\u0027, 600000.00), (4, \u0027Taylor Swift\u0027, \u0027Paris\u0027, 400000.00), (5, \u0027BTS\u0027, \u0027Tokyo\u0027, 900000.00);", + "sql": "SELECT Artist, SUM(Revenue) as TotalRevenue FROM Concerts GROUP BY Artist ORDER BY TotalRevenue DESC; SELECT * FROM (SELECT Artist, SUM(Revenue) as TotalRevenue, ROW_NUMBER() OVER (ORDER BY SUM(Revenue) DESC) as Rank FROM Concerts GROUP BY Artist) WHERE Rank \u003c\u003d 3;", + "sql_explanation": "The SQL query first calculates the total revenue for each artist by using the SUM() function and grouping the data by artist with the GROUP BY clause. Then, it ranks the artists based on their total revenue using the ROW_NUMBER() window function and filters the top 3 artists with the highest total revenue." +}, { + "id": "855", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the rank of each cargo by volume category?", + "sql_context": "CREATE TABLE VolumeCategory (VolumeCategoryID INT, VolumeRange VARCHAR(50), LowerLimit INT, UpperLimit INT); INSERT INTO VolumeCategory (VolumeCategoryID, VolumeRange, LowerLimit, UpperLimit) VALUES (1, \u0027up to 10000\u0027, 0, 10000); INSERT INTO VolumeCategory (VolumeCategoryID, VolumeRange, LowerLimit, UpperLimit) VALUES (2, \u002710000-50000\u0027, 10000, 50000);", + "sql": "SELECT CargoName, Weight, RANK() OVER (PARTITION BY vc.VolumeCategoryID ORDER BY Weight DESC) as Rank FROM Cargo c JOIN VolumeCategory vc ON c.Weight BETWEEN vc.LowerLimit AND vc.UpperLimit;", + "sql_explanation": "The SQL query partitions the data by volume category and orders it by Weight in descending order. It then assigns a rank within each partition." +}, { + "id": "2380", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many TEUs were handled by each port in the cargo_handling table in reverse chronological order?", + "sql_context": "CREATE TABLE cargo_handling (port_id INT, port_name VARCHAR(50), teu_count INT, handling_date DATE); INSERT INTO cargo_handling (port_id, port_name, teu_count, handling_date) VALUES (1, \u0027Port_A\u0027, 2000, \u00272022-01-01\u0027), (2, \u0027Port_B\u0027, 3000, \u00272022-01-02\u0027), (3, \u0027Port_C\u0027, 1000, \u00272022-01-03\u0027);", + "sql": "SELECT port_name, teu_count, ROW_NUMBER() OVER (PARTITION BY port_name ORDER BY handling_date DESC) as rn FROM cargo_handling;", + "sql_explanation": "This SQL query uses the ROW_NUMBER window function to rank TEU handling counts for each port in reverse chronological order. It partitions the data by port_name and orders it by handling_date in descending order." +}, { + "id": "3071", + "domain": "ocean shipping", + "domain_description": "Detailed records on cargo handling, fleet management, port operations, and regulatory compliance in ocean shipping.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who is the top-performing port in terms of cargo handling?", + "sql_context": "CREATE TABLE ports (port_id INT, port_name VARCHAR(50), total_cargo INT); INSERT INTO ports VALUES (1, \u0027Port of Shanghai\u0027, 43032442); INSERT INTO ports VALUES (2, \u0027Port of Singapore\u0027, 37439402); INSERT INTO ports VALUES (3, \u0027Port of Shenzhen\u0027, 27162000);", + "sql": "SELECT port_name, ROW_NUMBER() OVER (ORDER BY total_cargo DESC) as rank FROM ports WHERE row_number() \u003d 1;", + "sql_explanation": "This query identifies the top-performing port in terms of cargo handling by using the ROW_NUMBER function to order the ports by total_cargo in descending order and then returning the port_name with a rank of 1." +}, { + "id": "277", + "domain": "transportation", + "domain_description": "Transportation data on electric vehicle adoption, autonomous vehicles, public transportation systems, and multimodal mobility.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 most expensive electric taxi rides by ride_distance.", + "sql_context": "CREATE TABLE taxi_trips (ride_id INT, ride_start_time TIMESTAMP, ride_end_time TIMESTAMP, ride_distance FLOAT, fare FLOAT, vehicle_type VARCHAR(10));", + "sql": "SELECT ride_id, ride_distance, fare FROM (SELECT ride_id, ride_distance, fare, ROW_NUMBER() OVER (PARTITION BY vehicle_type ORDER BY ride_distance DESC, fare DESC) AS rank FROM taxi_trips WHERE vehicle_type \u003d \u0027Electric Taxi\u0027) AS subquery WHERE rank \u003c\u003d 3;", + "sql_explanation": "This query uses the ROW_NUMBER() function to partition the records by vehicle_type and order them by ride_distance and fare in descending order. The outer query selects the top 3 records for electric taxis." +}, { + "id": "1411", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the population trend in Arctic indigenous communities since 2000?", + "sql_context": "CREATE TABLE community_population (community VARCHAR(50), year INT, population INT); INSERT INTO community_population (community, year, population) VALUES (\u0027Inuit\u0027, 2000, 50000), (\u0027Inuit\u0027, 2001, 50500);", + "sql": "SELECT c.community, c.year, c.population, LAG(c.population) OVER (PARTITION BY c.community ORDER BY c.year) as prev_year_population FROM community_population c;", + "sql_explanation": "The SQL query calculates the population trend by comparing the current year\u0027s population with the previous year\u0027s population for each community." +}, { + "id": "2137", + "domain": "arctic research", + "domain_description": "In-depth data on climate change, biodiversity, indigenous communities, and resource management in arctic research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the number of days with extreme weather events in each region over the last 5 years?", + "sql_context": "CREATE TABLE extreme_weather_data (region VARCHAR(255), year INT, days_with_extreme_weather INT);", + "sql": "SELECT region, SUM(days_with_extreme_weather) OVER (PARTITION BY region) FROM extreme_weather_data WHERE year BETWEEN 2018 AND 2022;", + "sql_explanation": "Using window functions, the query partitions the extreme_weather_data table by region and calculates the total number of days with extreme weather events for each region over the last 5 years." 
+}, { + "id": "59", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which natural skincare brands in the USA have the highest revenue growth?", + "sql_context": "CREATE TABLE skincare_revenue (id INT, brand VARCHAR(50), revenue DECIMAL(10,2), year INT, country VARCHAR(50)); INSERT INTO skincare_revenue (id, brand, revenue, year, country) VALUES (1, \u0027Brand C\u0027, 1000.00, 2021, \u0027USA\u0027);", + "sql": "SELECT brand, (revenue - LAG(revenue, 1) OVER (PARTITION BY brand ORDER BY year)) / ABS(LAG(revenue, 1) OVER (PARTITION BY brand ORDER BY year)) * 100.0 AS growth_percentage FROM skincare_revenue WHERE country \u003d \u0027USA\u0027 AND revenue IS NOT NULL GROUP BY brand, revenue, year HAVING growth_percentage IS NOT NULL ORDER BY growth_percentage DESC;", + "sql_explanation": "This SQL query calculates the revenue growth percentage of natural skincare brands in the USA by subtracting the previous year\u0027s revenue from the current year\u0027s revenue, dividing by the previous year\u0027s revenue, and multiplying by 100.0 to convert to a percentage. It filters by country and excludes null revenue values, then groups by brand, revenue, and year, and orders by the growth percentage in descending order." 
+}, { + "id": "412", + "domain": "beauty industry", + "domain_description": "Cosmetics sales data, beauty product ingredient transparency, consumer preferences, and sustainability metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 2 most expensive beauty products for each brand.", + "sql_context": "CREATE TABLE beauty_products (product_name TEXT, price DECIMAL(5,2), brand TEXT); INSERT INTO beauty_products (product_name, price, brand) VALUES (\u0027Cleanser\u0027, 15.99, \u0027Natural Glow\u0027), (\u0027Toner\u0027, 14.99, \u0027Natural Glow\u0027), (\u0027Moisturizer\u0027, 29.99, \u0027Natural Glow\u0027), (\u0027Cleanser\u0027, 25.99, \u0027Pure Beauty\u0027), (\u0027Toner\u0027, 24.99, \u0027Pure Beauty\u0027);", + "sql": "SELECT brand, product_name, price FROM (SELECT brand, product_name, price, ROW_NUMBER() OVER (PARTITION BY brand ORDER BY price DESC) as rank FROM beauty_products WHERE brand IN (\u0027Natural Glow\u0027, \u0027Pure Beauty\u0027)) sub WHERE rank \u003c\u003d 2;", + "sql_explanation": "This query lists the top 2 most expensive beauty products for each brand. It does this by using the ROW_NUMBER function to assign a rank to each product within its brand, based on price in descending order. The subquery then selects the brand, product_name, and price columns from the ranked records, where the rank is less than or equal to 2." +}, { + "id": "779", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 2 regions with the highest average match duration", + "sql_context": "CREATE TABLE gameactivity (player_id INT, game_id INT, game_region VARCHAR(50), joined_at TIMESTAMP, left_at TIMESTAMP); INSERT INTO gameactivity (player_id, game_id, game_region, joined_at, left_at) VALUES (1, 1001, \u0027APAC\u0027, \u00272022-01-01 10:00:00\u0027, \u00272022-01-01 12:00:00\u0027);", + "sql": "SELECT game_region, AVG(DATEDIFF(second, joined_at, left_at)) as avg_duration, RANK() OVER (ORDER BY AVG(DATEDIFF(second, joined_at, left_at)) DESC) as rank FROM gameactivity GROUP BY game_region", + "sql_explanation": "Calculate the average match duration for each region, and rank them using the RANK() function based on the average match duration." +}, { + "id": "1069", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 3 players with the highest scores in the \u0027Action\u0027 game category?", + "sql_context": "CREATE TABLE Scores (PlayerID int, PlayerName varchar(50), Game varchar(50), Score int); INSERT INTO Scores (PlayerID, PlayerName, Game, Score) VALUES (1, \u0027Player1\u0027, \u0027Game1\u0027, 1000), (2, \u0027Player2\u0027, \u0027Game1\u0027, 1200), (3, \u0027Player3\u0027, \u0027Game1\u0027, 1500), (4, \u0027Player4\u0027, \u0027Game1\u0027, 800);", + "sql": "SELECT * FROM (SELECT PlayerID, PlayerName, Game, Score, ROW_NUMBER() OVER (PARTITION BY Game ORDER BY Score DESC) as Rank FROM Scores) T WHERE T.Game \u003d \u0027Game1\u0027 AND T.Rank \u003c\u003d 3;", + "sql_explanation": "The SQL query first assigns a rank to each player in each game based on their score using the ROW_NUMBER window function. It then selects the top 3 players with the highest scores in the \u0027Action\u0027 game category by filtering the ranked table for the \u0027Game1\u0027 game and the top 3 ranks." +}, { + "id": "1769", + "domain": "gaming industry", + "domain_description": "Player analytics, game performance metrics, eSports data, and virtual reality trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find top 3 countries with the highest average gameplay duration", + "sql_context": "CREATE TABLE gamesessions (player_id INT, game_id INT, country VARCHAR(50), session_duration INT); INSERT INTO gamesessions (player_id, game_id, country, session_duration) VALUES (1, 1001, \u0027USA\u0027, 180), (2, 1001, \u0027Canada\u0027, 220);", + "sql": "SELECT country, AVG(session_duration) as avg_duration, RANK() OVER (ORDER BY AVG(session_duration) DESC) as rank FROM gamesessions GROUP BY country", + "sql_explanation": "Calculate the average session duration for each country, then rank them using the RANK() function based on the average session duration in descending order." +}, { + "id": "943", + "domain": "human resources", + "domain_description": "Employee demographics, talent acquisition data, diversity and inclusion metrics, and training program statistics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average salary of employees who have been with the company for more than five years, partitioned by department?", + "sql_context": "CREATE TABLE EmployeeDemographics (EmployeeID INT, Department VARCHAR(20), YearsWithCompany INT); INSERT INTO EmployeeDemographics (EmployeeID, Department, YearsWithCompany) VALUES (1, \u0027IT\u0027, 6), (2, \u0027IT\u0027, 3), (3, \u0027HR\u0027, 8), (4, \u0027HR\u0027, 1);", + "sql": "SELECT Department, AVG(Salary) OVER (PARTITION BY Department) AS Avg_Salary FROM Employees WHERE EmployeeID IN (SELECT EmployeeID FROM EmployeeDemographics WHERE YearsWithCompany \u003e 5);", + "sql_explanation": "The SQL query calculates the average salary of employees who have been with the company for more than five years, partitioned by department by first filtering the data based on the number of years with the company and then calculating the average salary for those employees using the AVG function and the PARTITION BY clause." +}, { + "id": "36", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify mines with a significant increase in gold production compared to the previous day.", + "sql_context": "CREATE TABLE daily_mine_gold_production (mine_id INT, production_date DATE, gold_production FLOAT); INSERT INTO daily_mine_gold_production (mine_id, production_date, gold_production) VALUES (1, \u00272021-01-01\u0027, 100), (1, \u00272021-01-02\u0027, 110), (1, \u00272021-01-03\u0027, 140), (1, \u00272021-01-04\u0027, 150), (2, \u00272021-01-01\u0027, 140), (2, \u00272021-01-02\u0027, 150), (2, \u00272021-01-03\u0027, 160), (2, \u00272021-01-04\u0027, 180);", + "sql": "SELECT mine_id, production_date, gold_production, LAG(gold_production) OVER (PARTITION BY mine_id ORDER BY production_date) as prev_day_production, gold_production - LAG(gold_production) OVER (PARTITION BY mine_id ORDER BY production_date) as production_change FROM daily_mine_gold_production WHERE gold_production \u003e 1.2 * LAG(gold_production) OVER (PARTITION BY mine_id ORDER BY production_date);", + "sql_explanation": "The SQL query identifies the mines with a significant increase in gold production compared to the previous day by partitioning the data based on mine_id, ordering by production_date, and using the LAG function to access the previous day\u0027s gold_production value. The query then calculates the production_change by subtracting the previous day\u0027s production from the current day\u0027s production. The query filters the results to only show mines with a production_change greater than 1.2 times the previous day\u0027s production." 
+}, { + "id": "126", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the daily change in water usage for the South African mining operations.", + "sql_context": "CREATE TABLE Mining_Operation (Operation_ID INT, Mine_Name VARCHAR(50), Location VARCHAR(50), Operation_Type VARCHAR(50), Start_Date DATE, End_Date DATE); CREATE TABLE Environmental_Impact (Impact_ID INT, Operation_ID INT, Date DATE, Carbon_Emissions INT, Water_Usage INT, Waste_Generation INT);", + "sql": "SELECT Operation_ID, Date, Water_Usage, LAG(Water_Usage, 1) OVER (PARTITION BY Operation_ID ORDER BY Date) AS Previous_Day_Water, (Water_Usage - LAG(Water_Usage, 1) OVER (PARTITION BY Operation_ID ORDER BY Date)) AS Daily_Change_Water FROM Environmental_Impact WHERE Location \u003d \u0027South Africa\u0027;", + "sql_explanation": "This query calculates the daily change in water usage for the South African mining operations." +}, { + "id": "275", + "domain": "mining industry", + "domain_description": "Mining operation data, environmental impact stats, workforce diversity, and resource depletion metrics.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily production of gold for the last 30 days, partitioned by site?", + "sql_context": "CREATE TABLE site (site_id INT, site_name VARCHAR(50)); INSERT INTO site (site_id, site_name) VALUES (1, \u0027Site A\u0027), (2, \u0027Site B\u0027); CREATE TABLE production (production_id INT, site_id INT, product VARCHAR(10), production_date DATE, quantity INT); INSERT INTO production (production_id, site_id, product, production_date, quantity) VALUES (1, 1, \u0027gold\u0027, \u00272021-01-01\u0027, 50), (2, 1, \u0027gold\u0027, \u00272021-01-02\u0027, 60), (3, 1, \u0027gold\u0027, \u00272021-01-03\u0027, 70), (4, 2, \u0027gold\u0027, \u00272021-01-01\u0027, 80), (5, 2, \u0027gold\u0027, \u00272021-01-02\u0027, 90), (6, 2, \u0027gold\u0027, \u00272021-01-03\u0027, 100);", + "sql": "SELECT site_id, product, production_date, quantity, ROW_NUMBER() OVER (PARTITION BY site_id, production_date ORDER BY site_id, production_date) AS daily_production FROM production WHERE product \u003d \u0027gold\u0027 AND production_date \u003e\u003d DATEADD(day, -30, GETDATE());", + "sql_explanation": "This query calculates the daily production of gold for the last 30 days, partitioned by site, by selecting the site_id, product, production_date, and quantity columns from the production table. It filters for gold products and production dates in the last 30 days. Then, it uses the ROW_NUMBER() function to partition the results by site_id and production_date and order the results by site_id and production_date. This results in a unique daily production for each site." 
+}, { + "id": "489", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What was the market share of DrugX in Q2 2021?", + "sql_context": "CREATE TABLE sales_data (drug_name VARCHAR(100), sales_quantity INT, quarter INT, year INT); INSERT INTO sales_data (drug_name, sales_quantity, quarter, year) VALUES (\u0027DrugX\u0027, 1200, 2, 2021), (\u0027DrugY\u0027, 800, 2, 2021), (\u0027DrugX\u0027, 1500, 3, 2021), (\u0027DrugY\u0027, 900, 3, 2021);", + "sql": "SELECT (SUM(sales_quantity) FILTER (WHERE drug_name \u003d \u0027DrugX\u0027) OVER (PARTITION BY year)) * 100.0 / NULLIF(SUM(sales_quantity) OVER (PARTITION BY year), 0) AS market_share FROM sales_data WHERE year \u003d 2021 AND quarter \u003d 2;", + "sql_explanation": "The SQL query calculates the market share of DrugX in Q2 2021 by first summing the sales quantities for DrugX in that quarter and then dividing by the total sales quantity for all drugs in that quarter (expressed as a percentage)." +}, { + "id": "1593", + "domain": "pharmaceuticals", + "domain_description": "Clinical trial outcomes, drug approval data, sales figures, R\u0026D expenditures, and market access strategies.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total R\u0026D expenditure per clinical trial phase, ranked by total expenditure?", + "sql_context": "CREATE TABLE RDExpenses (TrialID int, Phase varchar(50), Expenditure decimal(18,2)); INSERT INTO RDExpenses (TrialID, Phase, Expenditure) VALUES (1, \u0027Phase1\u0027, 500000.00), (2, \u0027Phase2\u0027, 800000.00), (3, \u0027Phase3\u0027, 1200000.00), (4, \u0027Phase1\u0027, 700000.00), (5, \u0027Phase2\u0027, 900000.00), (6, \u0027Phase3\u0027, 1500000.00);", + "sql": "SELECT Phase, SUM(Expenditure) as TotalExpenditure, ROW_NUMBER() OVER (ORDER BY SUM(Expenditure) DESC) as ExpenditureRank FROM RDExpenses GROUP BY Phase;", + "sql_explanation": "This SQL query calculates the total R\u0026D expenditure per clinical trial phase using the RDExpenses table, and assigns a rank to each phase based on their total expenditure using the ROW_NUMBER() window function with the ordering by the total expenditure in descending order." +}, { + "id": "516", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the monthly water consumption trend for Cape Town, South Africa from January 2019 to December 2021.", + "sql_context": "CREATE TABLE cape_town_water_usage (id INT, month VARCHAR(9), year INT, monthly_consumption FLOAT); INSERT INTO cape_town_water_usage (id, month, year, monthly_consumption) VALUES (1, \u0027January\u0027, 2019, 120), (2, \u0027February\u0027, 2019, 125), (3, \u0027March\u0027, 2019, 130), (4, \u0027January\u0027, 2020, 110), (5, \u0027February\u0027, 2020, 115), (6, \u0027March\u0027, 2020, 120), (7, \u0027January\u0027, 2021, 105), (8, \u0027February\u0027, 2021, 110), (9, \u0027March\u0027, 2021, 115);", + "sql": "SELECT year, AVG(monthly_consumption) as average_monthly_consumption, LEAD(AVG(monthly_consumption)) OVER (ORDER BY year) as next_year_average_monthly_consumption FROM cape_town_water_usage GROUP BY year ORDER BY year;", + "sql_explanation": "Determine the monthly water consumption trend for Cape Town from January 2019 to December 2021 by using the LEAD() function to retrieve the next year\u0027s average monthly consumption." +}, { + "id": "578", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum number of consecutive days without rain in the city of Cape Town in the year 2020?", + "sql_context": "CREATE TABLE RainfallData (date DATE, city VARCHAR(20), rainfall FLOAT);", + "sql": "SELECT DATEDIFF(date, LAG(date) OVER (PARTITION BY city ORDER BY date)) + 1 AS consecutive_days_no_rain FROM RainfallData WHERE city \u003d \u0027Cape Town\u0027 AND rainfall \u003d 0 ORDER BY consecutive_days_no_rain DESC LIMIT 1;", + "sql_explanation": "This query selects the difference in days between the current date and the previous date, adds 1 to the result, and partitions the result set by city in descending order based on the number of consecutive days with no rain in the RainfallData table where the city is \u0027Cape Town\u0027 and the rainfall is 0. The query returns the top 1 record." +}, { + "id": "1162", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the change in monthly water consumption between 2020 and 2021 for Sydney, Australia.", + "sql_context": "CREATE TABLE australia_water_usage (id INT, city VARCHAR(50), year INT, monthly_consumption FLOAT); INSERT INTO australia_water_usage (id, city, year, monthly_consumption) VALUES (1, \u0027Sydney\u0027, 2020, 140), (2, \u0027Sydney\u0027, 2021, 145);", + "sql": "SELECT city, (LAG(monthly_consumption) OVER (PARTITION BY city ORDER BY year)) - monthly_consumption AS consumption_change FROM australia_water_usage WHERE city \u003d \u0027Sydney\u0027;", + "sql_explanation": "Calculate the change in monthly water consumption between 2020 and 2021 for Sydney by using the LAG() function to retrieve the previous year\u0027s consumption and subtracting it from the current year\u0027s consumption." +}, { + "id": "1763", + "domain": "water resources", + "domain_description": "Water usage metrics, drought impact assessments, wastewater treatment data, and water conservation initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which facilities in each district have the highest wastewater volume, and what are their ranks?", + "sql_context": "CREATE TABLE facilities (id INT, facility_name VARCHAR(50), district_id INT, total_employees INT, wastewater_volume_cubic_meters INT); INSERT INTO facilities (id, facility_name, district_id, total_employees, wastewater_volume_cubic_meters) VALUES (1, \u0027Wastewater Treatment Plant 1\u0027, 1, 25, 1500); INSERT INTO facilities (id, facility_name, district_id, total_employees, wastewater_volume_cubic_meters) VALUES (2, \u0027Wastewater Treatment Plant 2\u0027, 2, 30, 1800);", + "sql": "SELECT id, facility_name, district_id, RANK() OVER (PARTITION BY district_id ORDER BY wastewater_volume_cubic_meters DESC) as rank FROM facilities;", + "sql_explanation": "The SQL query ranks facilities based on their wastewater volume within their respective districts with the help of the RANK() window function with partitioning by district_id." +}, { + "id": "596", + "domain": "arts and culture", + "domain_description": "Audience demographics, event attendance, program impact, and funding sources.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per attendee by state, sorted by the highest average donation?", + "sql_context": "CREATE TABLE Attendees (ID INT, AttendeeName TEXT, State TEXT); INSERT INTO Attendees (ID, AttendeeName, State) VALUES (1, \u0027Jane Doe\u0027, \u0027California\u0027), (2, \u0027John Smith\u0027, \u0027New York\u0027), (3, \u0027Alice Johnson\u0027, \u0027Texas\u0027); CREATE TABLE Donations (ID INT, AttendeeID INT, DonationAmount DECIMAL(10,2), DonationDate DATE); INSERT INTO Donations (ID, AttendeeID, DonationAmount, DonationDate) VALUES (1, 1, 100.00, \u00272022-01-01\u0027), (2, 2, 200.00, \u00272022-02-01\u0027), (3, 3, 150.00, \u00272022-03-01\u0027), (4, 1, 50.00, \u00272022-04-01\u0027);", + "sql": "SELECT State, AVG(DonationAmount) as AvgDonation, ROW_NUMBER() OVER (ORDER BY AVG(DonationAmount) DESC) as Rank FROM Donations JOIN Attendees ON Donations.AttendeeID \u003d Attendees.ID GROUP BY State ORDER BY Rank;", + "sql_explanation": "This SQL query calculates the average donation amount per attendee by state, sorted by the highest average donation. It does this by using the AVG() function to find the average donation amount, the JOIN clause to combine the \u0027Donations\u0027 and \u0027Attendees\u0027 tables, and the GROUP BY clause to group the results by state. The ROW_NUMBER() window function is used to assign a unique rank to each state based on the average donation amount, and the final result is ordered by this rank in descending order." 
+}, { + "id": "355", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the trend in threat intelligence over the past month?", + "sql_context": "CREATE TABLE threat_intelligence (threat_id INT, threat_level INT, threat_date DATE); INSERT INTO threat_intelligence VALUES (1, 5, \u00272021-01-01\u0027), (2, 3, \u00272021-01-02\u0027), (3, 7, \u00272021-01-03\u0027);", + "sql": "SELECT DATE_TRUNC(\u0027day\u0027, threat_date) AS threat_day, AVG(threat_level) OVER (ORDER BY threat_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS avg_threat_level FROM threat_intelligence WHERE threat_date \u003e\u003d DATEADD(month, -1, CURRENT_DATE);", + "sql_explanation": "This query uses the AVG window function to find the average threat level for each day in the past month, using the ORDER BY clause with the ROWS BETWEEN clause to calculate the moving average of the threat level." +}, { + "id": "715", + "domain": "defense industry", + "domain_description": "Defense contract data, military equipment maintenance, threat intelligence metrics, and veteran employment stats.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 5 defense contractors in the USA in terms of contract value?", + "sql_context": "CREATE TABLE defense_contractors (contractor_id INT, contractor_name VARCHAR(255), contract_value FLOAT, country VARCHAR(255)); INSERT INTO defense_contractors (contractor_id, contractor_name, contract_value, country) VALUES (1, \u0027Lockheed Martin\u0027, 6000000, \u0027USA\u0027), (2, \u0027Boeing\u0027, 5000000, \u0027USA\u0027), (3, \u0027Raytheon\u0027, 4000000, \u0027USA\u0027), (4, \u0027Northrop Grumman\u0027, 3500000, \u0027USA\u0027), (5, \u0027General Dynamics\u0027, 3000000, \u0027USA\u0027);", + "sql": "SELECT contractor_name, contract_value FROM (SELECT contractor_name, contract_value, RANK() OVER (ORDER BY contract_value DESC) as rnk FROM defense_contractors WHERE country \u003d \u0027USA\u0027) t WHERE rnk \u003c\u003d 5;", + "sql_explanation": "This query retrieves the top 5 defense contractors in the USA in terms of contract value by using the RANK() window function to order defense contractors by their contract_value in descending order for the country USA and filtering for the top 5 ranked contractors." +}, { + "id": "111", + "domain": "defense security", + "domain_description": "Detailed records on military technology, intelligence operations, national security, and cybersecurity strategies.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Determine the percentage change in national security budgets for the last 3 years, per region.", + "sql_context": "CREATE TABLE budgets (budget_year INT, region_id INT, budget_amount INT); INSERT INTO budgets (budget_year, region_id, budget_amount) VALUES (2019, 1, 500), (2020, 1, 600), (2021, 1, 700), (2019, 2, 400), (2020, 2, 450), (2021, 2, 500);", + "sql": "SELECT budget_year, region_id, budget_amount, (budget_amount - LAG(budget_amount, 1) OVER (PARTITION BY region_id ORDER BY budget_year)) * 100.0 / LAG(budget_amount, 1) OVER (PARTITION BY region_id ORDER BY budget_year) as percentage_change FROM budgets WHERE budget_year \u003e\u003d YEAR(CURRENT_DATE) - 3;", + "sql_explanation": "This query determines the percentage change in national security budgets (budget_amount) for the last 3 years, per region, using a window function with the LAG function. It calculates the percentage change by subtracting the previous budget amount from the current budget amount and dividing by the previous budget amount. The LAG(budget_amount, 1) clause specifies to lag the budget amount by 1 row. The PARTITION BY region_id clause specifies to partition the data by region_id and the ORDER BY budget_year clause specifies to order the data by budget_year." +}, { + "id": "496", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 donors by total donation amount for \u0027refugee_support\u0027 program in \u0027community_development\u0027 database.", + "sql_context": "CREATE TABLE donors (id INT, name VARCHAR(255), organization VARCHAR(255), total_donation DECIMAL(10, 2)); INSERT INTO donors (id, name, organization, total_donation) VALUES (1, \u0027John Doe\u0027, \u0027Donor Organization A\u0027, 5000.00), (2, \u0027Jane Smith\u0027, \u0027Donor Organization B\u0027, 7000.00), (3, \u0027Robert Johnson\u0027, \u0027Donor Organization C\u0027, 6000.00);", + "sql": "SELECT name, organization, total_donation FROM (SELECT name, organization, total_donation, ROW_NUMBER() OVER (ORDER BY total_donation DESC) AS rank FROM donors WHERE program \u003d \u0027refugee_support\u0027) subquery WHERE rank \u003c\u003d 3;", + "sql_explanation": "This query uses a subquery and the ROW_NUMBER() function to rank the donors by their total donation amount for the \u0027refugee_support\u0027 program. The outer query then selects the top 3 donors based on their rank." +}, { + "id": "856", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "How many volunteers with \u0027Medical\u0027 skills were assigned before a volunteer with \u0027Engineering\u0027 skills?", + "sql_context": "CREATE TABLE volunteers_ext (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), skill VARCHAR(50), assignment_date DATE, end_date DATE); INSERT INTO volunteers_ext (id, name, age, gender, skill, assignment_date, end_date) VALUES (1, \u0027David\u0027, 25, \u0027Male\u0027, \u0027Medical\u0027, \u00272022-06-01\u0027, \u00272022-09-30\u0027), (2, \u0027Emma\u0027, 30, \u0027Female\u0027, \u0027Engineering\u0027, \u00272022-07-15\u0027, \u00272023-06-30\u0027);", + "sql": "SELECT COUNT(*) FROM (SELECT skill, assignment_date, LAG(skill) OVER (ORDER BY assignment_date) AS prev_skill FROM volunteers_ext WHERE skill \u003d \u0027Medical\u0027) t WHERE prev_skill \u003d \u0027Engineering\u0027;", + "sql_explanation": "This query identifies the volunteers with \u0027Medical\u0027 skills and checks if the previous skill was \u0027Engineering\u0027 using the LAG function, then calculates the count." +}, { + "id": "3194", + "domain": "humanitarian aid", + "domain_description": "Extensive data on disaster response, refugee support, community development, and advocacy in humanitarian aid.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average donation amount per day for each country?", + "sql_context": "CREATE TABLE daily_donations (country TEXT, donation_date DATE, donation FLOAT); INSERT INTO daily_donations (country, donation_date, donation) VALUES (\u0027Haiti\u0027, \u00272021-01-01\u0027, 50.00), (\u0027Haiti\u0027, \u00272021-01-02\u0027, 50.00), (\u0027Pakistan\u0027, \u00272021-01-01\u0027, 100.00), (\u0027Pakistan\u0027, \u00272021-01-02\u0027, 100.00), (\u0027Syria\u0027, \u00272021-01-01\u0027, 150.00), (\u0027Syria\u0027, \u00272021-01-02\u0027, 150.00), (\u0027Afghanistan\u0027, \u00272021-01-01\u0027, 200.00), (\u0027Afghanistan\u0027, \u00272021-01-02\u0027, 200.00);", + "sql": "SELECT country, AVG(donation) OVER (PARTITION BY country) AS avg_donation_per_day FROM daily_donations;", + "sql_explanation": "This query retrieves the average donation amount per day for each country by partitioning the daily_donations table by country and then computing the average of the donation column using the window function AVG(donation) OVER." +}, { + "id": "140", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of waste generated by the top 3 waste-generating countries in 2020?", + "sql_context": "CREATE TABLE yearly_waste (country VARCHAR(50), year INT, total_waste FLOAT); INSERT INTO yearly_waste (country, year, total_waste) VALUES (\u0027USA\u0027, 2020, 260), (\u0027China\u0027, 2020, 240), (\u0027India\u0027, 2020, 160), (\u0027Germany\u0027, 2020, 120), (\u0027Brazil\u0027, 2020, 100);", + "sql": "SELECT SUM(total_waste) / (SELECT SUM(total_waste) FROM yearly_waste WHERE year \u003d 2020) AS percentage_of_waste FROM yearly_waste WHERE country IN (SELECT country FROM (SELECT country, ROW_NUMBER() OVER (ORDER BY total_waste DESC) rn FROM yearly_waste WHERE year \u003d 2020) t WHERE rn \u003c\u003d 3);", + "sql_explanation": "This query calculates the percentage of waste generated by the top 3 waste-generating countries in 2020 by first calculating the sum of total waste for those countries, then dividing it by the sum of total waste for all countries in 2020." +}, { + "id": "874", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average recycling rate per month in \u0027CityE\u0027?", + "sql_context": "CREATE TABLE CityE (RecyclingQuantity INT, GenerationDate DATE); INSERT INTO CityE (RecyclingQuantity, GenerationDate) VALUES (250, \u00272021-01-01\u0027), (300, \u00272021-02-01\u0027), (350, \u00272021-03-01\u0027);", + "sql": "SELECT AVG(RecyclingQuantity) FROM (SELECT RecyclingQuantity, ROW_NUMBER() OVER (PARTITION BY EXTRACT(MONTH FROM GenerationDate) ORDER BY GenerationDate) as rn FROM CityE) tmp WHERE rn \u003d 1;", + "sql_explanation": "Calculate the average recycling rate per month by partitioning the data by month, assigning row numbers, filtering row number 1 (first recycling in each month), and then calculating the average recycling quantity." +}, { + "id": "1009", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the daily waste quantity for each type, ranked by the highest quantity, for the Southeast region?", + "sql_context": "CREATE TABLE waste_types (waste_type VARCHAR(255), region VARCHAR(255), waste_quantity INT, date DATE); INSERT INTO waste_types (waste_type, region, waste_quantity, date) VALUES (\u0027Plastic\u0027, \u0027Southeast\u0027, 100, \u00272021-01-01\u0027), (\u0027Plastic\u0027, \u0027Southeast\u0027, 150, \u00272021-01-02\u0027), (\u0027Paper\u0027, \u0027Southeast\u0027, 200, \u00272021-01-01\u0027), (\u0027Paper\u0027, \u0027Southeast\u0027, 250, \u00272021-01-02\u0027);", + "sql": "SELECT waste_type, region, waste_quantity, date, RANK() OVER (PARTITION BY waste_type ORDER BY waste_quantity DESC) as daily_waste_rank FROM waste_types WHERE region \u003d \u0027Southeast\u0027;", + "sql_explanation": "This query ranks the daily waste quantity for each type, partitioned by waste type and ordered by the highest quantity, for the Southeast region." +}, { + "id": "1273", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the monthly change in landfill capacity in 2021?", + "sql_context": "CREATE TABLE landfill_capacity (id INT, date DATE, capacity INT); INSERT INTO landfill_capacity (id, date, capacity) VALUES (1, \u00272021-01-01\u0027, 10000), (2, \u00272021-02-01\u0027, 9500), (3, \u00272021-03-01\u0027, 9200), (4, \u00272021-04-01\u0027, 8900);", + "sql": "SELECT date, capacity - LAG(capacity) OVER (ORDER BY date) AS change_in_capacity FROM landfill_capacity WHERE date BETWEEN \u00272021-01-01\u0027 AND \u00272021-12-31\u0027 ORDER BY date;", + "sql_explanation": "This query calculates the monthly change in landfill capacity in 2021 by selecting the \u0027date\u0027 column and calculating the difference between the current and previous \u0027capacity\u0027 values, ordered by \u0027date\u0027, where the \u0027date\u0027 is between \u00272021-01-01\u0027 and \u00272021-12-31\u0027." +}, { + "id": "1760", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in landfill capacity for the North American region between 2019 and 2020?", + "sql_context": "CREATE TABLE landfill_capacity (id INT, region VARCHAR(255), year INT, capacity INT); INSERT INTO landfill_capacity (id, region, year, capacity) VALUES (1, \u0027North America\u0027, 2019, 20000), (2, \u0027North America\u0027, 2020, 22000), (3, \u0027Europe\u0027, 2019, 18000), (4, \u0027Europe\u0027, 2020, 20000);", + "sql": "SELECT region, (capacity - LAG(capacity) OVER (PARTITION BY region ORDER BY year)) AS change FROM landfill_capacity WHERE region \u003d \u0027North America\u0027;", + "sql_explanation": "The SQL query calculates the change in landfill capacity for the North American region between 2019 and 2020 by using the LAG() window function to subtract the previous year\u0027s capacity from the current year\u0027s capacity for the North American region." +}, { + "id": "1907", + "domain": "waste management", + "domain_description": "Waste generation metrics, recycling rates, landfill capacity data, and circular economy initiatives.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the change in waste generation for Africa between 2016 and 2017?", + "sql_context": "CREATE TABLE waste_generation (region VARCHAR(50), year INT, waste_quantity FLOAT); INSERT INTO waste_generation (region, year, waste_quantity) VALUES (\u0027Africa\u0027, 2016, 120.0), (\u0027Africa\u0027, 2017, 125.5);", + "sql": "SELECT (LAG(waste_quantity, 1) OVER (PARTITION BY region ORDER BY year) - waste_quantity) * 100 FROM waste_generation WHERE region \u003d \u0027Africa\u0027;", + "sql_explanation": "Calculates the change in waste generation for Africa between the years 2016 and 2017 by using the LAG function to access the previous year\u0027s waste quantity, subtracting the current year\u0027s waste quantity, and multiplying it by 100 to convert to percentage." +}, { + "id": "113", + "domain": "cannabis industry", + "domain_description": "Cannabis production data, dispensary sales stats, regulatory compliance metrics, and social equity trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 5 strains with the highest average retail price in Massachusetts dispensaries during Q3 2022.", + "sql_context": "CREATE TABLE strains (id INT, name TEXT, type TEXT); INSERT INTO strains (id, name, type) VALUES (12, \u0027Blue Dream\u0027, \u0027Hybrid\u0027), (13, \u0027OG Kush\u0027, \u0027Indica\u0027), (14, \u0027Jack Herer\u0027, \u0027Sativa\u0027); CREATE TABLE sales (id INT, strain_id INT, retail_price DECIMAL, sale_date DATE, state TEXT); INSERT INTO sales (id, strain_id, retail_price, sale_date, state) VALUES (33, 12, 38.00, \u00272022-07-01\u0027, \u0027Massachusetts\u0027), (34, 13, 40.00, \u00272022-08-15\u0027, \u0027Massachusetts\u0027), (35, 14, 42.00, \u00272022-09-30\u0027, \u0027Massachusetts\u0027);", + "sql": "SELECT name, AVG(retail_price) FROM (SELECT name, retail_price, ROW_NUMBER() OVER (PARTITION BY name ORDER BY retail_price DESC) rn FROM sales WHERE state \u003d \u0027Massachusetts\u0027 AND sale_date \u003e\u003d \u00272022-07-01\u0027 AND sale_date \u003c \u00272022-10-01\u0027) tmp WHERE rn \u003c\u003d 5 GROUP BY name ORDER BY AVG(retail_price) DESC;", + "sql_explanation": "Find the top 5 strains with the highest average retail price in Massachusetts dispensaries during Q3 2022." +}, { + "id": "1114", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 agricultural innovations by funding in the Western region.", + "sql_context": "CREATE TABLE Innovations (id INT PRIMARY KEY, name VARCHAR(50), region VARCHAR(20), funding FLOAT);", + "sql": "SELECT name, funding FROM (SELECT name, funding, ROW_NUMBER() OVER (PARTITION BY region ORDER BY funding DESC) rn FROM Innovations WHERE region \u003d \u0027Western\u0027) tmp WHERE rn \u003c\u003d 3;", + "sql_explanation": "This query identifies the top 3 agricultural innovations by funding in the Western region by using a window function to partition the data by region and order by funding in descending order. It then filters the top 3 rows per region." +}, { + "id": "1729", + "domain": "rural development", + "domain_description": "Agricultural innovation metrics, rural infrastructure projects, community development initiatives, and economic diversification efforts.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Calculate the average cost of rural community projects per sector and rank them in ascending order.", + "sql_context": "CREATE TABLE community_projects (id INT, project_name VARCHAR(255), location VARCHAR(255), sector VARCHAR(255), cost FLOAT); INSERT INTO community_projects (id, project_name, location, sector, cost) VALUES (1, \u0027Cultural Center\u0027, \u0027Village A\u0027, \u0027Culture\u0027, 18000.00), (2, \u0027Community Health Center\u0027, \u0027Village B\u0027, \u0027Health\u0027, 35000.00), (3, \u0027Sports Facility\u0027, \u0027Village C\u0027, \u0027Sports\u0027, 12000.00), (4, \u0027Public Library\u0027, \u0027Village D\u0027, \u0027Education\u0027, 24000.00);", + "sql": "SELECT sector, AVG(cost) AS avg_cost, RANK() OVER (ORDER BY AVG(cost)) AS sector_rank FROM community_projects GROUP BY sector ORDER BY avg_cost ASC;", + "sql_explanation": "This query calculates the average cost of community projects per sector by using the AVG() window function. The sectors are then ranked in ascending order based on the average cost using the RANK() window function." +}, { + "id": "106", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average temperature (in Kelvin) per spacecraft per week, ranked in descending order?", + "sql_context": "CREATE TABLE spacecraft_temperatures (spacecraft_name TEXT, temperature FLOAT, mission_date DATE);", + "sql": "SELECT spacecraft_name, DATE_TRUNC(\u0027week\u0027, mission_date) as mission_week, AVG(temperature) as avg_temperature, RANK() OVER (PARTITION BY spacecraft_name ORDER BY AVG(temperature) DESC) as temp_rank FROM spacecraft_temperatures GROUP BY spacecraft_name, mission_week ORDER BY spacecraft_name, temp_rank;", + "sql_explanation": "This query calculates the average temperature per spacecraft per week by truncating the mission dates to the week level using the DATE_TRUNC() function, and then grouping the records by spacecraft_name and mission_week. The query calculates the average temperature for each spacecraft per week, ranks the weeks for each spacecraft by average temperature in descending order, and assigns a temperature rank to each week using the RANK() window function. The query then orders the results by spacecraft name and temperature rank." +}, { + "id": "643", + "domain": "space exploration", + "domain_description": "Spacecraft manufacturing data, space mission records, astronaut medical data, and astrophysics research.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the number of spacecraft missions per week, and rank them in descending order?", + "sql_context": "CREATE TABLE spacecraft_missions (spacecraft_name TEXT, mission_date DATE);", + "sql": "SELECT DATE_TRUNC(\u0027week\u0027, mission_date) as mission_week, COUNT(*) as mission_count, RANK() OVER (ORDER BY COUNT(*) DESC) as mission_rank FROM spacecraft_missions GROUP BY mission_week ORDER BY mission_rank;", + "sql_explanation": "This query calculates the number of spacecraft missions per week by truncating the mission dates to the week level using the DATE_TRUNC() function, and then grouping the records by mission_week. The query calculates the mission count for each week, ranks the weeks by mission count in descending order, and assigns a mission rank to each week using the RANK() window function." +}, { + "id": "293", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the transaction amount difference between the previous day and the current day for each customer?", + "sql_context": "CREATE TABLE transactions (customer_id INT, transaction_date DATE, amount DECIMAL(10,2)); INSERT INTO transactions (customer_id, transaction_date, amount) VALUES (1, \u00272022-01-01\u0027, 100), (1, \u00272022-01-02\u0027, 150), (2, \u00272022-01-01\u0027, 50), (2, \u00272022-01-02\u0027, 200);", + "sql": "SELECT customer_id, transaction_date, amount, LAG(amount) OVER (PARTITION BY customer_id ORDER BY transaction_date) AS previous_day_amount, amount - LAG(amount) OVER (PARTITION BY customer_id ORDER BY transaction_date) AS difference FROM transactions;", + "sql_explanation": "This SQL query calculates the transaction amount difference between the previous day and the current day for each customer by using the LAG function to get the previous day\u0027s amount and then subtracting it from the current day\u0027s amount. The query uses the PARTITION BY clause to partition the data by customer_id and the ORDER BY clause to order it by transaction_date." +}, { + "id": "326", + "domain": "financial services", + "domain_description": "Detailed financial data including investment strategies, risk management, fraud detection, customer analytics, and regulatory compliance.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 customers by transaction value in the last week?", + "sql_context": "CREATE TABLE customers (customer_id INT, transaction_date DATE, transaction_value FLOAT); INSERT INTO customers VALUES (1, \u00272021-01-01\u0027, 100.0), (1, \u00272021-02-01\u0027, 200.0), (2, \u00272021-03-01\u0027, 150.0);", + "sql": "SELECT customer_id, RANK() OVER (ORDER BY SUM(transaction_value) DESC ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS customer_rank FROM customers WHERE transaction_date \u003e\u003d DATEADD(week, -1, CURRENT_DATE) GROUP BY customer_id HAVING COUNT(*) \u003e\u003d 7;", + "sql_explanation": "The SQL query identifies the top 5 customers by transaction value in the last week by using the RANK function. It calculates the sum of transaction_value for each customer in the last week by using the SUM window function and orders it in descending order. The ROWS BETWEEN 6 PRECEDING AND CURRENT ROW clause defines the window frame to include the current row and the 6 preceding rows. The WHERE clause filters the data to include only the transactions in the last week and the HAVING clause filters the data to include only the customers with at least 7 transactions." +}, { + "id": "985", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What are the top 3 countries with the highest military expenditure as a percentage of GDP?", + "sql_context": "CREATE TABLE MilitaryExpenditure (id INT, country VARCHAR(255), military_expenditure DECIMAL(10,2), gdp DECIMAL(10,2)); INSERT INTO MilitaryExpenditure (id, country, military_expenditure, gdp) VALUES (1, \u0027Country1\u0027, 0.15, 50000000), (2, \u0027Country2\u0027, 0.20, 60000000), (3, \u0027Country3\u0027, 0.10, 40000000), (4, \u0027Country4\u0027, 0.12, 45000000);", + "sql": "SELECT country FROM (SELECT country, ROW_NUMBER() OVER (ORDER BY (military_expenditure / gdp) DESC) AS rank FROM MilitaryExpenditure) AS ranked_military_expenditure WHERE rank \u003c\u003d 3;", + "sql_explanation": "The SQL query retrieves the top 3 countries with the highest military expenditure as a percentage of GDP by selecting the country column from a subquery that ranks the countries based on their military expenditure as a percentage of GDP and then filtering to only include the top 3. The subquery uses the ROW_NUMBER() window function to rank the countries based on their military expenditure as a percentage of GDP, which is calculated by dividing the military_expenditure column by the gdp column." +}, { + "id": "1802", + "domain": "defense contractors", + "domain_description": "Military equipment sales data, defense project timelines, contract negotiations, and geopolitical risk assessments.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the geopolitical risk score for each defense project in the Asia-Pacific region, ranked by score?", + "sql_context": "CREATE TABLE Defense_Projects (project_id INT, project_name VARCHAR(255), region VARCHAR(255), risk_score FLOAT); INSERT INTO Defense_Projects (project_id, project_name, region, risk_score) VALUES (1, \u0027Project A\u0027, \u0027Asia-Pacific\u0027, 75), (2, \u0027Project B\u0027, \u0027Asia-Pacific\u0027, 80), (3, \u0027Project C\u0027, \u0027Europe\u0027, 60), (4, \u0027Project D\u0027, \u0027Asia-Pacific\u0027, 65);", + "sql": "SELECT project_name, region, risk_score, RANK() OVER (ORDER BY risk_score DESC) AS risk_rank FROM Defense_Projects WHERE region \u003d \u0027Asia-Pacific\u0027;", + "sql_explanation": "The SQL query calculates the geopolitical risk score for each defense project in the Asia-Pacific region, ranked by score. It first filters the data for projects in the Asia-Pacific region, then calculates the rank of each project based on its risk score using the RANK() window function." +}, { + "id": "1086", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which states have more than 5 disability support programs?", + "sql_context": "CREATE TABLE states (state_id INT, state_name VARCHAR(50), num_programs INT); INSERT INTO states (state_id, state_name, num_programs) VALUES (1, \u0027California\u0027, 7), (2, \u0027Texas\u0027, 3), (3, \u0027New York\u0027, 6), (4, \u0027Florida\u0027, 4);", + "sql": "SELECT state_name FROM (SELECT state_name, ROW_NUMBER() OVER (ORDER BY num_programs DESC) as rn FROM states) t WHERE rn \u003c\u003d (SELECT COUNT(*) FROM states WHERE num_programs \u003e 5);", + "sql_explanation": "This query ranks states by the number of disability support programs in descending order and then returns state names that have more than 5 programs." +}, { + "id": "2795", + "domain": "disability services", + "domain_description": "Comprehensive data on disability accommodations, support programs, policy advocacy, and inclusion efforts in disability services.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the percentage of the total budget allocated for support programs in each state?", + "sql_context": "CREATE TABLE StateDisabilityBudget (StateName VARCHAR(50), ProgramName VARCHAR(50), Budget NUMERIC(18,2)); INSERT INTO StateDisabilityBudget VALUES (\u0027California\u0027, \u0027Disability Support Program 1\u0027, 500000.00), (\u0027California\u0027, \u0027Disability Support Program 2\u0027, 750000.00), (\u0027California\u0027, \u0027Disability Support Program 3\u0027, 900000.00), (\u0027New York\u0027, \u0027Disability Support Program 1\u0027, 600000.00), (\u0027New York\u0027, \u0027Disability Support Program 2\u0027, 800000.00), (\u0027New York\u0027, \u0027Disability Support Program 3\u0027, 1000000.00);", + "sql": "SELECT StateName, Budget, (Budget::DECIMAL / SUM(Budget) OVER ()) * 100 as Percentage FROM StateDisabilityBudget;", + "sql_explanation": "Calculates the percentage of the total budget allocated for support programs in each state by dividing the budget for each state by the total budget and multiplying by 100. Applies the SUM window function to calculate the total budget across all states, then uses the PERCENT_RANK function to calculate the percentage of the total budget for each state." +}, { + "id": "1648", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Rank the Education department\u0027s schools based on the number of students?", + "sql_context": "CREATE TABLE Education_Dept (School_Name VARCHAR(255), Dept_Name VARCHAR(255), Number_Of_Students INT); INSERT INTO Education_Dept VALUES (\u0027School A\u0027, \u0027Education\u0027, 1000), (\u0027School B\u0027, \u0027Education\u0027, 1200), (\u0027School C\u0027, \u0027Education\u0027, 800);", + "sql": "SELECT School_Name, Number_Of_Students, ROW_NUMBER() OVER (ORDER BY Number_Of_Students DESC) AS Rank FROM Education_Dept WHERE Dept_Name \u003d \u0027Education\u0027;", + "sql_explanation": "Rank the schools in the Education department based on the number of students by ordering the data by Number_Of_Students in descending order and assigning a unique row number to each row." +}, { + "id": "2867", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total number of permits issued by month and type?", + "sql_context": "CREATE TABLE Permits (ID INT, PermitType VARCHAR(50), IssueMonth VARCHAR(50)); INSERT INTO Permits VALUES (1, \u0027Building\u0027, \u0027January\u0027), (2, \u0027Plumbing\u0027, \u0027February\u0027), (3, \u0027Building\u0027, \u0027March\u0027);", + "sql": "SELECT IssueMonth, PermitType, COUNT(*) OVER (PARTITION BY IssueMonth, PermitType) AS PermitCount FROM Permits;", + "sql_explanation": "This query counts the total number of permits issued by month and type by partitioning the data by \u0027IssueMonth\u0027 and \u0027PermitType\u0027 and counting the rows for each group." +}, { + "id": "2883", + "domain": "government services", + "domain_description": "Public service delivery, budget allocation, citizen feedback, and policy impact.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Display the provinces and their respective citizen satisfaction scores with public transportation, ranked from highest to lowest.", + "sql_context": "CREATE TABLE Provinces (Province VARCHAR(255), SatisfactionScore INT); INSERT INTO Provinces (Province, SatisfactionScore) VALUES (\u0027Alberta\u0027, 75), (\u0027British Columbia\u0027, 85), (\u0027Ontario\u0027, 90), (\u0027Quebec\u0027, 80);", + "sql": "SELECT Province, SatisfactionScore, ROW_NUMBER() OVER (ORDER BY SatisfactionScore DESC) AS Rank FROM Provinces;", + "sql_explanation": "The SQL query calculates the row number for each province based on the SatisfactionScore in descending order. This effectively ranks the provinces from highest to lowest based on their citizen satisfaction scores with public transportation." +}, { + "id": "257", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the percentage change in the number of subscribers for streaming services by quarter.", + "sql_context": "CREATE TABLE subscribers (service VARCHAR(50), subscriber_count INT, quarter VARCHAR(10), year INT); INSERT INTO subscribers (service, subscriber_count, quarter, year) VALUES (\u0027Netflix\u0027, 200000000, \u0027Q1\u0027, 2020), (\u0027Netflix\u0027, 208000000, \u0027Q2\u0027, 2020), (\u0027Netflix\u0027, 215000000, \u0027Q3\u0027, 2020);", + "sql": "SELECT service, quarter, year, LAG(subscriber_count) OVER(PARTITION BY service ORDER BY year, quarter) as prev_subscriber_count, (subscriber_count - COALESCE(prev_subscriber_count, subscriber_count)) * 100.0 / subscriber_count as pct_change FROM subscribers;", + "sql_explanation": "Calculate the percentage change in subscriber count by comparing the current subscriber count to the previous quarter\u0027s subscriber count for each streaming service." +}, { + "id": "2460", + "domain": "media entertainment", + "domain_description": "Audience demographics, viewership trends, content ratings, production budgets, and marketing strategies for movies, TV shows, and music.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Who are the top 5 actors with the highest number of followers on social media?", + "sql_context": "CREATE TABLE actors (id INT, name VARCHAR(255), followers INT); INSERT INTO actors (id, name, followers) VALUES (1, \u0027Dwayne Johnson\u0027, 150000000), (2, \u0027Emma Watson\u0027, 65000000), (3, \u0027Tom Holland\u0027, 75000000), (4, \u0027Scarlett Johansson\u0027, 120000000), (5, \u0027Robert Downey Jr.\u0027, 125000000), (6, \u0027Chris Evans\u0027, 80000000);", + "sql": "SELECT name FROM (SELECT name, ROW_NUMBER() OVER (ORDER BY followers DESC) AS rank FROM actors) AS subquery WHERE rank \u003c\u003d 5;", + "sql_explanation": "This query retrieves the names of the top 5 actors with the highest number of followers on social media by using a subquery and the \u0027ROW_NUMBER()\u0027 function to order actors by their \u0027followers\u0027 in descending order and filtering the first 5 results." +}, { + "id": "1051", + "domain": "rare earth elements", + "domain_description": "Rare earth element production data, market trends, environmental impact stats, and supply chain transparency.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the lanthanum production difference between 2018 and 2017 for each processor.", + "sql_context": "CREATE TABLE LanthanumProduction (Processor VARCHAR(50), Year INT, Production FLOAT); INSERT INTO LanthanumProduction(Processor, Year, Production) VALUES (\u0027ProcessorA\u0027, 2017, 451.5), (\u0027ProcessorA\u0027, 2018, 456.7), (\u0027ProcessorA\u0027, 2019, 462.1), (\u0027ProcessorB\u0027, 2017, 389.1), (\u0027ProcessorB\u0027, 2018, 393.5), (\u0027ProcessorB\u0027, 2019, 399.8);", + "sql": "SELECT Processor, Production - LAG(Production) OVER (PARTITION BY Processor ORDER BY Year) as Difference FROM LanthanumProduction WHERE Processor IN (\u0027ProcessorA\u0027, \u0027ProcessorB\u0027);", + "sql_explanation": "This query calculates the lanthanum production difference between 2017 and 2018 for each processor by subtracting the previous year\u0027s production using the LEAD() function, filtering for ProcessorA and ProcessorB." +}, { + "id": "234", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "List the top 3 hottest days in Brazil in the last 30 days?", + "sql_context": "CREATE TABLE WeatherData (id INT, Location VARCHAR(255), Temperature INT, Timestamp DATETIME); INSERT INTO WeatherData (id, Location, Temperature, Timestamp) VALUES (1, \u0027Brazil\u0027, 35, \u00272022-05-15 12:00:00\u0027), (2, \u0027Brazil\u0027, 38, \u00272022-05-16 12:00:00\u0027);", + "sql": "SELECT Location, Temperature, Timestamp FROM (SELECT Location, Temperature, Timestamp, ROW_NUMBER() OVER (PARTITION BY Location ORDER BY Temperature DESC) as rn FROM WeatherData WHERE Timestamp BETWEEN DATEADD(day, -30, GETDATE()) AND GETDATE()) t WHERE rn \u003c\u003d 3;", + "sql_explanation": "The SQL query identifies the top 3 hottest days in Brazil by filtering the WeatherData table for records in the last 30 days, then assigning a row number within each location based on temperature in descending order. Finally, the outer query selects the location, temperature, and timestamp for the top 3 hottest days in Brazil." +}, { + "id": "287", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 2 solar radiation values for each day in \u0027Brazil\u0027 for the month of January?", + "sql_context": "CREATE TABLE iot_sensors (id INT, name TEXT, country TEXT); INSERT INTO iot_sensors (id, name, country) VALUES (1, \u0027IS1\u0027, \u0027Brazil\u0027), (2, \u0027IS2\u0027, \u0027Argentina\u0027); CREATE TABLE solar_radiation (id INT, sensor_id INT, timestamp TIMESTAMP, radiation FLOAT); INSERT INTO solar_radiation (id, sensor_id, timestamp, radiation) VALUES (1, 1, \u00272021-01-01 12:00:00\u0027, 800), (2, 1, \u00272021-01-01 16:00:00\u0027, 900), (3, 1, \u00272021-01-01 20:00:00\u0027, 750), (4, 2, \u00272021-01-01 12:00:00\u0027, 650), (5, 2, \u00272021-01-01 16:00:00\u0027, 700), (6, 2, \u00272021-01-01 20:00:00\u0027, 600);", + "sql": "SELECT sensor_id, timestamp, radiation FROM (SELECT sensor_id, timestamp, radiation, RANK() OVER (PARTITION BY timestamp ORDER BY radiation DESC) rnk FROM solar_radiation WHERE country \u003d \u0027Brazil\u0027 AND EXTRACT(MONTH FROM timestamp) \u003d 1) t WHERE rnk \u003c\u003d 2;", + "sql_explanation": "Find the top 2 solar radiation values for each day in \u0027Brazil\u0027 for the month of January. Partition the solar radiation data by timestamp, order by radiation in descending order, and select the top 2 records of each partition." +}, { + "id": "984", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 3 satellite images with the highest resolution for plot_id 456", + "sql_context": "CREATE TABLE satellite_image (image_id INT, plot_id INT, resolution INT); INSERT INTO satellite_image (image_id, plot_id, resolution) VALUES (1, 456, 1080), (2, 456, 1440), (3, 456, 2160), (4, 456, 720), (5, 456, 1080);", + "sql": "SELECT image_id, resolution FROM (SELECT image_id, resolution, ROW_NUMBER() OVER (ORDER BY resolution DESC) row_num FROM satellite_image WHERE plot_id \u003d 456) tmp WHERE row_num \u003c\u003d 3;", + "sql_explanation": "This query first selects the image_id and resolution from a subquery that calculates the row number for each row in descending order of resolution. It then selects the rows with a row number of 1, 2, or 3, which are the top 3 satellite images with the highest resolution for plot_id 456." +}, { + "id": "2545", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in capture time between consecutive satellite images in the \u0027satellite_images\u0027 table?", + "sql_context": "CREATE TABLE satellite_images (image_id INT, image_url TEXT, capture_time TIMESTAMP); INSERT INTO satellite_images (image_id, image_url, capture_time) VALUES (1, \u0027image1.jpg\u0027, \u00272022-01-01 10:00:00\u0027), (2, \u0027image2.jpg\u0027, \u00272021-05-01 10:00:00\u0027);", + "sql": "SELECT image_id, capture_time, capture_time - LEAD(capture_time) OVER (ORDER BY capture_time) diff FROM satellite_images;", + "sql_explanation": "Using the window function LEAD() with the ORDER BY clause, we calculate the difference in capture time between consecutive satellite images." +}, { + "id": "3935", + "domain": "precision agriculture", + "domain_description": "Precision farming data, satellite imagery analysis, IoT sensor metrics, and agricultural automation trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the earliest capture time for each satellite image in the \u0027satellite_images\u0027 table?", + "sql_context": "CREATE TABLE satellite_images (image_id INT, image_url TEXT, capture_time TIMESTAMP); INSERT INTO satellite_images (image_id, image_url, capture_time) VALUES (1, \u0027image1.jpg\u0027, \u00272022-01-01 10:00:00\u0027), (2, \u0027image2.jpg\u0027, \u00272021-05-01 10:00:00\u0027);", + "sql": "SELECT image_id, MIN(capture_time) OVER (PARTITION BY image_id) FROM satellite_images;", + "sql_explanation": "Using the window function MIN() with the PARTITION BY clause, we calculate the earliest capture time for each satellite image." +}, { + "id": "628", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Which train routes in Paris have more than 5 stations?", + "sql_context": "CREATE TABLE train_routes (route_id INT, city VARCHAR(50), num_stations INT); INSERT INTO train_routes (route_id, city, num_stations) VALUES (101, \u0027Paris\u0027, 7), (102, \u0027Paris\u0027, 4), (103, \u0027Berlin\u0027, 9), (104, \u0027London\u0027, 6);", + "sql": "SELECT route_id, city, num_stations FROM (SELECT route_id, city, num_stations, ROW_NUMBER() OVER (PARTITION BY city ORDER BY num_stations DESC) as rnk FROM train_routes) tmp WHERE rnk \u003d 1 AND city \u003d \u0027Paris\u0027;", + "sql_explanation": "Rank train routes in each city by the number of stations in descending order and then select those routes with more than 5 stations for the city of \u0027Paris\u0027." +}, { + "id": "1257", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the total fare collected for each mode of transportation, for the last year, ordered by the least profitable mode?", + "sql_context": "CREATE TABLE fare_collection (id INT, trip_id INT, mode VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO fare_collection (id, trip_id, mode, fare) VALUES (1, 1, \u0027bus\u0027, 2.50), (2, 2, \u0027metro\u0027, 3.00), (3, 1, \u0027train\u0027, 5.00);", + "sql": "SELECT SUM(fare) OVER (PARTITION BY mode ORDER BY SUM(fare) ASC) as total_fare, mode FROM fare_collection WHERE trip_date \u003e\u003d DATEADD(year, -1, GETDATE()) GROUP BY mode;", + "sql_explanation": "This query calculates the total fare collected for each mode of transportation, for the last 12 months, ordered by the least profitable mode. It uses the SUM function with the OVER clause to calculate the total fare for each mode, and the PARTITION BY clause to separate the data by mode. The ORDER BY clause ranks the modes by the least profitable mode." +}, { + "id": "2037", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Identify the top 3 busiest stations in the transportation system.", + "sql_context": "CREATE TABLE ENTRIES (station_name TEXT, entries INT); INSERT INTO ENTRIES (station_name, entries) VALUES (\u0027Station1\u0027, 200), (\u0027Station2\u0027, 150), (\u0027Station3\u0027, 250), (\u0027Station4\u0027, 300), (\u0027Station5\u0027, 100);", + "sql": "SELECT station_name FROM (SELECT station_name, DENSE_RANK() OVER (ORDER BY entries DESC) AS rank FROM ENTRIES) subquery WHERE rank \u003c\u003d 3;", + "sql_explanation": "This query identifies the top 3 busiest stations in the transportation system by using a subquery with the DENSE_RANK function to rank stations by their entries in descending order, and then selecting station_name where the rank is less than or equal to 3." +}, { + "id": "2125", + "domain": "public transportation", + "domain_description": "Extensive data on route planning, fare collection, vehicle maintenance, and accessibility in public transportation.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average time between train cleanings for each line?", + "sql_context": "CREATE TABLE trains (id INT, line VARCHAR(10), clean_date DATE); INSERT INTO trains (id, line, clean_date) VALUES (1, \u0027Red\u0027, \u00272022-01-01\u0027), (2, \u0027Green\u0027, \u00272022-01-02\u0027), (3, \u0027Blue\u0027, \u00272022-01-03\u0027);", + "sql": "SELECT line, AVG(DATEDIFF(\u0027day\u0027, LAG(clean_date) OVER (PARTITION BY line ORDER BY clean_date), clean_date)) FROM trains GROUP BY line;", + "sql_explanation": "This SQL query calculates the average time between train cleanings for each line by using the AVG() function to find the average number of days between cleanings for each line in the trains table. The LAG() function is used to access the previous clean date for each line, and the DATEDIFF() function is used to find the number of days between the current and previous clean dates. The query then groups the results by line and calculates the average." +}, { + "id": "981", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the maximum and minimum population of animals for each species?", + "sql_context": "CREATE TABLE animal_population (species VARCHAR(50), population INT); INSERT INTO animal_population (species, population) VALUES (\u0027Tiger\u0027, 300), (\u0027Lion\u0027, 250), (\u0027Elephant\u0027, 500), (\u0027Giraffe\u0027, 200);", + "sql": "SELECT species, MIN(population) OVER (PARTITION BY species) as min_population, MAX(population) OVER (PARTITION BY species) as max_population FROM animal_population ORDER BY species;", + "sql_explanation": "This query calculates the minimum and maximum population for each species by partitioning the data based on the \u0027species\u0027 column and then calculating the minimum and maximum of the \u0027population\u0027 column within each partition. It then orders the results by the \u0027species\u0027 column." +}, { + "id": "1130", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the population growth rate for each region and year?", + "sql_context": "CREATE TABLE animal_population (region VARCHAR(50), population INT, year INT); INSERT INTO animal_population (region, population, year) VALUES (\u0027Africa\u0027, 500, 2020), (\u0027Africa\u0027, 510, 2021), (\u0027Asia\u0027, 800, 2020), (\u0027Asia\u0027, 820, 2021), (\u0027Americas\u0027, 300, 2020), (\u0027Americas\u0027, 310, 2021);", + "sql": "SELECT region, year, (LAG(population) OVER (PARTITION BY region ORDER BY year) - population) * 100.0 / population as growth_rate FROM animal_population ORDER BY region, year;", + "sql_explanation": "This query calculates the population growth rate for each region and year by comparing the population of each region in consecutive years. It uses the LAG function to access the previous row\u0027s population value and then calculates the growth rate based on the difference between the previous and current population values. It then orders the results by the \u0027region\u0027 and \u0027year\u0027 columns." +}, { + "id": "1833", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the difference in the number of attendees for each community education program between the first and last occurrence?", + "sql_context": "CREATE TABLE community_education (program_name VARCHAR(255), location VARCHAR(255), date DATE, num_attendees INT); INSERT INTO community_education (program_name, location, date, num_attendees) VALUES (\u0027Wildlife Awareness\u0027, \u0027New York\u0027, \u00272020-01-01\u0027, 50), (\u0027Wildlife Awareness\u0027, \u0027Florida\u0027, \u00272020-03-10\u0027, 75), (\u0027Nature Walk\u0027, \u0027California\u0027, \u00272019-05-15\u0027, 25), (\u0027Nature Walk\u0027, \u0027California\u0027, \u00272020-05-15\u0027, 35);", + "sql": "SELECT program_name, num_attendees - FIRST_VALUE(num_attendees) OVER (PARTITION BY program_name ORDER BY date) as diff FROM community_education;", + "sql_explanation": "Calculate the difference in the number of attendees for each community education program between the first and last occurrence by subtracting the first value of the \u0027num_attendees\u0027 column from the \u0027num_attendees\u0027 column, partitioned by \u0027program_name\u0027 and ordered by \u0027date\u0027." +}, { + "id": "3044", + "domain": "wildlife conservation", + "domain_description": "Animal population data, habitat preservation efforts, and community education programs.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the average population of animals for each region?", + "sql_context": "CREATE TABLE animal_population (region VARCHAR(50), population INT); INSERT INTO animal_population (region, population) VALUES (\u0027Africa\u0027, 1000), (\u0027Asia\u0027, 1500), (\u0027Americas\u0027, 800);", + "sql": "SELECT AVG(population) OVER (PARTITION BY region) as avg_population FROM animal_population ORDER BY region;", + "sql_explanation": "This query calculates the average population for each region by partitioning the data based on the \u0027region\u0027 column and then calculating the average of the \u0027population\u0027 column within each partition. It then orders the results by the \u0027region\u0027 column." +}, { + "id": "43", + "domain": "hospitality technology", + "domain_description": "Hotel tech adoption metrics, online travel agency data, virtual tour engagement stats, and hospitality AI trends.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the market share of the top 3 AI chatbot providers for hotels in the MEA region?", + "sql_context": "CREATE TABLE ai_chatbot_providers (provider_id INT, provider_name TEXT, region TEXT, hotels_served INT); INSERT INTO ai_chatbot_providers (provider_id, provider_name, region, hotels_served) VALUES (1, \u0027Provider A\u0027, \u0027MEA\u0027, 1200), (2, \u0027Provider B\u0027, \u0027MEA\u0027, 850);", + "sql": "SELECT provider_name, ROUND(hotels_served * 100.0 / (SELECT SUM(hotels_served) FROM ai_chatbot_providers WHERE region \u003d \u0027MEA\u0027), 2) AS market_share FROM ai_chatbot_providers WHERE region \u003d \u0027MEA\u0027 AND provider_id IN (SELECT provider_id FROM (SELECT provider_id, RANK() OVER (ORDER BY hotels_served DESC) AS rank FROM ai_chatbot_providers WHERE region \u003d \u0027MEA\u0027) sub WHERE rank \u003c\u003d 3);", + "sql_explanation": "The SQL query calculates the market share of the top 3 AI chatbot providers for hotels in the MEA region by filtering providers based on the \u0027region\u0027 column and selecting providers with a provider_id within the top 3 providers based on the number of hotels served. It uses a subquery with RANK() to determine the rank of each provider in the MEA region and then filters providers with a rank less than or equal to 3. The market share is calculated by dividing the number of hotels served by the total number of hotels served by all providers in the MEA region, then multiplying by 100 and rounding to 2 decimal places using the ROUND function." 
+}, { + "id": "504", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "Find the top 5 models with the highest fairness score, for models trained on the \u0027Europe\u0027 or \u0027North America\u0027 regions.", + "sql_context": "CREATE TABLE model_details (model_id INT, fairness_score FLOAT, region_id INT);", + "sql": "SELECT model_id, fairness_score FROM (SELECT model_id, fairness_score, region_id, ROW_NUMBER() OVER (PARTITION BY region_id ORDER BY fairness_score DESC) rn FROM model_details WHERE region_id IN (2, 1)) md WHERE rn \u003c\u003d 5;", + "sql_explanation": "This query finds the top 5 models with the highest fairness score, for models trained on the \u0027Europe\u0027 or \u0027North America\u0027 regions. It does so by using a subquery with the ROW_NUMBER() window function. The PARTITION BY clause is used to partition the data by region_id, while the WHERE clause filters the data to only show the rows with the \u0027Europe\u0027 or \u0027North America\u0027 regions. The outer query then filters the results to only show the data for the top 5 rows based on the partitioned ROW_NUMBER." +}, { + "id": "2392", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the running total of safe AI research funding by year, with a partition for each research category?", + "sql_context": "CREATE SCHEMA ai_safety; CREATE TABLE funding (year INT, category VARCHAR(20), amount DECIMAL(10, 2)); INSERT INTO funding (year, category, amount) VALUES (2018, \u0027verifiability\u0027, 1000000.00), (2019, \u0027robustness\u0027, 1500000.00), (2020, \u0027verifiability\u0027, 1200000.00), (2021, \u0027transparency\u0027, 1700000.00);", + "sql": "SELECT year, category, amount, SUM(amount) OVER (PARTITION BY category ORDER BY year) as running_total FROM ai_safety.funding;", + "sql_explanation": "The SQL query uses the SUM() function with the OVER clause to calculate the running total of safe AI research funding by year, partitioned by research category and ordered by year." +}, { + "id": "2728", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the sum of fairness ratings for each AI model in the \u0027model_fairness\u0027 table?", + "sql_context": "CREATE TABLE model_fairness (model_name TEXT, fairness_rating FLOAT); INSERT INTO model_fairness (model_name, fairness_rating) VALUES (\u0027ModelA\u0027, 4.2), (\u0027ModelB\u0027, 4.6), (\u0027ModelC\u0027, 4.8);", + "sql": "SELECT model_name, SUM(fairness_rating) OVER (PARTITION BY model_name) AS total_fairness_rating FROM model_fairness;", + "sql_explanation": "This SQL query calculates the sum of fairness ratings for each AI model in the \u0027model_fairness\u0027 table. It uses the window function SUM with the PARTITION BY clause to partition the data by model_name and calculate the total fairness_rating for each partition." +}, { + "id": "2840", + "domain": "artificial intelligence", + "domain_description": "AI data on algorithmic fairness, AI safety, explainable AI, and creative AI applications.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the distribution of fairness scores for AI algorithms in different regions?", + "sql_context": "CREATE TABLE algorithmic_fairness (id INT, region VARCHAR, algorithm VARCHAR, fairness FLOAT);", + "sql": "SELECT region, algorithm, PERCENT_RANK() OVER (PARTITION BY region ORDER BY fairness) FROM algorithmic_fairness;", + "sql_explanation": "The SQL query calculates the distribution of fairness scores for AI algorithms in different regions in the algorithmic_fairness table by partitioning the data by region and then using the PERCENT_RANK function to calculate the percent rank of each fairness score. The data is ordered by region and fairness to calculate the percent rank." +}, { + "id": "798", + "domain": "sustainable infrastructure", + "domain_description": "Green building data, renewable energy infrastructure projects, carbon offset initiatives, and smart city technology adoption.", + "sql_complexity": "window functions", + "sql_complexity_description": "window functions (e.g., ROW_NUMBER, LEAD, LAG, RANk, NTILE, PERCENT_RANK, etc.) 
with partitioning and ordering", + "sql_task_type": "analytics and reporting", + "sql_task_type_description": "generating reports, dashboards, and analytical insights", + "sql_prompt": "What is the previous offset amount for each carbon offset initiative, starting with the initiative that began on 2021-01-01?", + "sql_context": "CREATE TABLE carbon_offset_initiatives (id INT, project_name VARCHAR(255), location VARCHAR(255), offset_amount INT, start_date DATE, end_date DATE); INSERT INTO carbon_offset_initiatives (id, project_name, location, offset_amount, start_date, end_date) VALUES (1, \u0027Tree Planting\u0027, \u0027New York\u0027, 5000, \u00272020-01-01\u0027, \u00272022-12-31\u0027); INSERT INTO carbon_offset_initiatives (id, project_name, location, offset_amount, start_date, end_date) VALUES (2, \u0027Wind Turbines\u0027, \u0027Texas\u0027, 10000, \u00272019-06-01\u0027, \u00272024-05-31\u0027); INSERT INTO carbon_offset_initiatives (id, project_name, location, offset_amount, start_date, end_date) VALUES (3, \u0027Solar Farms\u0027, \u0027California\u0027, 15000, \u00272021-01-01\u0027, \u00272025-12-31\u0027);", + "sql": "SELECT project_name, location, offset_amount, start_date, LAG(offset_amount) OVER (ORDER BY start_date) as previous_offset_amount FROM carbon_offset_initiatives WHERE start_date \u003e\u003d \u00272021-01-01\u0027;", + "sql_explanation": "This query orders the data by start_date and assigns the offset_amount of the previous initiative (previous_offset_amount) to each initiative using the LAG() window function, starting with the initiative that began on 2021-01-01." 
+}] \ No newline at end of file diff --git a/cloudbuild.yaml.txt b/cloudbuild.yaml.txt new file mode 100644 index 0000000..272f5c4 --- /dev/null +++ b/cloudbuild.yaml.txt @@ -0,0 +1,10 @@ +steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', 'gcr.io/gtm-kqqwvx2-zgi2z/tagging-server', '.'] + - name: 'gcr.io/cloud-builders/docker' + args: ['push', 'gcr.io/gtm-kqqwvx2-zgi2z/tagging-server'] + - name: 'gcr.io/cloud-builders/gcloud' + args: ['run', 'deploy', 'tagging-server', '--image', 'gcr.io/gtm-kqqwvx2-zgi2z/tagging-server', '--platform', 'managed', '--region', 'us-central1'] + +images: + - gcr.io/gtm-kqqwvx2-zgi2z/tagging-server diff --git a/name: CI on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Node.js uses: actions/setup-node@v2 with: node-version: '14' - name: Cache dependencies uses: actions/cache@v2 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} restore-keys: | ${{ runner.os }}-node- - run: npm install - run: npm run build b/name: CI on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Node.js uses: actions/setup-node@v2 with: node-version: '14' - name: Cache dependencies uses: actions/cache@v2 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} restore-keys: | ${{ runner.os }}-node- - run: npm install - run: npm run build new file mode 100644 index 0000000..e69de29